mirror of https://gitee.com/bigwinds/arangodb

fix fulltext index removal performance, simplified code (#3015)

* simplify index API a bit
* fix fulltext index removal performance, simplified code
* updated CHANGELOG
* fix hanging test
* try to fix shutdown problem
* improve fulltext query performance
* fixed duplicate var
* removed obsolete code
* fix some shutdown races
* do not call ensureIndex that often

This commit is contained in: parent 0eaf4cabd9, commit 8e4dac4fc4

CHANGELOG: 16 changed lines

@@ -1,24 +1,27 @@
 devel
 -----
 
+* improved MMFiles fulltext index document removal performance
+  and fulltext index query performance for bigger result sets
+
 * ui: fixed a bug when success event triggers twice in a modal
 
 * ui: fixed the appearance of the documents filter
 
 * ui: graph vertex collections not restricted to 10 anymore
 
 * fixed docs for issue #2968
 
 * fixed issue #2835: UI detection of JWT token in case of server restart or upgrade
 
 * upgrade jemalloc version to 5.0.1
 
 * fixed docs for issue #2968
 
 * fixed issue #2978: log something more obvious when you log a Buffer
 
 * fixed issue #2982: AQL parse error?
 
 v3.2.1 (2017-08-09)
 -------------------
 
 * fixed docs for issue #2968: Collection _key autoincrement value increases on error
 
 * fixed issue #3011: Optimizer rule reduce-extraction-to-projection breaks queries

@@ -27,7 +30,7 @@ devel
   It is still not possible to restore collections that are sharded
   differently than by _key.
 
-* fixed an isse with restoring of system collections and user rights.
+* fixed an issue with restoring of system collections and user rights.
   It was not possible to restore users into an authenticated server.
 
 * fixed issue #2977: Documentation for db._createDatabase is wrong

@@ -38,8 +41,6 @@ devel
 
 * ui: fixed a bug when moving multiple documents was not possible
 
-* fixed docs for issue #2968: Collection _key autoincrement value increases on error
-
 * AQL CHAR_LENGTH(null) returns now 0. Since AQL TO_STRING(null) is '' (string of length 0)
 
 * ui: now supports single js file upload for Foxx services in addition to zip files

@@ -125,6 +126,7 @@ devel
 * option "--rocksdb.compaction-read-ahead-size" now defaults to 2MB
 
 * change Windows build so that RocksDB doesn't enforce AVX optimizations by default
+  This fixes startup crashes on servers that do not have AVX CPU extensions
 
 * speed up RocksDB secondary index creation and dropping

@@ -396,9 +396,9 @@ std::string Index::context() const {
 
 /// @brief create a VelocyPack representation of the index
 /// base functionality (called from derived classes)
-std::shared_ptr<VPackBuilder> Index::toVelocyPack(bool withFigures) const {
+std::shared_ptr<VPackBuilder> Index::toVelocyPack(bool withFigures, bool forPersistence) const {
   auto builder = std::make_shared<VPackBuilder>();
-  toVelocyPack(*builder, withFigures, false);
+  toVelocyPack(*builder, withFigures, forPersistence);
   return builder;
 }

@@ -550,12 +550,6 @@ void Index::batchInsert(
   }
 }
 
-/// @brief default implementation for cleanup
-int Index::cleanup() {
-  // do nothing
-  return TRI_ERROR_NO_ERROR;
-}
-
 /// @brief default implementation for drop
 int Index::drop() {
   // do nothing

@@ -241,8 +241,8 @@ class Index {
 
   virtual size_t memory() const = 0;
 
-  virtual void toVelocyPack(arangodb::velocypack::Builder&, bool, bool) const;
-  std::shared_ptr<arangodb::velocypack::Builder> toVelocyPack(bool) const;
+  virtual void toVelocyPack(arangodb::velocypack::Builder&, bool withFigures, bool forPersistence) const;
+  std::shared_ptr<arangodb::velocypack::Builder> toVelocyPack(bool withFigures, bool forPersistence) const;
 
   virtual void toVelocyPackFigures(arangodb::velocypack::Builder&) const;
   std::shared_ptr<arangodb::velocypack::Builder> toVelocyPackFigures() const;

@@ -257,11 +257,9 @@ class Index {
       std::vector<std::pair<TRI_voc_rid_t, arangodb::velocypack::Slice>> const&,
       std::shared_ptr<arangodb::basics::LocalTaskQueue> queue);
 
-  virtual int load() = 0;
-  virtual int unload() = 0;
+  virtual void load() = 0;
+  virtual void unload() = 0;
 
-  // a garbage collection function for the index
-  virtual int cleanup();
   // called when the index is dropped
   virtual int drop();

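Aside (not part of the commit): a minimal standalone model, using only the C++ standard library, of the reshaped serialization entry point above. "Index" and the string-stream "builder" here are stand-ins for the real arangodb::Index and VPackBuilder; the point is how the convenience overload now forwards forPersistence instead of hard-coding false.

// Standalone model of the reshaped Index::toVelocyPack API (std-only sketch;
// the class and builder are stand-ins, not the real ArangoDB types).
#include <iostream>
#include <memory>
#include <sstream>
#include <string>

class Index {
 public:
  virtual ~Index() = default;

  // derived classes override this and append their own attributes
  virtual void toVelocyPack(std::ostringstream& builder, bool withFigures,
                            bool forPersistence) const {
    builder << "{\"type\":\"index\",\"forPersistence\":"
            << (forPersistence ? "true" : "false") << "}";
  }

  // convenience overload: forwards forPersistence instead of always passing
  // `false`, so definitions written for persistence keep all attributes
  std::shared_ptr<std::string> toVelocyPack(bool withFigures,
                                            bool forPersistence) const {
    std::ostringstream builder;
    toVelocyPack(builder, withFigures, forPersistence);
    return std::make_shared<std::string>(builder.str());
  }
};

int main() {
  Index idx;
  std::cout << *idx.toVelocyPack(false, true) << "\n";  // persistence variant
}
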
@@ -2,11 +2,9 @@
 
 # add sources for mmfiles engine
 set(MMFILES_SOURCES
-  MMFiles/mmfiles-fulltext-handles.cpp
   MMFiles/mmfiles-fulltext-index.cpp
   MMFiles/mmfiles-fulltext-list.cpp
   MMFiles/mmfiles-fulltext-query.cpp
-  MMFiles/mmfiles-fulltext-result.cpp
   MMFiles/mmfiles-geo-index.cpp
   MMFiles/MMFilesAllocatorThread.cpp
   MMFiles/MMFilesAqlFunctions.cpp

|
|||
#include "Aql/AqlFunctionFeature.h"
|
||||
#include "MMFiles/MMFilesFulltextIndex.h"
|
||||
#include "MMFiles/MMFilesGeoIndex.h"
|
||||
#include "MMFiles/MMFilesToken.h"
|
||||
#include "MMFiles/mmfiles-fulltext-index.h"
|
||||
#include "MMFiles/mmfiles-fulltext-query.h"
|
||||
#include "MMFiles/mmfiles-fulltext-result.h"
|
||||
#include "StorageEngine/DocumentIdentifierToken.h"
|
||||
#include "Utils/CollectionNameResolver.h"
|
||||
#include "Transaction/Helpers.h"
|
||||
|
@ -279,33 +279,21 @@ AqlValue MMFilesAqlFunctions::Fulltext(
|
|||
}
|
||||
|
||||
// note: the following call will free "ft"!
|
||||
TRI_fulltext_result_t* queryResult =
|
||||
TRI_QueryMMFilesFulltextIndex(fulltextIndex->internals(), ft);
|
||||
std::set<TRI_voc_rid_t> queryResult = TRI_QueryMMFilesFulltextIndex(fulltextIndex->internals(), ft);
|
||||
|
||||
if (queryResult == nullptr) {
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
TRI_ASSERT(trx->isPinned(cid));
|
||||
|
||||
transaction::BuilderLeaser builder(trx);
|
||||
try {
|
||||
builder->openArray();
|
||||
builder->openArray();
|
||||
|
||||
ManagedDocumentResult mmdr;
|
||||
size_t const numResults = queryResult->_numDocuments;
|
||||
for (size_t i = 0; i < numResults; ++i) {
|
||||
if (collection->readDocument(trx, queryResult->_documents[i], mmdr)) {
|
||||
mmdr.addToBuilder(*builder.get(), true);
|
||||
}
|
||||
ManagedDocumentResult mmdr;
|
||||
for (auto const& it : queryResult) {
|
||||
if (collection->readDocument(trx, MMFilesToken{it}, mmdr)) {
|
||||
mmdr.addToBuilder(*builder.get(), true);
|
||||
}
|
||||
builder->close();
|
||||
TRI_FreeResultMMFilesFulltextIndex(queryResult);
|
||||
return AqlValue(builder.get());
|
||||
} catch (...) {
|
||||
TRI_FreeResultMMFilesFulltextIndex(queryResult);
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
builder->close();
|
||||
return AqlValue(builder.get());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
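Aside (not part of the commit): the hunk above replaces a heap-allocated result struct, which had to be freed on every path including the exception path, with a value-typed std::set. A minimal std-only model of why that removes the try/catch and the manual free; the function and type names here are stand-ins:

// Standalone model of the query-result simplification (std-only sketch).
#include <cstdint>
#include <iostream>
#include <set>

using RevisionId = std::uint64_t;  // stand-in for TRI_voc_rid_t

// Returning a value-typed container makes cleanup automatic: no
// TRI_FreeResult... call, no catch-all block just to release memory.
std::set<RevisionId> queryFulltextIndex() { return {3, 1, 2}; }

int main() {
  std::set<RevisionId> result = queryFulltextIndex();
  for (RevisionId rid : result) {  // sorted and duplicate-free by construction
    std::cout << rid << "\n";
  }
}  // the set destroys itself on every exit path
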
@@ -96,16 +96,6 @@ void MMFilesCleanupThread::run() {
 
         // we're the only ones that can unload the collection, so using
         // the collection pointer outside the lock is ok
-
-        // maybe cleanup indexes, unload the collection or some datafiles
-
-        // clean indexes?
-        if (iterations % cleanupIndexIterations() == 0 && status != TRI_VOC_COL_STATUS_DELETED) {
-          auto physical = static_cast<MMFilesCollection*>(collection->getPhysical());
-          TRI_ASSERT(physical != nullptr);
-          physical->cleanupIndexes();
-        }
-
         cleanupCollection(collection);
       }
     }, false);

@@ -160,11 +150,13 @@ void MMFilesCleanupThread::cleanupCollection(arangodb::LogicalCollection* collection) {
 
   // but if we are in server shutdown, we can force unloading of collections
   bool isInShutdown = application_features::ApplicationServer::isStopping();
 
   // loop until done
 
+  auto mmfiles = arangodb::MMFilesCollection::toMMFilesCollection(collection);
+  TRI_ASSERT(mmfiles != nullptr);
+
   while (true) {
-    auto mmfiles = arangodb::MMFilesCollection::toMMFilesCollection(collection);
     auto ditches = mmfiles->ditches();
 
     TRI_ASSERT(ditches != nullptr);

@@ -50,9 +50,6 @@ class MMFilesCleanupThread final : public Thread {
   /// @brief how many cleanup iterations until query cursors are cleaned
   static constexpr uint64_t cleanupCursorIterations() { return 3; }
 
-  /// @brief how many cleanup iterations until indexes are cleaned
-  static constexpr uint64_t cleanupIndexIterations() { return 5; }
-
   /// @brief clean up cursors
   void cleanupCursors(bool force);

@@ -480,7 +480,6 @@ MMFilesCollection::MMFilesCollection(LogicalCollection* collection,
           TRI_JOURNAL_DEFAULT_SIZE))),
       _isVolatile(arangodb::basics::VelocyPackHelper::readBooleanValue(
           info, "isVolatile", false)),
-      _cleanupIndexes(0),
       _persistentIndexes(0),
       _indexBuckets(Helper::readNumericValue<uint32_t>(
           info, "indexBuckets", defaultIndexBuckets)),

@@ -512,7 +511,6 @@ MMFilesCollection::MMFilesCollection(LogicalCollection* logical,
       _ditches(logical),
       _isVolatile(static_cast<MMFilesCollection*>(physical)->isVolatile()) {
   MMFilesCollection& mmfiles = *static_cast<MMFilesCollection*>(physical);
-  _cleanupIndexes = mmfiles._cleanupIndexes;
   _persistentIndexes = mmfiles._persistentIndexes;
   _useSecondaryIndexes = mmfiles._useSecondaryIndexes;
   _initialCount = mmfiles._initialCount;

@@ -594,6 +592,13 @@ int MMFilesCollection::close() {
     }
   }
 
+  // wait until ditches have been processed fully
+  while (_ditches.contains(MMFilesDitch::TRI_DITCH_DATAFILE_DROP) ||
+         _ditches.contains(MMFilesDitch::TRI_DITCH_DATAFILE_RENAME) ||
+         _ditches.contains(MMFilesDitch::TRI_DITCH_COMPACTION)) {
+    usleep(20000);
+  }
+
   {
     WRITE_LOCKER(writeLocker, _filesLock);

@@ -2121,7 +2126,7 @@ int MMFilesCollection::saveIndex(transaction::Methods* trx,
 
   std::shared_ptr<VPackBuilder> builder;
   try {
-    builder = idx->toVelocyPack(false);
+    builder = idx->toVelocyPack(false, true);
   } catch (arangodb::basics::Exception const& ex) {
     return ex.code();
   } catch (...) {

@@ -2182,9 +2187,6 @@ void MMFilesCollection::addIndexLocal(std::shared_ptr<arangodb::Index> idx) {
   }
 
   // update statistics
-  if (idx->type() == arangodb::Index::TRI_IDX_TYPE_FULLTEXT_INDEX) {
-    ++_cleanupIndexes;
-  }
   if (idx->isPersistent()) {
     ++_persistentIndexes;
   }

@@ -2315,9 +2317,6 @@ bool MMFilesCollection::removeIndex(TRI_idx_iid_t iid) {
       _indexes.erase(_indexes.begin() + i);
 
       // update statistics
-      if (idx->type() == arangodb::Index::TRI_IDX_TYPE_FULLTEXT_INDEX) {
-        --_cleanupIndexes;
-      }
       if (idx->isPersistent()) {
         --_persistentIndexes;
       }

@@ -2330,27 +2329,6 @@ bool MMFilesCollection::removeIndex(TRI_idx_iid_t iid) {
   return false;
 }
 
-/// @brief garbage-collect a collection's indexes
-int MMFilesCollection::cleanupIndexes() {
-  int res = TRI_ERROR_NO_ERROR;
-
-  // cleaning indexes is expensive, so only do it if the flag is set for the
-  // collection
-  if (_cleanupIndexes > 0) {
-    WRITE_LOCKER(writeLocker, _dataLock);
-    for (auto& idx : _indexes) {
-      if (idx->type() == arangodb::Index::TRI_IDX_TYPE_FULLTEXT_INDEX) {
-        res = idx->cleanup();
-
-        if (res != TRI_ERROR_NO_ERROR) {
-          break;
-        }
-      }
-    }
-  }
-  return res;
-}
-
 std::unique_ptr<IndexIterator> MMFilesCollection::getAllIterator(
     transaction::Methods* trx, ManagedDocumentResult* mdr, bool reverse) const {
   return std::unique_ptr<IndexIterator>(

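Aside (not part of the commit): the close() hunk above adds a poll-and-sleep loop so the collection does not release its files while drop/rename/compaction "ditches" are still pending. A std-only model of that shutdown-race fix; the ditch counter and timings here are stand-ins:

// Standalone sketch of the close() fix (std-only; pendingDitches is a
// stand-in for the real ditch registry).
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

std::atomic<int> pendingDitches{3};

void backgroundWorker() {
  while (pendingDitches.load() > 0) {
    std::this_thread::sleep_for(std::chrono::milliseconds(5));
    --pendingDitches;  // one pending datafile operation completes
  }
}

int closeCollection() {
  // mirrors the added loop: wait until nothing is outstanding
  while (pendingDitches.load() > 0) {
    std::this_thread::sleep_for(std::chrono::milliseconds(20));  // usleep(20000)
  }
  std::cout << "safe to close files now\n";
  return 0;
}

int main() {
  std::thread worker(backgroundWorker);
  int res = closeCollection();
  worker.join();
  return res;
}
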
@@ -25,6 +25,7 @@
 #define ARANGOD_MMFILES_MMFILES_COLLECTION_H 1
 
 #include "Basics/Common.h"
 #include "Basics/Mutex.h"
 #include "Basics/ReadWriteLock.h"
 #include "Indexes/IndexIterator.h"
 #include "Indexes/IndexLookupContext.h"

@@ -302,8 +303,6 @@ class MMFilesCollection final : public PhysicalCollection {
   /// @brief Drop an index with the given iid.
   bool dropIndex(TRI_idx_iid_t iid) override;
 
-  int cleanupIndexes();
-
   ////////////////////////////////////
   // -- SECTION Locking --
   ///////////////////////////////////

@@ -555,7 +554,6 @@ class MMFilesCollection final : public PhysicalCollection {
 
   // SECTION: Indexes
 
-  size_t _cleanupIndexes;
   size_t _persistentIndexes;
   uint32_t _indexBuckets;

@@ -235,6 +235,7 @@ MMFilesCollectorThread::MMFilesCollectorThread(MMFilesLogfileManager* logfileManager,
     : Thread("WalCollector"),
      _logfileManager(logfileManager),
      _condition(),
+      _forcedStopIterations(-1),
      _operationsQueueLock(),
      _operationsQueue(),
      _operationsQueueInUse(false),

@@ -272,6 +273,13 @@ void MMFilesCollectorThread::signal() {
   guard.signal();
 }
 
+/// @brief signal the thread that there is something to do
+void MMFilesCollectorThread::forceStop() {
+  CONDITION_LOCKER(guard, _condition);
+  _forcedStopIterations = 0;
+  guard.signal();
+}
+
 /// @brief main loop
 void MMFilesCollectorThread::run() {
   int counter = 0;

@@ -295,20 +303,13 @@ void MMFilesCollectorThread::run() {
       }
 
       // step 2: update master pointers
       try {
-        bool worked;
-        int res = this->processQueuedOperations(worked);
+      bool worked;
+      int res = this->processQueuedOperations(worked);
 
-        if (res == TRI_ERROR_NO_ERROR) {
-          hasWorked |= worked;
-        } else if (res == TRI_ERROR_ARANGO_FILESYSTEM_FULL) {
-          doDelay = true;
-        }
-      } catch (...) {
-        // re-activate the queue
-        MUTEX_LOCKER(mutexLocker, _operationsQueueLock);
-        _operationsQueueInUse = false;
-        throw;
+      if (res == TRI_ERROR_NO_ERROR) {
+        hasWorked |= worked;
+      } else if (res == TRI_ERROR_ARANGO_FILESYSTEM_FULL) {
+        doDelay = true;
       }
     } catch (arangodb::basics::Exception const& ex) {
       int res = ex.code();

@@ -338,10 +339,20 @@ void MMFilesCollectorThread::run() {
           counter = 0;
         }
       }
-    } else if (isStopping() && !hasQueuedOperations()) {
-      // no operations left to execute, we can exit
-      break;
+    } else if (isStopping()) {
+      if (!hasQueuedOperations()) {
+        // no operations left to execute, we can exit
+        break;
+      }
+      if (_forcedStopIterations >= 0) {
+        if (++_forcedStopIterations == 10) {
+          // forceful exit
+          break;
+        } else {
+          guard.wait(interval);
+        }
+      }
     }
   }
 
   // all queues are empty, so we can exit

@@ -480,6 +491,7 @@ int MMFilesCollectorThread::processQueuedOperations(bool& worked) {
       if (res == TRI_ERROR_LOCK_TIMEOUT) {
        // could not acquire write-lock for collection in time
        // do not delete the operations
+        LOG_TOPIC(TRACE, Logger::COLLECTOR) << "got lock timeout while trying to apply queued operations";
        ++it2;
        continue;
      }

@@ -56,6 +56,9 @@ class MMFilesCollectorThread final : public Thread {
 
   /// @brief signal the thread that there is something to do
   void signal();
 
+  /// @brief force the shutdown by setting _forcedStopIterations
+  void forceStop();
+
   /// @brief check whether there are queued operations left
   bool hasQueuedOperations();

@@ -113,6 +116,10 @@ class MMFilesCollectorThread final : public Thread {
   /// @brief condition variable for the collector thread
   basics::ConditionVariable _condition;
 
+  /// @brief used for counting the number of iterations during
+  /// forcedIterations. defaults to -1
+  int _forcedStopIterations;
+
   /// @brief operations lock
   arangodb::Mutex _operationsQueueLock;

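Aside (not part of the commit): the collector-thread hunks above add a bounded forced stop: forceStop() arms a counter and the main loop gives up after ten waits even if operations remain queued, which is one of the "shutdown races" fixes from the commit message. A std-only model of the pattern; the queue, locks, and timings are stand-ins:

// Standalone sketch of the forced-stop pattern (std-only; the 10-iteration
// bound mirrors the diff, everything else is a stand-in).
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex mtx;
std::condition_variable cv;
int forcedStopIterations = -1;  // -1 means: no forced stop requested
bool stopping = false;

bool hasQueuedOperations() { return true; }  // simulate a queue that never drains

void forceStop() {
  std::lock_guard<std::mutex> guard(mtx);
  forcedStopIterations = 0;  // arm the bounded-wait countdown
  stopping = true;
  cv.notify_one();
}

void run() {
  std::unique_lock<std::mutex> guard(mtx);
  while (true) {
    if (stopping) {
      if (!hasQueuedOperations()) break;        // normal, clean exit
      if (forcedStopIterations >= 0 &&
          ++forcedStopIterations == 10) break;  // forceful exit after 10 waits
    }
    cv.wait_for(guard, std::chrono::milliseconds(10));
  }
  std::cout << "collector exited after " << forcedStopIterations << " waits\n";
}

int main() {
  std::thread collector(run);
  forceStop();
  collector.join();  // terminates even though operations remain queued
}
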
@@ -261,13 +261,10 @@ size_t MMFilesEdgeIndex::memory() const {
 void MMFilesEdgeIndex::toVelocyPack(VPackBuilder& builder, bool withFigures,
                                     bool forPersistence) const {
   builder.openObject();
-  {
-    Index::toVelocyPack(builder, withFigures, forPersistence);
-
-    // hard-coded
-    builder.add("unique", VPackValue(false));
-    builder.add("sparse", VPackValue(false));
-  }
+  Index::toVelocyPack(builder, withFigures, forPersistence);
+  // hard-coded
+  builder.add("unique", VPackValue(false));
+  builder.add("sparse", VPackValue(false));
   builder.close();
 }

@@ -381,11 +378,9 @@ void MMFilesEdgeIndex::batchInsert(
 }
 
 /// @brief unload the index data from memory
-int MMFilesEdgeIndex::unload() {
+void MMFilesEdgeIndex::unload() {
   _edgesFrom->truncate([](MMFilesSimpleIndexElement const&) { return true; });
   _edgesTo->truncate([](MMFilesSimpleIndexElement const&) { return true; });
-
-  return TRI_ERROR_NO_ERROR;
 }
 
 /// @brief provides a size hint for the edge index

@@ -113,8 +113,8 @@ class MMFilesEdgeIndex final : public Index {
       std::vector<std::pair<TRI_voc_rid_t, VPackSlice>> const&,
       std::shared_ptr<arangodb::basics::LocalTaskQueue>) override;
 
-  int load() override { return 0; }
-  int unload() override;
+  void load() override {}
+  void unload() override;
 
   int sizeHint(transaction::Methods*, size_t) override;

@@ -39,26 +39,25 @@ using namespace arangodb;
 
 /// @brief walk over the attribute. Also Extract sub-attributes and elements in
 /// list.
-static void ExtractWords(std::set<std::string>& words,
-                         VPackSlice const value,
-                         size_t minWordLength,
-                         int level) {
+void MMFilesFulltextIndex::extractWords(std::set<std::string>& words,
+                                        VPackSlice value,
+                                        int level) const {
   if (value.isString()) {
     // extract the string value for the indexed attribute
     std::string text = value.copyString();
 
     // parse the document text
     arangodb::basics::Utf8Helper::DefaultUtf8Helper.tokenize(
-        words, text, minWordLength, TRI_FULLTEXT_MAX_WORD_LENGTH, true);
+        words, text, _minWordLength, TRI_FULLTEXT_MAX_WORD_LENGTH, true);
     // We don't care for the result. If the result is false, words stays
     // unchanged and is not indexed
   } else if (value.isArray() && level == 0) {
     for (auto const& v : VPackArrayIterator(value)) {
-      ExtractWords(words, v, minWordLength, level + 1);
+      extractWords(words, v, level + 1);
     }
   } else if (value.isObject() && level == 0) {
     for (auto const& v : VPackObjectIterator(value)) {
-      ExtractWords(words, v.value, minWordLength, level + 1);
+      extractWords(words, v.value, level + 1);
     }
   }
 }

@@ -216,19 +215,10 @@ Result MMFilesFulltextIndex::insert(transaction::Methods*,
                                     TRI_voc_rid_t revisionId,
                                     VPackSlice const& doc, bool isRollback) {
   int res = TRI_ERROR_NO_ERROR;
 
   std::set<std::string> words = wordlist(doc);
 
-  if (words.empty()) {
-    // TODO: distinguish the cases "empty wordlist" and "out of memory"
-    // LOG_TOPIC(WARN, arangodb::Logger::FIXME) << "could not build wordlist";
-    return IndexResult(res, this);
-  }
-
-  // TODO: use status codes
-  if (!TRI_InsertWordsMMFilesFulltextIndex(_fulltextIndex, revisionId, words)) {
-    LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "adding document to fulltext index failed";
-    res = TRI_ERROR_INTERNAL;
+  if (!words.empty()) {
+    res = TRI_InsertWordsMMFilesFulltextIndex(_fulltextIndex, revisionId, words);
   }
   return IndexResult(res, this);
 }

@@ -236,46 +226,30 @@ Result MMFilesFulltextIndex::insert(transaction::Methods*,
 Result MMFilesFulltextIndex::remove(transaction::Methods*,
                                     TRI_voc_rid_t revisionId,
                                     VPackSlice const& doc, bool isRollback) {
-  TRI_DeleteDocumentMMFilesFulltextIndex(_fulltextIndex, revisionId);
-
-  return Result(TRI_ERROR_NO_ERROR);
-}
-
-int MMFilesFulltextIndex::unload() {
-  TRI_TruncateMMFilesFulltextIndex(_fulltextIndex);
-  return TRI_ERROR_NO_ERROR;
-}
-
-int MMFilesFulltextIndex::cleanup() {
-  LOG_TOPIC(TRACE, arangodb::Logger::FIXME) << "fulltext cleanup called";
-
   int res = TRI_ERROR_NO_ERROR;
+  std::set<std::string> words = wordlist(doc);
 
-  // check whether we should do a cleanup at all
-  if (!TRI_CompactMMFilesFulltextIndex(_fulltextIndex)) {
-    res = TRI_ERROR_INTERNAL;
+  if (!words.empty()) {
+    res = TRI_RemoveWordsMMFilesFulltextIndex(_fulltextIndex, revisionId, words);
   }
+  return IndexResult(res, this);
+}
 
-  return res;
+void MMFilesFulltextIndex::unload() {
+  TRI_TruncateMMFilesFulltextIndex(_fulltextIndex);
 }
 
 /// @brief callback function called by the fulltext index to determine the
 /// words to index for a specific document
 std::set<std::string> MMFilesFulltextIndex::wordlist(VPackSlice const& doc) {
   std::set<std::string> words;
-  try {
-    VPackSlice const value = doc.get(_attr);
+  VPackSlice const value = doc.get(_attr);
 
-    if (!value.isString() && !value.isArray() && !value.isObject()) {
-      // Invalid Input
-      return words;
-    }
-
-    ExtractWords(words, value, _minWordLength, 0);
-  } catch (...) {
-    // Backwards compatibility
-    // The pre-vpack impl. did just ignore all errors and returned nulltpr
+  if (!value.isString() && !value.isArray() && !value.isObject()) {
+    // Invalid Input
+    return words;
   }
+
+  extractWords(words, value, 0);
   return words;
 }

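Aside (not part of the commit): the remove() rewrite above is the core of the performance fix. Instead of marking a document deleted via the old handle machinery, removal now re-derives the document's word list and erases exactly those postings, making the cost proportional to the document rather than to the index. A std-only model of that symmetric insert/remove strategy; the map-of-sets index is a stand-in for the real node structure:

// Standalone model of word-level removal (std-only sketch).
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <string>

using RevisionId = std::uint64_t;
std::map<std::string, std::set<RevisionId>> index_;

void insertWords(RevisionId rid, std::set<std::string> const& words) {
  for (auto const& w : words) index_[w].insert(rid);
}

// symmetric to insertWords: touches only the words the document contains
void removeWords(RevisionId rid, std::set<std::string> const& words) {
  for (auto const& w : words) {
    auto it = index_.find(w);
    if (it == index_.end()) continue;
    it->second.erase(rid);
    if (it->second.empty()) index_.erase(it);  // drop empty postings
  }
}

int main() {
  insertWords(1, {"some", "text"});
  insertWords(2, {"more", "text"});
  removeWords(1, {"some", "text"});  // wordlist recomputed from the document
  std::cout << index_.count("some") << " " << index_["text"].size() << "\n";  // 0 1
}
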
@@ -60,7 +60,7 @@ class MMFilesFulltextIndex final : public Index {
 
   size_t memory() const override;
 
-  void toVelocyPack(VPackBuilder&, bool, bool) const override;
+  void toVelocyPack(VPackBuilder&, bool withFigures, bool forPersistence) const override;
   // Uses default toVelocyPackFigures
 
   bool matchesDefinition(VPackSlice const&) const override;

@@ -71,10 +71,8 @@ class MMFilesFulltextIndex final : public Index {
   Result remove(transaction::Methods*, TRI_voc_rid_t,
                 arangodb::velocypack::Slice const&, bool isRollback) override;
 
-  int load() override { return 0; }
-  int unload() override;
-
-  int cleanup() override;
+  void load() override {}
+  void unload() override;
 
   bool isSame(std::string const& field, int minWordLength) const {
     std::string fieldString;

@@ -91,6 +89,7 @@ class MMFilesFulltextIndex final : public Index {
 
  private:
   std::set<std::string> wordlist(arangodb::velocypack::Slice const&);
+  void extractWords(std::set<std::string>& words, arangodb::velocypack::Slice value, int level) const;
 
  private:
   /// @brief the indexed attribute (path)

@@ -513,7 +513,7 @@ Result MMFilesGeoIndex::remove(transaction::Methods*, TRI_voc_rid_t revisionId,
   return Result(TRI_ERROR_NO_ERROR);
 }
 
-int MMFilesGeoIndex::unload() {
+void MMFilesGeoIndex::unload() {
   // create a new, empty index
   auto empty = GeoIndex_new();

@@ -528,8 +528,6 @@ int MMFilesGeoIndex::unload() {
 
   // and assign it
   _geoIndex = empty;
-
-  return TRI_ERROR_NO_ERROR;
 }
 
 /// @brief looks up all points within a given radius

@@ -131,7 +131,7 @@ class MMFilesGeoIndex final : public Index {
 
   size_t memory() const override;
 
-  void toVelocyPack(VPackBuilder&, bool, bool) const override;
+  void toVelocyPack(VPackBuilder&, bool withFigures, bool forPersistence) const override;
   // Uses default toVelocyPackFigures
 
   bool matchesDefinition(VPackSlice const& info) const override;

@@ -142,8 +142,8 @@ class MMFilesGeoIndex final : public Index {
   Result remove(transaction::Methods*, TRI_voc_rid_t,
                 arangodb::velocypack::Slice const&, bool isRollback) override;
 
-  int load() override { return 0; }
-  int unload() override;
+  void load() override {}
+  void unload() override;
 
   /// @brief looks up all points within a given radius
   GeoCoordinates* withinQuery(transaction::Methods*, double, double,

@@ -502,21 +502,9 @@ size_t MMFilesHashIndex::memory() const {
          _multiArray->_hashArray->memoryUsage());
 }
 
-/// @brief return a velocypack representation of the index
-void MMFilesHashIndex::toVelocyPack(VPackBuilder& builder, bool withFigures,
-                                    bool forPersistence) const {
-  builder.openObject();
-  Index::toVelocyPack(builder, withFigures, forPersistence);
-  builder.add("unique", VPackValue(_unique));
-  builder.add("sparse", VPackValue(_sparse));
-  builder.add("deduplicate", VPackValue(_deduplicate));
-  builder.close();
-}
-
 /// @brief return a velocypack representation of the index figures
 void MMFilesHashIndex::toVelocyPackFigures(VPackBuilder& builder) const {
   TRI_ASSERT(builder.isOpenObject());
-  builder.add("memory", VPackValue(memory()));
+  MMFilesPathBasedIndex::toVelocyPackFigures(builder);
   if (_unique) {
     _uniqueArray->_hashArray->appendToVelocyPack(builder);
   } else {

@@ -644,7 +632,7 @@ void MMFilesHashIndex::batchInsert(
   }
 }
 
-int MMFilesHashIndex::unload() {
+void MMFilesHashIndex::unload() {
   if (_unique) {
     _uniqueArray->_hashArray->truncate(
         [](MMFilesHashIndexElement*) -> bool { return true; });

@@ -653,7 +641,6 @@ int MMFilesHashIndex::unload() {
         [](MMFilesHashIndexElement*) -> bool { return true; });
   }
   _allocator->deallocateAll();
-  return TRI_ERROR_NO_ERROR;
 }
 
 /// @brief provides a size hint for the hash index

@@ -159,7 +159,6 @@ class MMFilesHashIndex final : public MMFilesPathBasedIndex {
 
   size_t memory() const override;
 
-  void toVelocyPack(VPackBuilder&, bool, bool) const override;
   void toVelocyPackFigures(VPackBuilder&) const override;
 
   bool matchesDefinition(VPackSlice const& info) const override;

@@ -175,7 +174,7 @@ class MMFilesHashIndex final : public MMFilesPathBasedIndex {
       std::vector<std::pair<TRI_voc_rid_t, arangodb::velocypack::Slice>> const&,
       std::shared_ptr<arangodb::basics::LocalTaskQueue> queue) override;
 
-  int unload() override;
+  void unload() override;
 
   int sizeHint(transaction::Methods*, size_t) override;

@@ -525,6 +525,7 @@ void MMFilesLogfileManager::unprepare() {
 
   if (_collectorThread != nullptr) {
     LOG_TOPIC(TRACE, arangodb::Logger::FIXME) << "stopping collector thread";
+    _collectorThread->forceStop();
     while (_collectorThread->isRunning()) {
       usleep(10000);
     }

@@ -70,6 +70,23 @@ MMFilesPathBasedIndex::~MMFilesPathBasedIndex() {
   _allocator->deallocateAll();
 }
 
+void MMFilesPathBasedIndex::toVelocyPackFigures(VPackBuilder& builder) const {
+  TRI_ASSERT(builder.isOpenObject());
+  builder.add("memory", VPackValue(memory()));
+}
+
+/// @brief return a VelocyPack representation of the index
+void MMFilesPathBasedIndex::toVelocyPack(VPackBuilder& builder,
+                                         bool withFigures,
+                                         bool forPersistence) const {
+  builder.openObject();
+  Index::toVelocyPack(builder, withFigures, forPersistence);
+  builder.add("unique", VPackValue(_unique));
+  builder.add("sparse", VPackValue(_sparse));
+  builder.add("deduplicate", VPackValue(_deduplicate));
+  builder.close();
+}
+
 /// @brief whether or not the index is implicitly unique
 /// this can be the case if the index is not declared as unique, but contains a
 /// unique attribute such as _key

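Aside (not part of the commit): the hunk above is where the "simplify index API" part lands — the hash, skiplist, and persistent indexes previously each carried an identical unique/sparse/deduplicate serializer, which now lives once in the path-based base class. A std-only model of that consolidation; all class names and the JSON-ish output are stand-ins:

// Standalone model of pulling a duplicated serializer into a base class
// (std-only sketch).
#include <iostream>
#include <sstream>

class Index {
 public:
  virtual ~Index() = default;
  // base functionality: common attributes every index serializes
  virtual void toVelocyPack(std::ostringstream& b) const {
    b << "\"id\":1,\"type\":\"index\"";
  }
};

class PathBasedIndex : public Index {
 public:
  // the one shared implementation that replaces three identical copies
  void toVelocyPack(std::ostringstream& b) const override {
    b << std::boolalpha << "{";
    Index::toVelocyPack(b);
    b << ",\"unique\":" << _unique << ",\"sparse\":" << _sparse
      << ",\"deduplicate\":" << _deduplicate << "}";
  }

 protected:
  bool _unique = false, _sparse = false, _deduplicate = true;
};

// hash/skiplist/persistent equivalents no longer override toVelocyPack at all
class HashLikeIndex final : public PathBasedIndex {};

int main() {
  std::ostringstream b;
  HashLikeIndex{}.toVelocyPack(b);
  std::cout << b.str() << "\n";
}
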
@@ -29,6 +29,8 @@
 #include "VocBase/vocbase.h"
 #include "VocBase/voc-types.h"
 
+#include <velocypack/Builder.h>
+
 namespace arangodb {
 namespace aql {
 enum AstNodeType : uint32_t;

@@ -47,6 +49,11 @@ class MMFilesPathBasedIndex : public Index {
   ~MMFilesPathBasedIndex();
 
  public:
+  void toVelocyPackFigures(arangodb::velocypack::Builder&) const override;
+
+  void toVelocyPack(arangodb::velocypack::Builder& builder,
+                    bool withFigures,
+                    bool forPersistence) const override;
 
   /// @brief return the attribute paths
   std::vector<std::vector<std::string>> const& paths()

@@ -61,7 +68,7 @@ class MMFilesPathBasedIndex : public Index {
   }
 
   bool implicitlyUnique() const override;
-  int load() override { return 0; }
+  void load() override {}
 
  protected:
   /// @brief helper function to insert a document into any index type

@@ -217,24 +217,6 @@ size_t MMFilesPersistentIndex::memory() const {
   return 0; // TODO
 }
 
-/// @brief return a VelocyPack representation of the index
-void MMFilesPersistentIndex::toVelocyPack(VPackBuilder& builder,
-                                          bool withFigures,
-                                          bool forPersistence) const {
-  builder.openObject();
-  Index::toVelocyPack(builder, withFigures, forPersistence);
-  builder.add("unique", VPackValue(_unique));
-  builder.add("sparse", VPackValue(_sparse));
-  builder.add("deduplicate", VPackValue(_deduplicate));
-  builder.close();
-}
-
-/// @brief return a VelocyPack representation of the index figures
-void MMFilesPersistentIndex::toVelocyPackFigures(VPackBuilder& builder) const {
-  TRI_ASSERT(builder.isOpenObject());
-  builder.add("memory", VPackValue(memory()));
-}
-
 /// @brief inserts a document into the index
 Result MMFilesPersistentIndex::insert(transaction::Methods* trx,
                                       TRI_voc_rid_t revisionId,

@@ -474,11 +456,6 @@ Result MMFilesPersistentIndex::remove(transaction::Methods* trx,
   return IndexResult(res, this);
 }
 
-int MMFilesPersistentIndex::unload() {
-  // nothing to do
-  return TRI_ERROR_NO_ERROR;
-}
-
 /// @brief called when the index is dropped
 int MMFilesPersistentIndex::drop() {
   return MMFilesPersistentIndexFeature::instance()->dropIndex(

@@ -123,9 +123,6 @@ class MMFilesPersistentIndex final : public MMFilesPathBasedIndex {
 
   size_t memory() const override;
 
-  void toVelocyPack(VPackBuilder&, bool, bool) const override;
-  void toVelocyPackFigures(VPackBuilder&) const override;
-
   static constexpr size_t minimalPrefixSize() { return sizeof(TRI_voc_tick_t); }
 
   static constexpr size_t keyPrefixSize() {

@@ -170,7 +167,7 @@ class MMFilesPersistentIndex final : public MMFilesPathBasedIndex {
   Result remove(transaction::Methods*, TRI_voc_rid_t,
                 arangodb::velocypack::Slice const&, bool isRollback) override;
 
-  int unload() override;
+  void unload() override {}
 
   int drop() override;

@@ -279,11 +279,9 @@ Result MMFilesPrimaryIndex::remove(transaction::Methods*, TRI_voc_rid_t,
 }
 
 /// @brief unload the index data from memory
-int MMFilesPrimaryIndex::unload() {
+void MMFilesPrimaryIndex::unload() {
   _primaryIndex->truncate(
       [](MMFilesSimpleIndexElement const&) { return true; });
-
-  return TRI_ERROR_NO_ERROR;
 }
 
 /// @brief looks up an element given a key

@@ -150,7 +150,7 @@ class MMFilesPrimaryIndex final : public Index {
 
   size_t memory() const override;
 
-  void toVelocyPack(VPackBuilder&, bool, bool) const override;
+  void toVelocyPack(VPackBuilder&, bool withFigures, bool forPersistence) const override;
   void toVelocyPackFigures(VPackBuilder&) const override;
 
   Result insert(transaction::Methods*, TRI_voc_rid_t,

@@ -159,8 +159,8 @@ class MMFilesPrimaryIndex final : public Index {
   Result remove(transaction::Methods*, TRI_voc_rid_t,
                 arangodb::velocypack::Slice const&, bool isRollback) override;
 
-  int load() override { return 0; }
-  int unload() override;
+  void load() override {}
+  void unload() override;
 
   MMFilesSimpleIndexElement lookupKey(transaction::Methods*,
                                       VPackSlice const&) const;

@@ -701,23 +701,9 @@ size_t MMFilesSkiplistIndex::memory() const {
          MMFilesSkiplistIndexElement::baseMemoryUsage(_paths.size());
 }
 
-/// @brief return a VelocyPack representation of the index
-void MMFilesSkiplistIndex::toVelocyPack(VPackBuilder& builder, bool withFigures,
-                                        bool forPersistence) const {
-  builder.openObject();
-  {
-    Index::toVelocyPack(builder, withFigures, forPersistence);
-    builder.add("unique", VPackValue(_unique));
-    builder.add("sparse", VPackValue(_sparse));
-    builder.add("deduplicate", VPackValue(_deduplicate));
-  }
-  builder.close();
-}
-
 /// @brief return a VelocyPack representation of the index figures
 void MMFilesSkiplistIndex::toVelocyPackFigures(VPackBuilder& builder) const {
   TRI_ASSERT(builder.isOpenObject());
-  builder.add("memory", VPackValue(memory()));
+  MMFilesPathBasedIndex::toVelocyPackFigures(builder);
   _skiplistIndex->appendToVelocyPack(builder);
 }

@@ -824,9 +810,8 @@ Result MMFilesSkiplistIndex::remove(transaction::Methods* trx,
   return IndexResult(res, this);
 }
 
-int MMFilesSkiplistIndex::unload() {
+void MMFilesSkiplistIndex::unload() {
   _skiplistIndex->truncate(true);
-  return TRI_ERROR_NO_ERROR;
 }
 
 /// @brief Checks if the interval is valid. It is declared invalid if

@@ -282,7 +282,6 @@ class MMFilesSkiplistIndex final : public MMFilesPathBasedIndex {
 
   size_t memory() const override;
 
-  void toVelocyPack(VPackBuilder&, bool, bool) const override;
   void toVelocyPackFigures(VPackBuilder&) const override;
 
   Result insert(transaction::Methods*, TRI_voc_rid_t,

@@ -291,7 +290,7 @@ class MMFilesSkiplistIndex final : public MMFilesPathBasedIndex {
   Result remove(transaction::Methods*, TRI_voc_rid_t,
                 arangodb::velocypack::Slice const&, bool isRollback) override;
 
-  int unload() override;
+  void unload() override;
 
   bool supportsFilterCondition(arangodb::aql::AstNode const*,
                                arangodb::aql::Variable const*, size_t, size_t&,

@@ -1,432 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////

#include "mmfiles-fulltext-handles.h"

#include "MMFiles/MMFilesToken.h"

/// @brief at what percentage of deleted documents should the handle list be
/// cleaned?
#define CLEANUP_THRESHOLD 0.25

/// @brief free a handle slot
static void FreeSlot(TRI_fulltext_handle_slot_t* slot) {
  TRI_Free(TRI_UNKNOWN_MEM_ZONE, slot->_documents);
  TRI_Free(TRI_UNKNOWN_MEM_ZONE, slot->_deleted);
  TRI_Free(TRI_UNKNOWN_MEM_ZONE, slot);
}

/// @brief allocate a slot on demand
static bool AllocateSlot(TRI_fulltext_handles_t* const handles,
                         uint32_t slotNumber) {
  TRI_ASSERT(handles->_slots != nullptr);

  if (handles->_slots[slotNumber] != nullptr) {
    return true;
  }

  auto slot = static_cast<TRI_fulltext_handle_slot_t*>(TRI_Allocate(
      TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_fulltext_handle_slot_t)));

  if (slot == nullptr) {
    return false;
  }

  // allocate and clear
  slot->_documents = static_cast<TRI_voc_rid_t*>(
      TRI_Allocate(TRI_UNKNOWN_MEM_ZONE,
                   sizeof(TRI_voc_rid_t) * handles->_slotSize));

  if (slot->_documents == nullptr) {
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, slot);
    return false;
  }

  memset(slot->_documents, 0, sizeof(TRI_voc_rid_t) * handles->_slotSize);

  // allocate and clear deleted flags
  slot->_deleted = static_cast<uint8_t*>(TRI_Allocate(
      TRI_UNKNOWN_MEM_ZONE, sizeof(uint8_t) * handles->_slotSize));

  if (slot->_deleted == nullptr) {
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, slot->_documents);
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, slot);
    return false;
  }

  memset(slot->_deleted, 0, sizeof(uint8_t) * handles->_slotSize);

  // set initial statistics
  slot->_min = UINT32_MAX;  // yes, this is intentional
  slot->_max = 0;
  slot->_numUsed = 0;
  slot->_numDeleted = 0;

  if (slotNumber == 0) {
    // first slot is an exception
    slot->_numUsed = 1;
  }

  handles->_slots[slotNumber] = slot;

  return true;
}

/// @brief allocate or grow the slot list on demand
static bool AllocateSlotList(TRI_fulltext_handles_t* const handles,
                             uint32_t targetNumber) {
  if (targetNumber == 0) {
    // error!
    return false;
  }

  if (targetNumber <= handles->_numSlots) {
    // nothing to do
    return true;
  }

  TRI_fulltext_handle_slot_t** slots =
      static_cast<TRI_fulltext_handle_slot_t**>(TRI_Allocate(
          TRI_UNKNOWN_MEM_ZONE,
          sizeof(TRI_fulltext_handle_slot_t*) * targetNumber));

  if (slots == nullptr) {
    // out of memory
    return false;
  }

  memset(slots, 0, sizeof(TRI_fulltext_handle_slot_t*) * targetNumber);
  uint32_t currentNumber = handles->_numSlots;

  if (currentNumber > 0) {
    // copy old slot pointers
    memcpy(slots, handles->_slots,
           sizeof(TRI_fulltext_handle_slot_t*) * currentNumber);
  }

  if (handles->_slots != nullptr) {
    // free old list pointer
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, handles->_slots);
  }

  // new slot is empty
  slots[targetNumber - 1] = nullptr;

  handles->_slots = slots;
  handles->_numSlots = targetNumber;

  return true;
}

/// @brief create a handles instance
TRI_fulltext_handles_t* TRI_CreateHandlesMMFilesFulltextIndex(uint32_t slotSize) {
  TRI_fulltext_handles_t* handles =
      static_cast<TRI_fulltext_handles_t*>(TRI_Allocate(
          TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_fulltext_handles_t)));

  if (handles == nullptr) {
    return nullptr;
  }

  handles->_numDeleted = 0;
  handles->_next = 1;

  handles->_slotSize = slotSize;
  handles->_numSlots = 0;
  handles->_slots = nullptr;
  handles->_map = nullptr;

  return handles;
}

/// @brief free a handles instance
void TRI_FreeHandlesMMFilesFulltextIndex(TRI_fulltext_handles_t* handles) {
  if (handles->_slots != nullptr) {
    uint32_t i;

    for (i = 0; i < handles->_numSlots; ++i) {
      if (handles->_slots[i] != nullptr) {
        FreeSlot(handles->_slots[i]);
      }
    }

    TRI_Free(TRI_UNKNOWN_MEM_ZONE, handles->_slots);
  }

  if (handles->_map != nullptr) {
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, handles->_map);
  }

  TRI_Free(TRI_UNKNOWN_MEM_ZONE, handles);
}

/// @brief get number of documents (including deleted)
uint32_t TRI_NumHandlesHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const handles) {
  return (handles->_next - 1);
}

/// @brief get number of deleted documents
uint32_t TRI_NumDeletedHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const handles) {
  return handles->_numDeleted;
}

/// @brief get handle list deletion grade
double TRI_DeletionGradeHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const handles) {
  return ((double)handles->_numDeleted / (double)handles->_next);
}

/// @brief whether or not the handle list should be compacted
bool TRI_ShouldCompactHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const handles) {
  return (TRI_DeletionGradeHandleMMFilesFulltextIndex(handles) > CLEANUP_THRESHOLD);
}

/// @brief compact the handle list. this will create a new handle list
/// and leaves the old one untouched
TRI_fulltext_handles_t* TRI_CompactHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const original) {
  TRI_fulltext_handles_t* clone;
  uint32_t originalHandle, targetHandle;
  uint32_t i;

  TRI_fulltext_handle_t* map = static_cast<TRI_fulltext_handle_t*>(
      TRI_Allocate(TRI_UNKNOWN_MEM_ZONE,
                   sizeof(TRI_fulltext_handle_t) * original->_next));

  if (map == nullptr) {
    return nullptr;
  }

  clone = TRI_CreateHandlesMMFilesFulltextIndex(original->_slotSize);

  if (clone == nullptr) {
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, map);
    return nullptr;
  }

  originalHandle = 1;
  targetHandle = 1;

  for (i = 0; i < original->_numSlots; ++i) {
    TRI_fulltext_handle_slot_t* originalSlot;
    uint32_t start;
    uint32_t j;

    if (i == 0) {
      start = 1;
    } else {
      start = 0;
    }

    originalSlot = original->_slots[i];

    for (j = start; j < originalSlot->_numUsed; ++j) {
      if (originalSlot->_deleted[j] == 1) {
        // printf("- setting map at #%lu to 0\n", (unsigned long) j);
        map[originalHandle++] = 0;
      } else {
        // printf("- setting map at #%lu to %lu\n", (unsigned long) j, (unsigned
        // long) targetHandle);
        map[originalHandle++] = targetHandle++;
        TRI_InsertHandleMMFilesFulltextIndex(clone, originalSlot->_documents[j]);
      }
    }
  }

  clone->_map = map;

  return clone;
}

/// @brief insert a document and return a handle for it
TRI_fulltext_handle_t TRI_InsertHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const handles, const TRI_voc_rid_t document) {
  TRI_fulltext_handle_t handle;
  TRI_fulltext_handle_slot_t* slot;
  uint32_t slotNumber;
  uint32_t slotPosition;

  if (handles == nullptr) {
    return 0;
  }

  handle = handles->_next;

  if (handle == UINT32_MAX - 1) {
    // out of handles
    return 0;
  }

  slotNumber = handle / handles->_slotSize;
  slotPosition = handle % handles->_slotSize;

  if (!AllocateSlotList(handles, slotNumber + 1)) {
    // out of memory
    return 0;
  }

  TRI_ASSERT(handles->_slots != nullptr);

  if (!AllocateSlot(handles, slotNumber)) {
    // out of memory
    handles->_numSlots--;
    return 0;
  }

  slot = handles->_slots[slotNumber];

  TRI_ASSERT(slot != nullptr);

  // fill in document
  slot->_documents[slotPosition] = document;
  slot->_deleted[slotPosition] = 0;
  slot->_numUsed++;
  // no need to fill in deleted flag as it is initialized to false

  if (document > slot->_max) {
    slot->_max = document;
  }
  if (document < slot->_min) {
    slot->_min = document;
  }

  handles->_next++;

  return handle;
}

/// @brief mark a document as deleted in the handle list
bool TRI_DeleteDocumentHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const handles, const TRI_voc_rid_t document) {
  uint32_t i;

  if (document == 0) {
    return true;
  }

  for (i = 0; i < handles->_numSlots; ++i) {
    TRI_fulltext_handle_slot_t* slot;
    uint32_t lastPosition;
    uint32_t j;

    slot = handles->_slots[i];
    TRI_ASSERT(slot != nullptr);
    lastPosition = slot->_numUsed;

    if (slot->_min > document || slot->_max < document ||
        lastPosition <= slot->_numDeleted) {
      continue;
    }

    // we're in a relevant slot. now check its documents
    for (j = 0; j < lastPosition; ++j) {
      if (slot->_documents[j] == document) {
        slot->_deleted[j] = 1;
        slot->_documents[j] = 0;
        slot->_numDeleted++;
        handles->_numDeleted++;
        return true;
      }
    }
    // this wasn't the correct slot unfortunately. now try next
  }

  return false;
}

/// @brief get the document id for a handle
arangodb::DocumentIdentifierToken TRI_GetDocumentMMFilesFulltextIndex(
    const TRI_fulltext_handles_t* const handles,
    const TRI_fulltext_handle_t handle) {
  TRI_fulltext_handle_slot_t* slot;
  uint32_t slotNumber;
  uint32_t slotPosition;

  slotNumber = handle / handles->_slotSize;
#if TRI_FULLTEXT_DEBUG
  if (slotNumber >= handles->_numSlots) {
    // not found
    return arangodb::MMFilesToken{};
  }
#endif

  slot = handles->_slots[slotNumber];
  TRI_ASSERT(slot != nullptr);

  slotPosition = handle % handles->_slotSize;
  if (slot->_deleted[slotPosition]) {
    // document was deleted
    return arangodb::MMFilesToken{};
  }

  return arangodb::MMFilesToken{slot->_documents[slotPosition]};
}

/// @brief dump all handles
#if TRI_FULLTEXT_DEBUG
void TRI_DumpHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const handles) {
  uint32_t i;

  for (i = 0; i < handles->_numSlots; ++i) {
    TRI_fulltext_handle_slot_t* slot = handles->_slots[i];

    if (slot == nullptr) {
      continue;
    }
    TRI_ASSERT(slot != nullptr);

    printf("- slot %lu (%lu used, %lu deleted)\n", (unsigned long)i,
           (unsigned long)slot->_numUsed, (unsigned long)slot->_numDeleted);

    // we're in a relevant slot. now check its documents
    for (uint32_t j = 0; j < slot->_numUsed; ++j) {
      printf(" - #%lu %d %llu\n",
             (unsigned long)(i * handles->_slotSize + j),
             (int)slot->_deleted[j], (unsigned long long)slot->_documents[j]);
    }
    printf("\n");
  }
}
#endif

/// @brief return the memory usage for the handles
size_t TRI_MemoryHandleMMFilesFulltextIndex(
    const TRI_fulltext_handles_t* const handles) {
  size_t memory;
  size_t perSlot;
  uint32_t numSlots;

  numSlots = handles->_numSlots;

  perSlot = (sizeof(TRI_voc_rid_t) + sizeof(uint8_t)) * handles->_slotSize;

  // slots list
  memory = sizeof(TRI_fulltext_handle_slot_t*) * numSlots;
  // slot memory
  memory += (sizeof(TRI_fulltext_handle_slot_t) + perSlot) * numSlots;

  return memory;
}

@@ -1,138 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////

#ifndef ARANGOD_MMFILES_MMFILES_FULLTEXT_HANDLES_H
#define ARANGOD_MMFILES_MMFILES_FULLTEXT_HANDLES_H 1

#include "mmfiles-fulltext-common.h"
#include "VocBase/voc-types.h"

/// @brief typedef for a fulltext handle entry
typedef uint32_t TRI_fulltext_handle_t;

namespace arangodb {
struct DocumentIdentifierToken;
}

/// @brief a slot containing _numUsed handles and has some statistics about
/// itself
///
/// the fulltext index will not store document ids in its nodes, because that
/// will be complicated in the case of deleting a document. in this case, all
/// nodes would need to be traversed to find where the document was referenced.
/// this would be too slow. instead of storing document ids, a node stores
/// handles. handles are increasing integer numbers that are each mapped to a
/// specific document. when a document is deleted from the index, its handle is
/// marked as deleted, but the handle value may remain stored in one or many
/// index nodes. handles of deleted documents are removed from result sets at
/// the end of each index query on-the-fly, so query results are still correct.
/// To finally get rid of handles of deleted documents, the index can perform
/// a compaction. The compaction rewrites a new, dense handle list consisting
/// with only handles that point to existing documents. The old handles used in
/// nodes become invalid by this, so the handles stores in the nodes have to
/// be rewritten. When the rewrite is done, the old handle list is freed and
/// the new one is put in place.
///
/// Inserting a new document will simply allocate a new handle, and the handle
/// will be stored for the node. We simply assign the next handle number for
/// the document. After that, we can quickly look up the document id for a
/// handle value. It's more tricky the other way around, because there is no
/// simple mapping from document ids to handles. To find the handle for a
/// document id, we have to check all handles already used.
/// As this would mean traversing over all handles used and comparing their
/// document values with the sought document id, there is some optimisation:
/// handles are stored in slots of fixed sizes. Each slot has some statistics
/// about the number of used and deleted documents/handles in it, as well as
/// its min and max document values.
/// When looking for a specific document id in all handles in the case of
/// deletion, the slot statistics are used to early prune non-relevant slots
/// from the further search. The simple min/max document id check implemented is
/// sufficient because normally document memory is contiguous so the pointers
/// to documents are just adjacent (second pointer is higher than first
/// pointer).
/// This is only true for documents that are created on the same memory page
/// but this should be the common case to optimize for.
typedef struct TRI_fulltext_handle_slot_s {
  uint32_t _numUsed;     // number of handles used in slot
  uint32_t _numDeleted;  // number of deleted handles in slot
  TRI_voc_rid_t _min;    // minimum handle value in slot
  TRI_voc_rid_t _max;    // maximum handle value in slot
  TRI_voc_rid_t* _documents;  // document ids for the slots
  uint8_t* _deleted;          // deleted flags for the slots
} TRI_fulltext_handle_slot_t;

/// @brief typedef for a fulltext handles instance
typedef struct TRI_fulltext_handles_s {
  TRI_fulltext_handle_t _next;          // next handle to use
  uint32_t _numSlots;                   // current number of slots
  TRI_fulltext_handle_slot_t** _slots;  // pointers to slots
  uint32_t _slotSize;                   // the size of each slot
  uint32_t _numDeleted;                 // total number of deleted documents
  TRI_fulltext_handle_t* _map;  // a temporary map for remapping existing
                                // handles to new handles during compaction
} TRI_fulltext_handles_t;

/// @brief create a handles instance
TRI_fulltext_handles_t* TRI_CreateHandlesMMFilesFulltextIndex(const uint32_t);

/// @brief free a handles instance
void TRI_FreeHandlesMMFilesFulltextIndex(TRI_fulltext_handles_t*);

/// @brief get number of documents (including deleted)
uint32_t TRI_NumHandlesHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);

/// @brief get number of deleted documents
uint32_t TRI_NumDeletedHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);

/// @brief get handle list fill grade
double TRI_DeletionGradeHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);

/// @brief whether or not the handle list should be compacted
bool TRI_ShouldCompactHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);

/// @brief compact the handle list
TRI_fulltext_handles_t* TRI_CompactHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const);

/// @brief insert a document and return a handle for it
TRI_fulltext_handle_t TRI_InsertHandleMMFilesFulltextIndex(
    TRI_fulltext_handles_t* const, const TRI_voc_rid_t);

/// @brief mark a document as deleted in the handle list
bool TRI_DeleteDocumentHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const,
                                                  const TRI_voc_rid_t);

/// @brief get the document id for a handle
arangodb::DocumentIdentifierToken TRI_GetDocumentMMFilesFulltextIndex(
    const TRI_fulltext_handles_t* const, const TRI_fulltext_handle_t);

/// @brief dump all handles
#if TRI_FULLTEXT_DEBUG
void TRI_DumpHandleMMFilesFulltextIndex(TRI_fulltext_handles_t* const);
#endif

/// @brief return the memory usage for the handles
size_t TRI_MemoryHandleMMFilesFulltextIndex(const TRI_fulltext_handles_t* const);

#endif

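Aside (not part of the commit): the doc comment in the deleted header above explains the old design's weak spot. Handle-to-document was a cheap array lookup, but document-to-handle had no reverse map, so every delete scanned the slot lists (with only min/max pruning to help), and deferred compaction had to rewrite every node afterwards. The commit removes this indirection entirely. A std-only illustration of the asymmetry; the vector stands in for the slot lists:

// Standalone illustration of why per-delete scans were the bottleneck
// (std-only sketch).
#include <cstdint>
#include <iostream>
#include <vector>

using RevisionId = std::uint64_t;

int main() {
  std::vector<RevisionId> handleToDocument;  // index = handle number
  for (RevisionId rid = 100; rid < 100000; ++rid) {
    handleToDocument.push_back(rid);  // insert: just append, O(1)
  }

  // lookup by handle: O(1)
  std::cout << handleToDocument[42] << "\n";

  // delete by document id: no reverse map, so scan everything, O(n) per delete
  RevisionId doomed = 99999;
  std::size_t scanned = 0;
  for (std::size_t h = 0; h < handleToDocument.size(); ++h) {
    ++scanned;
    if (handleToDocument[h] == doomed) {
      handleToDocument[h] = 0;  // mark deleted, as the slot code did
      break;
    }
  }
  std::cout << "scanned " << scanned << " handles for one delete\n";
}
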
File diff suppressed because it is too large

@@ -50,10 +50,6 @@ typedef struct TRI_fulltext_stats_s {
  size_t _memoryDocuments;
  uint32_t _numNodes;
#endif
  size_t _memoryHandles;
  uint32_t _numDocuments;
  uint32_t _numDeleted;
  double _handleDeletionGrade;
  bool _shouldCompact;
} TRI_fulltext_stats_t;

@@ -65,51 +61,25 @@ void TRI_FreeFtsIndex(TRI_fts_index_t*);

void TRI_TruncateMMFilesFulltextIndex(TRI_fts_index_t*);

/// @brief delete a document from the index
void TRI_DeleteDocumentMMFilesFulltextIndex(TRI_fts_index_t* const,
                                            TRI_voc_rid_t);
/// @brief insert a list of words to the index
int TRI_InsertWordsMMFilesFulltextIndex(TRI_fts_index_t*,
                                        TRI_voc_rid_t,
                                        std::set<std::string> const&);

/// @brief insert a list of words to the index
bool TRI_InsertWordsMMFilesFulltextIndex(TRI_fts_index_t* const,
                                         TRI_voc_rid_t,
                                         std::set<std::string> const&);

/// @brief find all documents that contain a word (exact match)
#if 0
struct TRI_fulltext_result_s* TRI_FindExactMMFilesFulltextIndex (TRI_fts_index_t* const,
                                                                 char const* const,
                                                                 size_t const);
#endif

/// @brief find all documents that contain a word (prefix match)
#if 0
struct TRI_fulltext_result_s* TRI_FindPrefixMMFilesFulltextIndex (TRI_fts_index_t* const,
                                                                  char const*,
                                                                  size_t const);
#endif
int TRI_RemoveWordsMMFilesFulltextIndex(TRI_fts_index_t*,
                                        TRI_voc_rid_t,
                                        std::set<std::string> const&);

/// @brief execute a query on the fulltext index
/// note: this will free the query
struct TRI_fulltext_result_s* TRI_QueryMMFilesFulltextIndex(
std::set<TRI_voc_rid_t> TRI_QueryMMFilesFulltextIndex(
    TRI_fts_index_t* const, struct TRI_fulltext_query_s*);

/// @brief dump index tree
#if TRI_FULLTEXT_DEBUG
void TRI_DumpTreeFtsIndex(TRI_fts_index_t*);
#endif

/// @brief dump index statistics
#if TRI_FULLTEXT_DEBUG
void TRI_DumpStatsFtsIndex(TRI_fts_index_t*);
#endif

/// @brief return stats about the index
TRI_fulltext_stats_t TRI_StatsMMFilesFulltextIndex(TRI_fts_index_t*);

/// @brief return the total memory used by the index
size_t TRI_MemoryMMFilesFulltextIndex(TRI_fts_index_t*);

/// @brief compact the fulltext index
bool TRI_CompactMMFilesFulltextIndex(TRI_fts_index_t*);

#endif
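
Editorial sketch (not part of the commit): the pair of declarations above shows the query entry point changing from a heap-allocated TRI_fulltext_result_s* to a std::set<TRI_voc_rid_t> returned by value, so callers receive the matching revision ids directly and no longer have to free a result object. A hedged usage sketch; `fts` and `query` are assumed to be a valid index and query object:

    std::set<TRI_voc_rid_t> results = TRI_QueryMMFilesFulltextIndex(fts, query);
    for (TRI_voc_rid_t rid : results) {
      // map each revision id to a document identifier token for the caller
    }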

@@ -34,25 +34,11 @@
/// @brief growth factor for lists
#define GROWTH_FACTOR 1.2

/// @brief compare two entries in a list
static int CompareEntries(const void* lhs, const void* rhs) {
  TRI_fulltext_list_entry_t l = (*(TRI_fulltext_list_entry_t*)lhs);
  TRI_fulltext_list_entry_t r = (*(TRI_fulltext_list_entry_t*)rhs);

  if (l < r) {
    return -1;
  }

  if (l > r) {
    return 1;
  }

  return 0;
}
#include "Logger/Logger.h"

/// @brief return whether the list is sorted
/// this will check the sorted bit at the start of the list
static inline bool IsSorted(const TRI_fulltext_list_t* const list) {
static inline bool IsSorted(TRI_fulltext_list_t const* list) {
  uint32_t* head = (uint32_t*)list;

  return ((*head & SORTED_BIT) != 0);
@@ -103,6 +89,51 @@ static inline uint32_t GetNumAllocated(TRI_fulltext_list_t const* list) {
  return (*head & ~SORTED_BIT);
}
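
Editorial sketch (not part of the commit): GetNumAllocated masks a flag bit out of the same uint32_t header word that stores the allocation count. A standalone illustration of that bit-packing; the concrete value of SORTED_BIT is assumed here, not taken from the source.

    #include <cassert>
    #include <cstdint>

    static constexpr uint32_t kSortedBit = 0x80000000U;  // assumed for illustration

    int main() {
      uint32_t head = 1000;                  // numAllocated, sorted bit clear
      head |= kSortedBit;                    // SetIsSorted(list, true) equivalent
      assert((head & kSortedBit) != 0);      // IsSorted() equivalent
      assert((head & ~kSortedBit) == 1000);  // GetNumAllocated() equivalent
      return 0;
    }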

static uint32_t FindListEntry(TRI_fulltext_list_t* list,
                              TRI_fulltext_list_entry_t* listEntries,
                              uint32_t numEntries,
                              TRI_fulltext_list_entry_t entry) {
  if (numEntries >= 10 && IsSorted(list)) {
    // binary search
    uint32_t l = 0;
    uint32_t r = numEntries - 1;

    while (true) {
      // determine midpoint
      uint32_t m = l + ((r - l) / 2);
      TRI_fulltext_list_entry_t value = listEntries[m];
      if (value == entry) {
        return m;
      }

      if (value > entry) {
        if (m == 0) {
          // we must abort because the following subtraction would
          // make the uint32_t underflow to UINT32_MAX!
          break;
        }
        // this is safe
        r = m - 1;
      } else {
        l = m + 1;
      }

      if (r < l) {
        break;
      }
    }
  } else {
    // linear search
    for (uint32_t i = 0; i < numEntries; ++i) {
      if (listEntries[i] == entry) {
        return i;
      }
    }
  }

  return UINT32_MAX;
}
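
Editorial sketch (not part of the commit): the `m == 0` guard above matters because `r` is unsigned; a two-line standalone demonstration of the wrap-around it prevents:

    #include <cstdint>

    int main() {
      uint32_t m = 0;
      uint32_t r = m - 1;  // unsigned wrap-around: r is now UINT32_MAX
      return (r == UINT32_MAX) ? 0 : 1;
    }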

/// @brief initialize a new list
static void InitList(TRI_fulltext_list_t* list, uint32_t size) {
  uint32_t* head = (uint32_t*)list;
@@ -111,24 +142,6 @@ static void InitList(TRI_fulltext_list_t* list, uint32_t size) {
  *(head) = 0;
}

/// @brief sort a list in place
static void SortList(TRI_fulltext_list_t* list) {
  if (IsSorted(list)) {
    // nothing to do
    return;
  }

  uint32_t numEntries = GetNumEntries(list);

  if (numEntries > 1) {
    // only sort if there is more than one element
    qsort(GetStart(list), numEntries, sizeof(TRI_fulltext_list_entry_t),
          &CompareEntries);
  }

  SetIsSorted(list, true);
}

/// @brief get the memory usage for a list of the specified size
static inline size_t MemoryList(uint32_t size) {
  return sizeof(uint32_t) + // numAllocated
@@ -149,6 +162,21 @@ static TRI_fulltext_list_t* IncreaseList(TRI_fulltext_list_t* list,
  return copy;
}

void TRI_CloneListMMFilesFulltextIndex(TRI_fulltext_list_t const* source,
                                       std::set<TRI_voc_rid_t>& result) {
  if (source == nullptr) {
    return;
  }

  uint32_t numEntries = GetNumEntries(source);
  if (numEntries > 0) {
    TRI_fulltext_list_entry_t* entries = GetStart(source);
    for (uint32_t i = 0; i < numEntries; ++i) {
      result.emplace(entries[i]);
    }
  }
}

/// @brief clone a list by copying an existing one
TRI_fulltext_list_t* TRI_CloneListMMFilesFulltextIndex(
    TRI_fulltext_list_t const* source) {
@@ -199,256 +227,10 @@ size_t TRI_MemoryListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) {
  return MemoryList(size);
}

/// @brief unionise two lists (a.k.a. logical OR)
/// this will create a new list and free both lhs & rhs
TRI_fulltext_list_t* TRI_UnioniseListMMFilesFulltextIndex(TRI_fulltext_list_t* lhs,
                                                          TRI_fulltext_list_t* rhs) {
  TRI_fulltext_list_t* list;
  TRI_fulltext_list_entry_t last;
  TRI_fulltext_list_entry_t* lhsEntries;
  TRI_fulltext_list_entry_t* rhsEntries;
  TRI_fulltext_list_entry_t* listEntries;
  uint32_t l, r;
  uint32_t numLhs, numRhs;
  uint32_t listPos;

  if (lhs == nullptr) {
    return rhs;
  }
  if (rhs == nullptr) {
    return lhs;
  }

  numLhs = GetNumEntries(lhs);
  numRhs = GetNumEntries(rhs);

  // check the easy cases when one of the lists is empty
  if (numLhs == 0) {
    TRI_FreeListMMFilesFulltextIndex(lhs);
    return rhs;
  }

  if (numRhs == 0) {
    TRI_FreeListMMFilesFulltextIndex(rhs);
    return lhs;
  }

  list = TRI_CreateListMMFilesFulltextIndex(numLhs + numRhs);
  if (list == nullptr) {
    TRI_FreeListMMFilesFulltextIndex(lhs);
    TRI_FreeListMMFilesFulltextIndex(rhs);
    return nullptr;
  }

  SortList(lhs);
  lhsEntries = GetStart(lhs);
  l = 0;

  SortList(rhs);
  rhsEntries = GetStart(rhs);
  r = 0;

  listPos = 0;
  listEntries = GetStart(list);
  last = 0;

  while (true) {
    while (l < numLhs && lhsEntries[l] <= last) {
      ++l;
    }

    while (r < numRhs && rhsEntries[r] <= last) {
      ++r;
    }

    if (l >= numLhs && r >= numRhs) {
      break;
    }

    if (l >= numLhs && r < numRhs) {
      listEntries[listPos++] = last = rhsEntries[r++];
    } else if (l < numLhs && r >= numRhs) {
      listEntries[listPos++] = last = lhsEntries[l++];
    } else if (lhsEntries[l] < rhsEntries[r]) {
      listEntries[listPos++] = last = lhsEntries[l++];
    } else {
      listEntries[listPos++] = last = rhsEntries[r++];
    }
  }

  SetNumEntries(list, listPos);
  SetIsSorted(list, true);

  TRI_FreeListMMFilesFulltextIndex(lhs);
  TRI_FreeListMMFilesFulltextIndex(rhs);

  return list;
}
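
Editorial sketch (not part of the commit): the ownership contract of these (removed) set operations is easy to get wrong; both inputs are consumed by the call, so only the returned pointer may be used afterwards. A hedged usage sketch built from the declarations in mmfiles-fulltext-list.h:

    static void UnioniseUsageSketch() {
      TRI_fulltext_list_t* lhs = TRI_CreateListMMFilesFulltextIndex(4);
      TRI_fulltext_list_t* rhs = TRI_CreateListMMFilesFulltextIndex(4);
      lhs = TRI_InsertListMMFilesFulltextIndex(lhs, 1);
      rhs = TRI_InsertListMMFilesFulltextIndex(rhs, 2);
      TRI_fulltext_list_t* merged = TRI_UnioniseListMMFilesFulltextIndex(lhs, rhs);
      // lhs and rhs are freed by the call; merged holds {1, 2} and must be
      // released by the caller
      TRI_FreeListMMFilesFulltextIndex(merged);
    }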

/// @brief intersect two lists (a.k.a. logical AND)
/// this will create a new list and free both lhs & rhs
TRI_fulltext_list_t* TRI_IntersectListMMFilesFulltextIndex(TRI_fulltext_list_t* lhs,
                                                           TRI_fulltext_list_t* rhs) {
  TRI_fulltext_list_t* list;
  TRI_fulltext_list_entry_t last;
  TRI_fulltext_list_entry_t* lhsEntries;
  TRI_fulltext_list_entry_t* rhsEntries;
  TRI_fulltext_list_entry_t* listEntries;
  uint32_t l, r;
  uint32_t numLhs, numRhs;
  uint32_t listPos;

  // check if one of the pointers is NULL
  if (lhs == nullptr) {
    return rhs;
  }

  if (rhs == nullptr) {
    return lhs;
  }

  numLhs = GetNumEntries(lhs);
  numRhs = GetNumEntries(rhs);

  // printf("list intersection lhs: %lu rhs: %lu\n\n", (unsigned long) numLhs,
  // (unsigned long) numRhs);

  // check the easy cases when one of the lists is empty
  if (numLhs == 0 || numRhs == 0) {
    if (lhs != nullptr) {
      TRI_FreeListMMFilesFulltextIndex(lhs);
    }
    if (rhs != nullptr) {
      TRI_FreeListMMFilesFulltextIndex(rhs);
    }

    return TRI_CreateListMMFilesFulltextIndex(0);
  }

  // we have at least one entry in each list
  list = TRI_CreateListMMFilesFulltextIndex(numLhs < numRhs ? numLhs : numRhs);
  if (list == nullptr) {
    TRI_FreeListMMFilesFulltextIndex(lhs);
    TRI_FreeListMMFilesFulltextIndex(rhs);
    return nullptr;
  }

  SortList(lhs);
  lhsEntries = GetStart(lhs);
  l = 0;

  SortList(rhs);
  rhsEntries = GetStart(rhs);
  r = 0;

  listPos = 0;
  listEntries = GetStart(list);
  last = 0;

  while (true) {
    while (l < numLhs && lhsEntries[l] <= last) {
      ++l;
    }

    while (r < numRhs && rhsEntries[r] <= last) {
      ++r;
    }

  again:
    if (l >= numLhs || r >= numRhs) {
      break;
    }

    if (lhsEntries[l] < rhsEntries[r]) {
      ++l;
      goto again;
    } else if (lhsEntries[l] > rhsEntries[r]) {
      ++r;
      goto again;
    }

    // match
    listEntries[listPos++] = last = lhsEntries[l];
    ++l;
    ++r;
  }

  SetNumEntries(list, listPos);
  SetIsSorted(list, true);

  TRI_FreeListMMFilesFulltextIndex(lhs);
  TRI_FreeListMMFilesFulltextIndex(rhs);

  // printf("result list has %lu\n\n", (unsigned long) listPos);

  return list;
}

/// @brief exclude values from a list
/// this will modify the list in place
TRI_fulltext_list_t* TRI_ExcludeListMMFilesFulltextIndex(
    TRI_fulltext_list_t* list, TRI_fulltext_list_t* exclude) {
  TRI_fulltext_list_entry_t* listEntries;
  TRI_fulltext_list_entry_t* excludeEntries;
  uint32_t numEntries;
  uint32_t numExclude;
  uint32_t i, j, listPos;

  if (list == nullptr) {
    TRI_FreeListMMFilesFulltextIndex(exclude);
    return list;
  }

  if (exclude == nullptr) {
    return list;
  }

  numEntries = GetNumEntries(list);
  numExclude = GetNumEntries(exclude);

  if (numEntries == 0 || numExclude == 0) {
    // original list or exclusion list is empty
    TRI_FreeListMMFilesFulltextIndex(exclude);
    return list;
  }

  SortList(list);

  listEntries = GetStart(list);
  excludeEntries = GetStart(exclude);

  j = 0;
  listPos = 0;
  for (i = 0; i < numEntries; ++i) {
    TRI_fulltext_list_entry_t entry;

    entry = listEntries[i];
    while (j < numExclude && excludeEntries[j] < entry) {
      ++j;
    }

    if (j < numExclude && excludeEntries[j] == entry) {
      // entry is contained in the exclusion list
      continue;
    }

    if (listPos != i) {
      listEntries[listPos] = listEntries[i];
    }
    ++listPos;
  }

  // we may now have fewer entries in the list after the exclusion
  SetNumEntries(list, listPos);
  TRI_FreeListMMFilesFulltextIndex(exclude);

  return list;
}

/// @brief insert an element into a list
/// this might free the old list and allocate a new, bigger one
TRI_fulltext_list_t* TRI_InsertListMMFilesFulltextIndex(
    TRI_fulltext_list_t* list, const TRI_fulltext_list_entry_t entry) {
    TRI_fulltext_list_t* list, TRI_fulltext_list_entry_t entry) {
  TRI_fulltext_list_entry_t* listEntries;
  uint32_t numAllocated;
  uint32_t numEntries;
@@ -512,77 +294,60 @@ TRI_fulltext_list_t* TRI_InsertListMMFilesFulltextIndex(
  return list;
}

/// @brief rewrites the list of entries using a map of handles
/// returns the number of entries remaining in the list after rewrite
/// the map is provided by the routines that handle the compaction
uint32_t TRI_RewriteListMMFilesFulltextIndex(TRI_fulltext_list_t* list,
                                             void const* data) {
  TRI_fulltext_list_entry_t* listEntries;
  TRI_fulltext_list_entry_t* map;
  uint32_t numEntries;
  uint32_t i, j;
/// @brief remove an element from a list
/// this might free the old list and allocate a new, smaller one
TRI_fulltext_list_t* TRI_RemoveListMMFilesFulltextIndex(
    TRI_fulltext_list_t* list, TRI_fulltext_list_entry_t entry) {
  if (list == nullptr) {
    return nullptr;
  }

  uint32_t numEntries = GetNumEntries(list);

  numEntries = GetNumEntries(list);
  if (numEntries == 0) {
    return 0;
    // definitely not contained...
    return list;
  }

  TRI_fulltext_list_entry_t* listEntries = GetStart(list);
  uint32_t i = FindListEntry(list, listEntries, numEntries, entry);

  if (i == UINT32_MAX) {
    // not found
    return list;
  }

  // found!
  --numEntries;

  if (numEntries == 0) {
    // free all memory
    TRI_FreeListMMFilesFulltextIndex(list);
    return nullptr;
  }

  map = (TRI_fulltext_list_entry_t*)data;
  listEntries = GetStart(list);
  j = 0;
  while (i < numEntries) {
    listEntries[i] = listEntries[i + 1];
    ++i;
  }

  for (i = 0; i < numEntries; ++i) {
    TRI_fulltext_list_entry_t entry;
    TRI_fulltext_list_entry_t mapped;
  SetNumEntries(list, numEntries);

  uint32_t numAllocated = GetNumAllocated(list);

    entry = listEntries[i];
    if (entry == 0) {
      continue;
  if (numAllocated > 4 && numEntries < numAllocated / 2) {
    // list is only half full now
    TRI_fulltext_list_t* clone = TRI_CloneListMMFilesFulltextIndex(list);

    if (clone != nullptr) {
      TRI_FreeListMMFilesFulltextIndex(list);
      return clone;
    }

    mapped = map[entry];
    if (mapped == 0) {
      // original value has been deleted
      continue;
    }

    listEntries[j++] = mapped;
  }

  if (j != numEntries) {
    SetNumEntries(list, j);
  }

  return j;
  return list;
}
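
Editorial sketch (not part of the commit): the new removal path shrinks the backing allocation once a non-trivially sized list drops below 50% occupancy, by cloning into a right-sized list. A standalone sketch of that policy check, mirroring the condition above:

    #include <cstdint>

    static bool ShouldShrink(uint32_t numAllocated, uint32_t numEntries) {
      // reallocate only when the list is bigger than a trivial size and
      // less than half of the allocated slots are used
      return numAllocated > 4 && numEntries < numAllocated / 2;
    }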

/// @brief dump the contents of a list
#if TRI_FULLTEXT_DEBUG
void TRI_DumpListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) {
  TRI_fulltext_list_entry_t* listEntries;
  uint32_t numEntries;
  uint32_t i;

  numEntries = GetNumEntries(list);
  listEntries = GetStart(list);

  printf("(");

  for (i = 0; i < numEntries; ++i) {
    TRI_fulltext_list_entry_t entry;

    if (i > 0) {
      printf(", ");
    }

    entry = listEntries[i];
    printf("%lu", (unsigned long)entry);
  }

  printf(")");
}
#endif

/// @brief return the number of entries
uint32_t TRI_NumEntriesListMMFilesFulltextIndex(TRI_fulltext_list_t const* list) {
  return GetNumEntries(list);

@@ -25,14 +25,18 @@
#define ARANGOD_MMFILES_MMFILES_FULLTEXT_LIST_H 1

#include "mmfiles-fulltext-common.h"
#include "VocBase/voc-types.h"

/// @brief typedef for a fulltext list
typedef void TRI_fulltext_list_t;

/// @brief typedef for a fulltext list entry
typedef uint32_t TRI_fulltext_list_entry_t;
typedef TRI_voc_rid_t TRI_fulltext_list_entry_t;

/// @brief clone a list
void TRI_CloneListMMFilesFulltextIndex(TRI_fulltext_list_t const*,
                                       std::set<TRI_voc_rid_t>& result);

TRI_fulltext_list_t* TRI_CloneListMMFilesFulltextIndex(TRI_fulltext_list_t const*);

/// @brief create a list

@@ -44,34 +48,15 @@ void TRI_FreeListMMFilesFulltextIndex(TRI_fulltext_list_t*);
/// @brief get the memory usage of a list
size_t TRI_MemoryListMMFilesFulltextIndex(TRI_fulltext_list_t const*);

/// @brief unionise two lists
/// this will create a new list and free both lhs & rhs
TRI_fulltext_list_t* TRI_UnioniseListMMFilesFulltextIndex(TRI_fulltext_list_t*,
                                                          TRI_fulltext_list_t*);

/// @brief intersect two lists
/// this will create a new list and free both lhs & rhs
TRI_fulltext_list_t* TRI_IntersectListMMFilesFulltextIndex(TRI_fulltext_list_t*,
                                                           TRI_fulltext_list_t*);

/// @brief exclude values from a list
/// this will modify the result in place
TRI_fulltext_list_t* TRI_ExcludeListMMFilesFulltextIndex(TRI_fulltext_list_t*,
                                                         TRI_fulltext_list_t*);

/// @brief insert an element into a list
/// this might free the old list and allocate a new, bigger one
TRI_fulltext_list_t* TRI_InsertListMMFilesFulltextIndex(
    TRI_fulltext_list_t*, const TRI_fulltext_list_entry_t);
    TRI_fulltext_list_t*, TRI_fulltext_list_entry_t);

/// @brief rewrites the list of entries using a map of values
/// returns the number of entries remaining in the list after rewrite
uint32_t TRI_RewriteListMMFilesFulltextIndex(TRI_fulltext_list_t*, void const*);

/// @brief dump a list
#if TRI_FULLTEXT_DEBUG
void TRI_DumpListMMFilesFulltextIndex(TRI_fulltext_list_t const*);
#endif
/// @brief remove an element from a list
/// this might free the old list and allocate a new, smaller one
TRI_fulltext_list_t* TRI_RemoveListMMFilesFulltextIndex(
    TRI_fulltext_list_t*, TRI_fulltext_list_entry_t);

/// @brief return the number of entries
uint32_t TRI_NumEntriesListMMFilesFulltextIndex(TRI_fulltext_list_t const*);

@@ -1,63 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////

#include "mmfiles-fulltext-result.h"
#include "StorageEngine/DocumentIdentifierToken.h"

/// @brief create a result
TRI_fulltext_result_t* TRI_CreateResultMMFilesFulltextIndex(const uint32_t size) {
  TRI_fulltext_result_t* result = static_cast<TRI_fulltext_result_t*>(
      TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_fulltext_result_t)));

  if (result == nullptr) {
    return nullptr;
  }

  result->_documents = nullptr;
  result->_numDocuments = 0;

  if (size > 0) {
    result->_documents = static_cast<arangodb::DocumentIdentifierToken*>(TRI_Allocate(
        TRI_UNKNOWN_MEM_ZONE, sizeof(arangodb::DocumentIdentifierToken) * size));

    if (result->_documents == nullptr) {
      TRI_Free(TRI_UNKNOWN_MEM_ZONE, result);
      return nullptr;
    }
  }

  return result;
}

/// @brief destroy a result
void TRI_DestroyResultMMFilesFulltextIndex(TRI_fulltext_result_t* result) {
  if (result->_documents != nullptr) {
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, result->_documents);
  }
}

/// @brief free a result
void TRI_FreeResultMMFilesFulltextIndex(TRI_fulltext_result_t* result) {
  TRI_DestroyResultMMFilesFulltextIndex(result);
  TRI_Free(TRI_UNKNOWN_MEM_ZONE, result);
}
@@ -1,49 +0,0 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////

#ifndef ARANGOD_MMFILES_MMFILES_FULLTEXT_RESULT_H
#define ARANGOD_MMFILES_MMFILES_FULLTEXT_RESULT_H 1

#include "mmfiles-fulltext-common.h"

// Forward declarations
namespace arangodb {
struct DocumentIdentifierToken;
}

/// @brief typedef for a fulltext result list
typedef struct TRI_fulltext_result_s {
  uint32_t _numDocuments;
  arangodb::DocumentIdentifierToken* _documents;
} TRI_fulltext_result_t;

/// @brief create a result
TRI_fulltext_result_t* TRI_CreateResultMMFilesFulltextIndex(const uint32_t);

/// @brief destroy a result
void TRI_DestroyResultMMFilesFulltextIndex(TRI_fulltext_result_t*);

/// @brief free a result
void TRI_FreeResultMMFilesFulltextIndex(TRI_fulltext_result_t*);

#endif
@@ -398,7 +398,10 @@ void DatabaseFeature::stop() {
    TRI_ASSERT(vocbase->type() == TRI_VOCBASE_TYPE_NORMAL);

    vocbase->processCollections([](LogicalCollection* collection) {
      collection->close();
      // no one else must modify the collection's status while we are in here
      collection->executeWhileStatusWriteLocked([collection]() {
        collection->close();
      });
    }, true);
  }
}

@@ -1233,7 +1233,7 @@ int RocksDBCollection::saveIndex(transaction::Methods* trx,
    return res.errorNumber();
  }

  std::shared_ptr<VPackBuilder> builder = idx->toVelocyPack(false);
  std::shared_ptr<VPackBuilder> builder = idx->toVelocyPack(false, true);
  auto vocbase = _logicalCollection->vocbase();
  auto collectionId = _logicalCollection->cid();
  VPackSlice data = builder->slice();

@@ -541,25 +541,6 @@ Result RocksDBGeoIndex::removeInternal(transaction::Methods* trx,
  return IndexResult(TRI_ERROR_NO_ERROR, this);
}

int RocksDBGeoIndex::unload() {
  // create a new, empty index
  /*auto empty = GeoIndex_new(_objectId, 0, 0);

  if (empty == nullptr) {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
  }

  // free the old one
  if (_geoIndex != nullptr) {
    GeoIndex_free(_geoIndex);
  }

  // and assign it
  _geoIndex = empty;*/

  return TRI_ERROR_NO_ERROR;
}

/// @brief looks up all points within a given radius
GeoCoordinates* RocksDBGeoIndex::withinQuery(transaction::Methods* trx,
                                             double lat, double lon,

@@ -136,7 +136,7 @@ class RocksDBGeoIndex final : public RocksDBIndex {

  bool matchesDefinition(VPackSlice const& info) const override;

  int unload() override;
  void unload() override {}

  /// @brief looks up all points within a given radius
  arangodb::rocksdbengine::GeoCoordinates* withinQuery(transaction::Methods*,

@@ -118,21 +118,19 @@ void RocksDBIndex::toVelocyPackFigures(VPackBuilder& builder) const {
  }
}

int RocksDBIndex::load() {
void RocksDBIndex::load() {
  if (_useCache) {
    createCache();
    TRI_ASSERT(_cachePresent);
  }
  return TRI_ERROR_NO_ERROR;
}

int RocksDBIndex::unload() {
void RocksDBIndex::unload() {
  if (useCache()) {
    // LOG_TOPIC(ERR, Logger::FIXME) << "unload cache";
    disableCache();
    TRI_ASSERT(!_cachePresent);
  }
  return TRI_ERROR_NO_ERROR;
}

/// @brief return a VelocyPack representation of the index

@@ -277,14 +275,13 @@ size_t RocksDBIndex::memory() const {
}

/// compact the index, should reduce read amplification
int RocksDBIndex::cleanup() {
void RocksDBIndex::cleanup() {
  rocksdb::TransactionDB* db = rocksutils::globalRocksDB();
  rocksdb::CompactRangeOptions opts;
  RocksDBKeyBounds bounds = this->getBounds();
  TRI_ASSERT(_cf == bounds.columnFamily());
  rocksdb::Slice b = bounds.start(), e = bounds.end();
  db->CompactRange(opts, _cf, &b, &e);
  return TRI_ERROR_NO_ERROR;
}

Result RocksDBIndex::postprocessRemove(transaction::Methods* trx,

@@ -65,25 +65,25 @@ class RocksDBIndex : public Index {
 public:
  ~RocksDBIndex();
  void toVelocyPackFigures(VPackBuilder& builder) const override;

  /// @brief return a VelocyPack representation of the index
  void toVelocyPack(velocypack::Builder& builder, bool withFigures,
                    bool forPersistence) const override;

  uint64_t objectId() const { return _objectId; }

  bool isPersistent() const override final { return true; }

  /// @brief return a VelocyPack representation of the index
  void toVelocyPack(velocypack::Builder& builder, bool withFigures,
                    bool forPersistence) const override;

  int drop() override;

  int load() override;
  int unload() override;
  void load() override;
  void unload() override;

  virtual void truncate(transaction::Methods*);

  size_t memory() const override;

  int cleanup() override;
  void cleanup();

  /// @brief provides a size hint for the index
  int sizeHint(transaction::Methods* /*trx*/, size_t /*size*/) override final {

@@ -118,7 +118,7 @@ class Context {
  virtual void registerTransaction(TransactionState*) = 0;

  /// @brief unregister the transaction
  virtual void unregisterTransaction() = 0;
  virtual void unregisterTransaction() noexcept = 0;

 protected:

@@ -530,9 +530,15 @@ TRI_vocbase_col_status_e LogicalCollection::getStatusLocked() {
  return _status;
}

void LogicalCollection::executeWhileStatusWriteLocked(
    std::function<void()> const& callback) {
  WRITE_LOCKER_EVENTUAL(locker, _lock);
  callback();
}

void LogicalCollection::executeWhileStatusLocked(
    std::function<void()> const& callback) {
  READ_LOCKER(readLocker, _lock);
  READ_LOCKER(locker, _lock);
  callback();
}

@@ -158,6 +158,7 @@ class LogicalCollection {
  TRI_vocbase_col_status_e status() const;
  TRI_vocbase_col_status_e getStatusLocked();

  void executeWhileStatusWriteLocked(std::function<void()> const& callback);
  void executeWhileStatusLocked(std::function<void()> const& callback);
  bool tryExecuteWhileStatusLocked(std::function<void()> const& callback);

@@ -71,7 +71,7 @@ function dumpAuthentication (options) {
    'server.authentication': 'true'
  };

  let instanceInfo = pu.startInstance('tcp', options, auth1, 'dump_authentication');
  let instanceInfo = pu.startInstance('tcp', options, { 'server.authentication': 'true', 'server.jwt-secret': 'haxxmann' }, 'dump_authentication');

  if (instanceInfo === false) {
    return {

@@ -720,9 +720,6 @@ function fulltextQuerySuite () {
      assertEqual(0, collection.fulltext("text", "tomatoes", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "others", idx).toArray().length);

      require("console").log("waiting for compaction");
      internal.wait(7);

      assertEqual(0, collection.fulltext("text", "bananas", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "some", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "tomatoes", idx).toArray().length);

@@ -735,9 +732,6 @@ function fulltextQuerySuite () {

      collection.remove(d2);

      require("console").log("waiting for compaction");
      internal.wait(7);

      assertEqual(0, collection.fulltext("text", "several", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "oranges,hate", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "people", idx).toArray().length);

@@ -746,14 +740,8 @@ function fulltextQuerySuite () {

      collection.remove(d3);

      require("console").log("waiting for compaction");
      internal.wait(7);

      assertEqual(0, collection.fulltext("text", "unrelated,text,index", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "index", idx).toArray().length);

      require("console").log("waiting for compaction");
      internal.wait(7);
    },

////////////////////////////////////////////////////////////////////////////////

@@ -1015,9 +1003,6 @@ function fulltextQuerySuite () {
        collection.remove(docs[i]);
      }

      require("console").log("waiting for compaction");
      internal.wait(7);

      assertEqual(750, collection.fulltext("text", "document", idx).toArray().length);
      assertEqual(750, collection.fulltext("text", "text", idx).toArray().length);
      assertEqual(250, collection.fulltext("text", "this", idx).toArray().length);

@@ -1501,272 +1486,30 @@ function fulltextQuerySuite () {
        assertEqual(1, collection.fulltext("text", "møguleikar", idx).toArray().length);
        assertEqual(1, collection.fulltext("text", "síðu,rættar,ritstjórni", idx).toArray().length);
        assertEqual(1, collection.fulltext("text", "prefix:læt", idx).toArray().length);
      }
    },

    testQueryingAfterDeletion: function () {
      for (let i = 0; i < 4000; ++i) {
        collection.save({ _key: "test" + i, text: "test" + i });
      }

      for (let i = 2436; i < 3473; ++i) {
        collection.remove("test" + i);
      }

      for (let i = 0; i < 4000; ++i) {
        assertEqual((i >= 2436 && i < 3473) ? 0 : 1, collection.fulltext("text", "test" + i, idx).toArray().length);
      }
    }
  };
}

////////////////////////////////////////////////////////////////////////////////
/// @brief fulltext queries
////////////////////////////////////////////////////////////////////////////////
/*
function fulltextQuerySubstringSuite () {
  'use strict';
  var cn = "UnitTestsFulltext";
  var collection = null;
  var idx = null;

  return {

////////////////////////////////////////////////////////////////////////////////
/// @brief set up
////////////////////////////////////////////////////////////////////////////////

    setUp : function () {
      internal.db._drop(cn);
      collection = internal.db._create(cn);

      idx = collection.ensureFulltextIndex("text").id;
    },

////////////////////////////////////////////////////////////////////////////////
/// @brief tear down
////////////////////////////////////////////////////////////////////////////////

    tearDown : function () {
      internal.db._drop(cn);
    },

////////////////////////////////////////////////////////////////////////////////
/// @brief simple queries
////////////////////////////////////////////////////////////////////////////////

    testSimple: function () {
      var texts = [
        "some rubbish text",
        "More rubbish test data. The index should be able to handle all this.",
        "even MORE rubbish. Nevertheless this should be handled well, too."
      ];

      for (var i = 0; i < texts.length; ++i) {
        collection.save({ text: texts[i] });
      }

      assertEqual(1, collection.fulltext("text", "some", idx).toArray().length);
      assertEqual(3, collection.fulltext("text", "rubbish", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "text", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "More", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "test", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "data", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "The", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "index", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "should", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "be", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "able", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "to", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "handle", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "all", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "this", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "even", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "Nevertheless", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "handled", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "well", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "too", idx).toArray().length);

      assertEqual(0, collection.fulltext("text", "not", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "foobar", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "it", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "BANANA", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "noncontained", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "notpresent", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "Invisible", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "unAvailaBLE", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "Neverthelessy", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "dindex", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "grubbish", idx).toArray().length);
    },

////////////////////////////////////////////////////////////////////////////////
/// @brief substring queries
////////////////////////////////////////////////////////////////////////////////

    testSubstrings: function () {
      var texts = [
        "Ego sum fidus. Canis sum.",
        "Ibi est Aurelia amica. Aurelia est puelle XI annos nata. Filia est.",
        "Claudia mater est.",
        "Anna est ancilla. Liberta est",
        "Flavus Germanus est servus. Coquus est.",
        "Ibi Quintus amicus est. Quintus est X annos natus.",
        "Gaius est frater magnus. Est XVIII annos natus et Bonnae miles.",
        "Aurelius pater est. Est mercator."
      ];

      for (var i = 0; i < texts.length; ++i) {
        collection.save({ text: texts[i] });
      }

      assertEqual(1, collection.fulltext("text", "substring:fidus", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:idus", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:idu", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:canis", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:cani", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:can", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:anis", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:ilia,substring:aurel", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:ibi,substring:mic", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:ibi,substring:micus", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:ibi,substring:amicus", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:ibi,substring:mica", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:ibi,substring:amica", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:mercator,substring:aurel", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:mercator,substring:aurel,substring:pat", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:merca,substring:aurelius,substring:pater", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:cato,substring:elius,substring:ater", idx).toArray().length);
    },

////////////////////////////////////////////////////////////////////////////////
/// @brief long substrings
////////////////////////////////////////////////////////////////////////////////

    testLongSubstrings: function () {
      var texts = [
        "Donaudampfschifffahrtskapitaensmuetzentraegervereinsvorstandsvorsitzenderehegattinsfreundinnenbesucheranlassversammlungsortausschilderungsherstellungsfabrikationsanlagenbetreiberliebhaberliebhaber",
        "Donaudampfschifffahrtskapitaensmuetzentraegervereinsvorstandsvorsitzenderehegattin",
        "autotuerendellenentfernungsfirmenmitarbeiterverguetungsbewerter",
        "Dampfmaschinenfahrzeugsinspektionsverwaltungsstellenmitarbeiter",
        "Dampfmaschinenfahrzeugsinspektionsverwaltungsstellenmitarbeiterinsignifikant"
      ];

      for (var i = 0; i < texts.length; ++i) {
        collection.save({ text: texts[i] });
      }

      assertEqual(2, collection.fulltext("text", "substring:donau", idx).toArray().length);
      assertEqual(4, collection.fulltext("text", "substring:fahr", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:ver", idx).toArray().length); // significance is only 40 chars
      assertEqual(1, collection.fulltext("text", "substring:end", idx).toArray().length); // significance is only 40 chars
      assertEqual(3, collection.fulltext("text", "substring:ent", idx).toArray().length);
      assertEqual(4, collection.fulltext("text", "substring:damp", idx).toArray().length);
      assertEqual(4, collection.fulltext("text", "substring:dampf", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:dampfma", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:DONAUDAMPFSCHIFF", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:DONAUDAMPFSCHIFFFAHRTSKAPITAENSMUETZE", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:kapitaen", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:kapitaensmuetze", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:inspektion", idx).toArray().length);

      assertEqual(0, collection.fulltext("text", "substring:ehegattin", idx).toArray().length); // significance!
      assertEqual(0, collection.fulltext("text", "substring:traegerverein", idx).toArray().length); // significance!
      assertEqual(0, collection.fulltext("text", "substring:taegerverein", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "substring:hafer", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "substring:apfel", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "substring:glasur", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "substring:somethingisreallywrongwiththislongwordsyouknowbetternotputthemintheindexyouneverknowwhathappensiftheresenoughmemoryforalltheindividualcharactersinthemletssee", idx).toArray().length);
    },

////////////////////////////////////////////////////////////////////////////////
/// @brief substrings vs. prefixes
////////////////////////////////////////////////////////////////////////////////

    testSubstringsVsPrefixes: function () {
      var texts = [
        "bing",
        "bingo",
        "abing",
        "ingo"
      ];

      for (var i = 0; i < texts.length; ++i) {
        collection.save({ text: texts[i] });
      }

      assertEqual(1, collection.fulltext("text", "prefix:bingo", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "prefix:bing", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "prefix:bin", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "prefix:bi", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "prefix:b", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:abing", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:abin", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:abi", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:ab", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:a", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:ingo", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:ing", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:in", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:i", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "prefix:binga", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "prefix:inga", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "prefix:abingo", idx).toArray().length);

      assertEqual(1, collection.fulltext("text", "substring:abing", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:bingo", idx).toArray().length);
      assertEqual(3, collection.fulltext("text", "substring:bing", idx).toArray().length);
      assertEqual(3, collection.fulltext("text", "substring:bin", idx).toArray().length);
      assertEqual(3, collection.fulltext("text", "substring:bi", idx).toArray().length);
      assertEqual(3, collection.fulltext("text", "substring:b", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:go", idx).toArray().length);
      assertEqual(2, collection.fulltext("text", "substring:ingo", idx).toArray().length);
      assertEqual(4, collection.fulltext("text", "substring:ing", idx).toArray().length);
      assertEqual(4, collection.fulltext("text", "substring:in", idx).toArray().length);
      assertEqual(4, collection.fulltext("text", "substring:i", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:a", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "substring:binga", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "substring:abingo", idx).toArray().length);

      assertEqual(1, collection.fulltext("text", "complete:bing", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "complete:bingo", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "complete:abing", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "complete:ingo", idx).toArray().length);
      assertEqual(0, collection.fulltext("text", "complete:abingo", idx).toArray().length);
    },

////////////////////////////////////////////////////////////////////////////////
/// @brief substring queries & everything else combined
////////////////////////////////////////////////////////////////////////////////

    testMultiMatching: function () {
      var texts = [
        "Ego sum fidus. Canis sum.",
        "Ibi est Aurelia amica. Aurelia est puelle XI annos nata. Filia est.",
        "Claudia mater est.",
        "Anna est ancilla. Liberta est",
        "Flavus Germanus est servus. Coquus est.",
        "Ibi Quintus amicus est. Quintus est X annos natus.",
        "Gaius est frater magnus. Est XVIII annos natus et Bonnae miles.",
        "Aurelius pater est. Est mercator."
      ];

      for (var i = 0; i < texts.length; ++i) {
        collection.save({ text: texts[i] });
      }

      assertEqual(1, collection.fulltext("text", "substring:fidus,ego,sum,prefix:canis", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "claudia,substring:ater,est", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "Quintus,substring:icus,prefix:anno", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "aurelius,pater,est,substring:mercator", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "aurelius,pater,est,substring:tor", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:aur,prefix:merc,substring:merc", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "substring:puelle,substring:annos,substring:nata,substring:filia", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "puelle,prefix:annos,prefix:nata,substring:filia", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "prefix:puelle,prefix:annos,prefix:nata,substring:filia", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "puelle,annos,nata,substring:filia", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "puelle,substring:annos,nata,substring:filia", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "puelle,substring:annos,nata,prefix:filia", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "puelle,substring:nos,nata,prefix:filia", idx).toArray().length);
      assertEqual(1, collection.fulltext("text", "puelle,substring:nos,nata,substring:ili", idx).toArray().length);
    }

  };
}
*/
////////////////////////////////////////////////////////////////////////////////
/// @brief executes the test suites
////////////////////////////////////////////////////////////////////////////////

jsunity.run(fulltextCreateSuite);
jsunity.run(fulltextQuerySuite);
/// jsunity.run(fulltextQuerySubstringSuite);

return jsunity.done();

@@ -62,9 +62,6 @@ function createStatisticsCollection (name) {
    }

    collection = db._collection(name);
  }

  if (collection !== null) {
    collection.ensureIndex({ type: 'skiplist', fields: [ 'time' ] });
  }

@@ -414,11 +411,9 @@ exports.STATISTICS_INTERVAL = 10;
exports.STATISTICS_HISTORY_INTERVAL = 15 * 60;

// //////////////////////////////////////////////////////////////////////////////
// / @brief createCollections
// /
// / This cannot be called during version check, because the collections are
// / system wide and the version checks might not yet know, that it is running
// / on a cluster coordinate.
// / on a cluster coordinator.
// //////////////////////////////////////////////////////////////////////////////

exports.createStatisticsCollections = function () {