mirror of https://gitee.com/bigwinds/arangodb
cleanup of fulltext indexes, resizing etc.
This commit is contained in:
parent
7261465139
commit
cbaa57a1bd
|
@ -75,19 +75,30 @@ extern ZCOD zcdh;
|
|||
/// @brief maximum number of Unicode characters for an indexed word
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define MAX_WORD_LENGTH (40)
|
||||
#define MAX_WORD_LENGTH (40)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief gap between two words in a temporary search buffer
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define SPACING (10)
|
||||
#define SPACING (10)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief maximum tolerable occupancy of the index in percent (e.g. 75 %)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define HEALTH_THRESHOLD (75)
|
||||
#define HEALTH_THRESHOLD (75)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief index extra growth factor
|
||||
/// if 1.0, the index will be resized to the values originally suggested. As
|
||||
/// resizing is expensive, one might want to decrease the overall number of
|
||||
/// resizings. This can be done by setting this number to a value bigger than
|
||||
/// 1.0
|
||||
/// TODO: find a good default value for this
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define EXTRA_GROWTH_FACTOR (1.0)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief the actual index struct used
|
||||
|
@ -104,9 +115,10 @@ typedef struct {
|
|||
TUBER* _index1;
|
||||
TUBER* _index2;
|
||||
TUBER* _index3;
|
||||
uint64_t _ix3KKey;
|
||||
|
||||
int64_t _maxDocuments;
|
||||
int64_t _numDocuments;
|
||||
uint64_t _maxDocuments;
|
||||
uint64_t _numDocuments;
|
||||
|
||||
FTS_texts_t* (*getTexts)(FTS_document_id_t, void*);
|
||||
void (*freeWordlist)(FTS_texts_t*);
|
||||
|
@ -168,6 +180,20 @@ static uint64_t GetUnicode (uint8_t** ptr) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief translate zstr error code into TRI_error code
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Maps a non-zero zstr/tuber return code onto a TRI_ERROR_* value.
// Returns TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE for code 2 and
// TRI_ERROR_OUT_OF_MEMORY for every other code.
static int TranslateZStrErrorCode (int zstrErrorCode) {
|
||||
// callers only translate codes they have already found to be non-zero
assert(zstrErrorCode != 0);
|
||||
|
||||
// NOTE(review): presumably 2 is the zstr code for "structure is full" -
// confirm against the zstr library; there is no named constant for it here
if (zstrErrorCode == 2) {
|
||||
return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
||||
}
|
||||
|
||||
// any other non-zero code is reported as out of memory
return TRI_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief add a document to the index
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -195,6 +221,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
uint64_t bkey = 0;
|
||||
uint64_t docb, dock;
|
||||
int res;
|
||||
int res2;
|
||||
|
||||
ix = (FTS_real_index*) ftx;
|
||||
|
||||
|
@ -202,6 +229,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
handle = ix->_firstFree;
|
||||
if (handle == 0) {
|
||||
// no more document handles free
|
||||
LOG_ERROR("fail on %d", __LINE__);
|
||||
return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
||||
}
|
||||
|
||||
|
@ -227,7 +255,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
|
||||
// check for out of memory
|
||||
if (zstrwl == NULL || zstr2a == NULL || zstr2b == NULL || x3zstr == NULL || x3zstrb == NULL) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
|
@ -243,7 +271,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
unicode = GetUnicode(&utf);
|
||||
while (unicode != 0) {
|
||||
if (ZStrEnc(zstrwl, &zcutf, unicode) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
|
@ -256,13 +284,13 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
|
||||
// terminate the word and insert into STEX
|
||||
if (ZStrEnc(zstrwl, &zcutf, 0) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
ZStrNormalize(zstrwl);
|
||||
if (ZStrSTAppend(stex, zstrwl) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
|
@ -279,7 +307,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
for (wdx = 0; wdx < nowords; wdx++) {
|
||||
// get it out as a word
|
||||
if (ZStrInsert(zstrwl, wpt, 2) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
|
@ -307,7 +335,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
// get the Z-string for the index-2 entry before this letter
|
||||
i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a);
|
||||
if (i == 1) {
|
||||
res = TRI_ERROR_INTERNAL; // TODO: check with Richard
|
||||
res = TRI_ERROR_INTERNAL;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
|
@ -335,6 +363,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
if (newlet != tran) {
|
||||
// if not there, create a new index-2 entry for it
|
||||
bkey = ZStrTuberIns(ix->_index2, kkey[j], tran);
|
||||
// TODO: check bkey for INSFAIL
|
||||
kkey[j + 1] = ZStrTuberK(ix->_index2, kkey[j], tran, bkey);
|
||||
// update old index-2 entry to insert new letter
|
||||
ZStrCxClear(&zcdelt, &ctx2a);
|
||||
|
@ -344,14 +373,14 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
ZStrClear(zstr2b);
|
||||
x64 = ZStrBitsOut(zstr2a, 1);
|
||||
if (ZStrBitsIn(x64, 1, zstr2b) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
if (x64 == 1) {
|
||||
// copy over the B-key into index 3
|
||||
docb = ZStrDec(zstr2a, &zcbky);
|
||||
if (ZStrEnc(zstr2b, &zcbky, docb) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
|
@ -364,27 +393,27 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
break;
|
||||
}
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
x64 = ZStrDec(zstr2a, &zcbky);
|
||||
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
if (ZStrEnc(zstr2b, &zcbky, bkey) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
if (newlet == oldlet) {
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
|
@ -392,24 +421,25 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
while (newlet != oldlet) {
|
||||
oldlet = newlet;
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
x64 = ZStrDec(zstr2a, &zcbky);
|
||||
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a);
|
||||
}
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
ZStrNormalize(zstr2b);
|
||||
if (ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res2 = ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b);
|
||||
if (res2 != 0) {
|
||||
res = TranslateZStrErrorCode(res2);
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
|
@ -424,7 +454,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
// so read the zstr from index2
|
||||
i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a);
|
||||
if (i == 1) {
|
||||
res = TRI_ERROR_INTERNAL; // TODO: check with Richard
|
||||
res = TRI_ERROR_INTERNAL;
|
||||
goto oom;
|
||||
}
|
||||
// is there already an index-3 entry available?
|
||||
|
@ -435,6 +465,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
}
|
||||
else {
|
||||
docb = ZStrTuberIns(ix->_index3, kkey[j], 0);
|
||||
// TODO: check docb
|
||||
// put it into index 2
|
||||
ZStrCxClear(&zcdelt, &ctx2a);
|
||||
ZStrCxClear(&zcdelt, &ctx2b);
|
||||
|
@ -443,12 +474,12 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
ZStrClear(zstr2b);
|
||||
x64 = ZStrBitsOut(zstr2a, 1);
|
||||
if (ZStrBitsIn(1, 1, zstr2b) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
if (ZStrEnc(zstr2b, &zcbky, docb) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
|
@ -461,18 +492,19 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
}
|
||||
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
x64 = ZStrDec(zstr2a, &zcbky);
|
||||
if (ZStrEnc(zstr2b,&zcbky, x64) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
ZStrNormalize(zstr2b);
|
||||
if (ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res2 = ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b);
|
||||
if (res2 != 0) {
|
||||
res = TranslateZStrErrorCode(res2);
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
|
@ -481,7 +513,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
i = ZStrTuberRead(ix->_index3, dock, x3zstr);
|
||||
ZStrClear(x3zstrb);
|
||||
if (i == 1) {
|
||||
res = TRI_ERROR_INTERNAL; // TODO: check with Richard
|
||||
res = TRI_ERROR_INTERNAL;
|
||||
goto oom;
|
||||
}
|
||||
|
||||
|
@ -496,37 +528,38 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
}
|
||||
|
||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, handle) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
if (newhan == oldhan) {
|
||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, handle) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
while (newhan != oldhan) {
|
||||
oldhan = newhan;
|
||||
newhan = ZStrCxDec(x3zstr, &zcdoc, &x3ctx);
|
||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
}
|
||||
ZStrNormalize(x3zstrb);
|
||||
if (ZStrTuberUpdate(ix->_index3, dock, x3zstrb) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res2 = ZStrTuberUpdate(ix->_index3, dock, x3zstrb);
|
||||
if (res2 != 0) {
|
||||
res = TranslateZStrErrorCode(res2);
|
||||
goto oom;
|
||||
}
|
||||
|
||||
|
@ -545,7 +578,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
tran = ZStrXlate(&zcutf, ixlet[j2]);
|
||||
i = ZStrTuberRead(ix->_index1, kkey1[j2 + 1], zstr2a);
|
||||
if (i == 1) {
|
||||
res = TRI_ERROR_INTERNAL; // TODO: check with Richard
|
||||
res = TRI_ERROR_INTERNAL;
|
||||
goto oom;
|
||||
}
|
||||
// look to see if the letter is there
|
||||
|
@ -565,6 +598,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
if (newlet != tran) {
|
||||
// if not there, create a new index-1 entry for it
|
||||
bkey = ZStrTuberIns(ix->_index1, kkey1[j2 + 1], tran);
|
||||
// TODO: check bkey
|
||||
kkey1[j2] = ZStrTuberK(ix->_index1, kkey1[j2 + 1], tran, bkey);
|
||||
// update old index-1 entry to insert new letter
|
||||
ZStrCxClear(&zcdelt, &ctx2a);
|
||||
|
@ -580,26 +614,26 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
break;
|
||||
}
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
x64 = ZStrDec(zstr2a, &zcbky);
|
||||
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
if (ZStrEnc(zstr2b, &zcbky, bkey) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
if (newlet == oldlet) {
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
|
@ -607,24 +641,25 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
|||
while (newlet != oldlet) {
|
||||
oldlet = newlet;
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
x64 = ZStrDec(zstr2a, &zcbky);
|
||||
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a);
|
||||
}
|
||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
ZStrNormalize(zstr2b);
|
||||
if (ZStrTuberUpdate(ix->_index1, kkey1[j2 + 1], zstr2b) != 0) {
|
||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
||||
res2 = ZStrTuberUpdate(ix->_index1, kkey1[j2 + 1], zstr2b);
|
||||
if (res2 != 0) {
|
||||
res = TranslateZStrErrorCode(res2);
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
|
@ -685,11 +720,15 @@ static int RealDeleteDocument (FTS_index_t* ftx, FTS_document_id_t docid) {
|
|||
}
|
||||
|
||||
if (i > ix->_lastSlot) {
|
||||
LOG_ERROR("fail on %d", __LINE__);
|
||||
return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
||||
}
|
||||
|
||||
ix->_handlesFree[i] = 1;
|
||||
ix->_numDocuments--;
|
||||
if (ix->_numDocuments > 0) {
|
||||
// should never underflow
|
||||
ix->_numDocuments--;
|
||||
}
|
||||
|
||||
return TRI_ERROR_NO_ERROR;
|
||||
}
|
||||
|
@ -946,11 +985,17 @@ static void Ix2Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk2) {
|
|||
/// @brief index recursion, prefix matching
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void Ix1Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk1, uint64_t* wd) {
|
||||
static int Ix1Recurs (STEX* dochan,
|
||||
FTS_real_index* ix,
|
||||
uint64_t kk1,
|
||||
uint64_t* wd) {
|
||||
ZSTR* zstr;
|
||||
CTX ctx;
|
||||
uint64_t newlet;
|
||||
uint64_t kk2;
|
||||
int res;
|
||||
|
||||
res = TRI_ERROR_NO_ERROR;
|
||||
|
||||
kk2 = FindKKey2(ix,wd);
|
||||
|
||||
|
@ -961,13 +1006,11 @@ static void Ix1Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk1, uint64_t*
|
|||
// index 1 entry for this prefix
|
||||
zstr = ZStrCons(10);
|
||||
if (zstr == NULL) {
|
||||
// TODO: out of memory
|
||||
return TRI_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
if (ZStrTuberRead(ix->_index1, kk1, zstr) == 1) {
|
||||
// TODO: make this return an error instead
|
||||
printf("recursion failed to read kk1\n");
|
||||
exit(1);
|
||||
return TRI_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
ZStrCxClear(&zcdelt, &ctx);
|
||||
|
@ -986,10 +1029,16 @@ static void Ix1Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk1, uint64_t*
|
|||
bkey = ZStrDec(zstr, &zcbky);
|
||||
newkk1 = ZStrTuberK(ix->_index1, kk1, newlet, bkey);
|
||||
*(wd - 1) = newlet;
|
||||
Ix1Recurs(dochan, ix, newkk1, wd - 1);
|
||||
|
||||
res = Ix1Recurs(dochan, ix, newkk1, wd - 1);
|
||||
if (res != TRI_ERROR_NO_ERROR) {
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
ZStrDest(zstr);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -1072,15 +1121,9 @@ int FTS_HealthIndex (FTS_index_t* ftx, uint64_t* stats) {
|
|||
ix = (FTS_real_index*) ftx;
|
||||
|
||||
health = (ix->_numDocuments * 100) / ix->_maxDocuments;
|
||||
stats[0] = (health * (ix->_numDocuments + 5)) / 50;
|
||||
|
||||
if (stats[0] < 5) {
|
||||
stats[0] = 5;
|
||||
}
|
||||
|
||||
if (ix->_options == FTS_INDEX_SUBSTRINGS) {
|
||||
ZStrTuberStats(ix->_index1, st);
|
||||
// LOG_TRACE("index 1 health %d size %d", (int) st[0], (int) st[1]);
|
||||
stats[1] = st[1];
|
||||
if (health < st[0]) {
|
||||
health = st[0];
|
||||
|
@ -1091,19 +1134,30 @@ int FTS_HealthIndex (FTS_index_t* ftx, uint64_t* stats) {
|
|||
}
|
||||
|
||||
ZStrTuberStats(ix->_index2, st);
|
||||
// LOG_TRACE("index 2 health %d size %d", (int) st[0], (int) st[1]);
|
||||
stats[2] = st[1];
|
||||
if (health < st[0]) {
|
||||
health = st[0];
|
||||
}
|
||||
|
||||
ZStrTuberStats(ix->_index3, st);
|
||||
// LOG_TRACE("index 3 health %d size %d", (int) st[0], (int) st[1]);
|
||||
stats[3] = st[1];
|
||||
if (health < st[0]) {
|
||||
health = st[0];
|
||||
}
|
||||
|
||||
stats[0] = (health * (ix->_numDocuments + 5)) / 50;
|
||||
if (stats[0] < (ix->_numDocuments + 5)) {
|
||||
stats[0] = (ix->_numDocuments + 5);
|
||||
}
|
||||
|
||||
if (EXTRA_GROWTH_FACTOR > 1.0) {
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
stats[i] = (uint64_t) ((double) stats[i] * (double) EXTRA_GROWTH_FACTOR);
|
||||
}
|
||||
}
|
||||
|
||||
return (int) health;
|
||||
}
|
||||
|
||||
|
@ -1115,6 +1169,7 @@ int FTS_HealthIndex (FTS_index_t* ftx, uint64_t* stats) {
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
FTS_index_t* FTS_CloneIndex (FTS_index_t* ftx,
|
||||
FTS_document_id_t excludeDocument,
|
||||
uint64_t sizes[4]) {
|
||||
FTS_real_index* old;
|
||||
FTS_index_t* clone;
|
||||
|
@ -1126,15 +1181,24 @@ FTS_index_t* FTS_CloneIndex (FTS_index_t* ftx,
|
|||
if (clone != NULL) {
|
||||
// copy documents
|
||||
FTS_document_id_t i;
|
||||
uint64_t count = 0;
|
||||
|
||||
for (i = 1; i <= old->_lastSlot; i++) {
|
||||
FTS_document_id_t found;
|
||||
int res;
|
||||
|
||||
if (old->_handlesFree[i] == 1) {
|
||||
// document is marked as deleted
|
||||
continue;
|
||||
}
|
||||
|
||||
res = FTS_AddDocument(clone, old->_handles[i]);
|
||||
found = old->_handles[i];
|
||||
if (found == excludeDocument) {
|
||||
// do not insert this document, because the caller will insert it later
|
||||
continue;
|
||||
}
|
||||
|
||||
res = FTS_AddDocument(clone, found);
|
||||
if (res != TRI_ERROR_NO_ERROR && res != TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||
// if resize fails, everything's ruined
|
||||
LOG_ERROR("resizing the fulltext index failed with %d, sizes were: %llu %llu %llu %llu",
|
||||
|
@ -1147,7 +1211,11 @@ FTS_index_t* FTS_CloneIndex (FTS_index_t* ftx,
|
|||
FTS_FreeIndex(clone);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
++count;
|
||||
}
|
||||
|
||||
LOG_DEBUG("cloned %llu documents", (unsigned long long) count);
|
||||
}
|
||||
|
||||
return clone;
|
||||
|
@ -1170,11 +1238,11 @@ FTS_index_t* FTS_CreateIndex (void* context,
|
|||
FTS_real_index* ix;
|
||||
int i;
|
||||
|
||||
LOG_INFO("creating fulltext index with sizes %llu %llu %llu %llu",
|
||||
(unsigned long long) sizes[0],
|
||||
(unsigned long long) sizes[1],
|
||||
(unsigned long long) sizes[2],
|
||||
(unsigned long long) sizes[3]);
|
||||
LOG_TRACE("creating fulltext index with sizes %llu %llu %llu %llu",
|
||||
(unsigned long long) sizes[0],
|
||||
(unsigned long long) sizes[1],
|
||||
(unsigned long long) sizes[2],
|
||||
(unsigned long long) sizes[3]);
|
||||
|
||||
ix = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(FTS_real_index), false);
|
||||
if (ix == NULL) {
|
||||
|
@ -1194,10 +1262,11 @@ FTS_index_t* FTS_CreateIndex (void* context,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
ix->_maxDocuments = (int64_t) sizes[0];
|
||||
ix->_maxDocuments = sizes[0];
|
||||
ix->_numDocuments = 0;
|
||||
ix->_context = context;
|
||||
ix->_options = options;
|
||||
ix->_ix3KKey = 0;
|
||||
|
||||
// wordlists retrieval function
|
||||
ix->getTexts = getTexts;
|
||||
|
@ -1335,11 +1404,11 @@ int FTS_AddDocument (FTS_index_t* ftx, FTS_document_id_t docid) {
|
|||
|
||||
health = FTS_HealthIndex(ftx, sizes);
|
||||
if (health > HEALTH_THRESHOLD || res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||
LOG_INFO("Add document: health exceeds threshold. suggested sizes are: %llu %llu %llu %llu",
|
||||
(unsigned long long) sizes[0],
|
||||
(unsigned long long) sizes[1],
|
||||
(unsigned long long) sizes[2],
|
||||
(unsigned long long) sizes[3]);
|
||||
LOG_TRACE("fulltext index health threshold exceeded. new suggested sizes are: %llu %llu %llu %llu",
|
||||
(unsigned long long) sizes[0],
|
||||
(unsigned long long) sizes[1],
|
||||
(unsigned long long) sizes[2],
|
||||
(unsigned long long) sizes[3]);
|
||||
res = TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
||||
}
|
||||
|
||||
|
@ -1392,13 +1461,110 @@ int FTS_UpdateDocument (FTS_index_t* ftx, FTS_document_id_t docid) {
|
|||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief current not called. TODO: find out what its intention is
|
||||
/// @brief read index3 and remove the handles of deleted documents. stops once
|
||||
/// about docs handles have been examined. the scan & cleanup is incremental
|
||||
/// the caller must have write-locked the index
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void FTS_BackgroundTask (FTS_index_t* ftx) {
|
||||
/* obtain LOCKMAIN */
|
||||
/* remove deleted handles from index3 not done QQQ */
|
||||
/* release LOCKMAIN */
|
||||
/// @param ftx   index handle, cast to FTS_real_index* internally
/// @param docs  budget of document handles to examine during this call
/// @return 0 if the budget was used up before the end of index 3 was reached,
///         1 if a ZSTR could not be allocated, an encode step failed or
///         ZStrTuberRead returned 2, 3 if the scan reached the last key of
///         index 3 and wrapped around; a non-zero ZStrTuberUpdate result is
///         returned unchanged
int FTS_BackgroundTask (FTS_index_t * ftx, int docs) {
|
||||
FTS_real_index * ix;
|
||||
int dleft,i;
|
||||
CTX cold, cnew;
|
||||
int cd;
|
||||
uint64_t newterm,oldhan,han;
|
||||
ZSTR *zold, *znew;
|
||||
ix = (FTS_real_index *)ftx;
|
||||
// dleft is the remaining handle budget, cd the code returned at the end
dleft=docs;
|
||||
cd=0;
|
||||
|
||||
// znew receives the rewritten handle list, zold the list read from index 3
znew=ZStrCons(100);
|
||||
if(znew==NULL) return 1;
|
||||
zold=ZStrCons(100);
|
||||
if(zold==NULL)
|
||||
{
|
||||
ZStrDest(znew);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// the budget is only tested here, i.e. between two index-3 entries, so one
// call may examine more than docs handles
while(dleft>0)
|
||||
{
|
||||
uint64_t numDeletions;
|
||||
|
||||
assert(ix->_ix3KKey < (ix->_index3)->kmax);
|
||||
|
||||
numDeletions = 0;
|
||||
// _ix3KKey is the scan position; it is kept in the index between calls
i=ZStrTuberRead(ix->_index3,ix->_ix3KKey,zold);
|
||||
// NOTE(review): the meaning of read result 2 is not visible here; the scan
// is abandoned and 1 is returned without advancing _ix3KKey
if(i==2)
|
||||
{
|
||||
cd=1;
|
||||
break;
|
||||
}
|
||||
// only a successful read (0) is processed; other results skip to the next key
if(i==0)
|
||||
{
|
||||
ZStrCxClear(&zcdoc,&cold);
|
||||
ZStrCxClear(&zcdoc,&cnew);
|
||||
ZStrClear(znew);
|
||||
oldhan=0;
|
||||
newterm=0;
|
||||
while(1)
|
||||
{
|
||||
han=ZStrCxDec(zold,&zcdoc,&cold);
|
||||
// decoding the same value twice in a row ends the handle list
if(han==oldhan) {
|
||||
break;
|
||||
}
|
||||
oldhan=han;
|
||||
// every examined handle counts against the budget, deleted or not
dleft--;
|
||||
// _handlesFree[han]==0: handle is still in use, copy it to the new list
if(ix->_handlesFree[han]==0)
|
||||
{
|
||||
i=ZStrCxEnc(znew,&zcdoc,&cnew,han);
|
||||
if(i!=0) {
|
||||
// on failure the scan position is reset to the first key
ix->_ix3KKey = 0;
|
||||
ZStrDest(znew);
|
||||
ZStrDest(zold);
|
||||
return 1;
|
||||
}
|
||||
newterm=han;
|
||||
}
|
||||
else {
|
||||
// something was deleted
|
||||
++numDeletions;
|
||||
}
|
||||
}
|
||||
|
||||
if (numDeletions > 0) {
|
||||
// update existing entry in tuber
|
||||
// but only if there's something to update
|
||||
|
||||
// terminate the new list by repeating the last kept handle (0 if none)
i=ZStrCxEnc(znew,&zcdoc,&cnew,newterm);
|
||||
if(i!=0) {
|
||||
ix->_ix3KKey = 0;
|
||||
ZStrDest(znew);
|
||||
ZStrDest(zold);
|
||||
return 1;
|
||||
}
|
||||
ZStrNormalize(znew);
|
||||
i=ZStrTuberUpdate(ix->_index3,ix->_ix3KKey,znew);
|
||||
}
|
||||
|
||||
// i is still 0 here unless ZStrTuberUpdate was called and failed
if(i!=0) {
|
||||
ix->_ix3KKey = 0;
|
||||
ZStrDest(znew);
|
||||
ZStrDest(zold);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
ix->_ix3KKey++;
|
||||
if(ix->_ix3KKey >= (ix->_index3)->kmax)
|
||||
{
|
||||
ix->_ix3KKey = 0;
|
||||
cd=3; // finished iterating over all document handles
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ZStrDest(znew);
|
||||
ZStrDest(zold);
|
||||
return cd;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -1423,34 +1589,53 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
uint64_t word[2 * (MAX_WORD_LENGTH + SPACING)];
|
||||
uint64_t ndocs = 0;
|
||||
|
||||
zstr2 = ZStrCons(10); /* from index-2 tuber */
|
||||
// initialise
|
||||
dc = NULL;
|
||||
TRI_set_errno(TRI_ERROR_NO_ERROR);
|
||||
|
||||
zstr2 = ZStrCons(10); /* from index-2 tuber */
|
||||
if (zstr2 == NULL) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
zstr3 = ZStrCons(10); /* from index-3 tuber */
|
||||
zstr3 = ZStrCons(10); /* from index-3 tuber */
|
||||
if (zstr3 == NULL) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrDest(zstr2);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
zstra1 = ZStrCons(10); /* current list of documents */
|
||||
if (zstra1 == NULL) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrDest(zstr3);
|
||||
ZStrDest(zstr2);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
zstra2 = ZStrCons(10); /* new list of documents */
|
||||
if (zstra2 == NULL) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrDest(zstra1);
|
||||
ZStrDest(zstr3);
|
||||
ZStrDest(zstr2);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
zstr = ZStrCons(4); /* work zstr from stex */
|
||||
zstr = ZStrCons(4); /* work zstr from stex */
|
||||
if (zstr == NULL) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrDest(zstra2);
|
||||
ZStrDest(zstra1);
|
||||
ZStrDest(zstr3);
|
||||
ZStrDest(zstr2);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
ix = (FTS_real_index*) ftx;
|
||||
|
||||
/* - for each term in the query */
|
||||
// for each term in the query
|
||||
for (queryterm = 0; queryterm < query->_len; queryterm++) {
|
||||
if (query->_localOptions[queryterm] == FTS_MATCH_SUBSTRING &&
|
||||
ix->_options != FTS_INDEX_SUBSTRINGS) {
|
||||
|
@ -1512,7 +1697,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
}
|
||||
if (ix->_handlesFree[newhan] == 0) {
|
||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
goto oom;
|
||||
}
|
||||
lasthan = newhan;
|
||||
ndocs++;
|
||||
|
@ -1541,7 +1727,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
if (newhan == nhand1) {
|
||||
if (ix->_handlesFree[newhan] == 0) {
|
||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
goto oom;
|
||||
}
|
||||
lasthan = newhan;
|
||||
ndocs++;
|
||||
|
@ -1561,9 +1748,12 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, lasthan) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
goto oom;
|
||||
}
|
||||
|
||||
ZStrNormalize(zstra2);
|
||||
ztemp = zstra1;
|
||||
zstra1 = zstra2;
|
||||
|
@ -1579,7 +1769,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
// make STEX to contain new list of handles
|
||||
dochan = ZStrSTCons(2);
|
||||
if (dochan == NULL) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
goto oom;
|
||||
}
|
||||
|
||||
FillWordBuffer(&word[MAX_WORD_LENGTH + SPACING], query->_texts[queryterm]);
|
||||
|
@ -1607,7 +1798,10 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
break;
|
||||
}
|
||||
// call routine to recursively put handles to STEX
|
||||
Ix1Recurs(dochan, ix, kkey, word + MAX_WORD_LENGTH + SPACING);
|
||||
if (Ix1Recurs(dochan, ix, kkey, word + MAX_WORD_LENGTH + SPACING) != TRI_ERROR_NO_ERROR) {
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
goto oom;
|
||||
}
|
||||
}
|
||||
|
||||
ZStrSTSort(dochan);
|
||||
|
@ -1625,13 +1819,17 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
uint64_t newhan;
|
||||
|
||||
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrSTDest(dochan);
|
||||
goto oom;
|
||||
}
|
||||
newhan = ZStrDec(zstr, &zcdh);
|
||||
docpt += ZStrExtLen(docpt, 2);
|
||||
if (ix->_handlesFree[newhan] == 0) {
|
||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrSTDest(dochan);
|
||||
goto oom;
|
||||
}
|
||||
lasthan = newhan;
|
||||
ndocs++;
|
||||
|
@ -1652,7 +1850,9 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
|
||||
nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1);
|
||||
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrSTDest(dochan);
|
||||
goto oom;
|
||||
}
|
||||
newhan = ZStrDec(zstr, &zcdh);
|
||||
docpt += ZStrExtLen(docpt, 2);
|
||||
|
@ -1667,8 +1867,11 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
if (newhan == nhand1) {
|
||||
if (ix->_handlesFree[newhan] == 0) {
|
||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrSTDest(dochan);
|
||||
goto oom;
|
||||
}
|
||||
|
||||
lasthan = newhan;
|
||||
ndocs++;
|
||||
}
|
||||
|
@ -1676,8 +1879,11 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
break;
|
||||
}
|
||||
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrSTDest(dochan);
|
||||
goto oom;
|
||||
}
|
||||
|
||||
newhan = ZStrDec(zstr, &zcdh);
|
||||
docpt += ZStrExtLen(docpt, 2);
|
||||
odocs--;
|
||||
|
@ -1693,7 +1899,9 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
break;
|
||||
}
|
||||
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
ZStrSTDest(dochan);
|
||||
goto oom;
|
||||
}
|
||||
newhan = ZStrDec(zstr, &zcdh);
|
||||
docpt += ZStrExtLen(docpt, 2);
|
||||
|
@ -1702,7 +1910,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
}
|
||||
}
|
||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, lasthan) != 0) {
|
||||
// TODO: out of memory
|
||||
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||
goto oom;
|
||||
}
|
||||
ZStrNormalize(zstra2);
|
||||
ztemp = zstra1;
|
||||
|
@ -1737,6 +1946,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
oom:
|
||||
|
||||
ZStrDest(zstra1);
|
||||
ZStrDest(zstra2);
|
||||
|
|
|
@ -116,13 +116,14 @@ FTS_texts_t;
|
|||
/// @brief determine the health of the index
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int FTS_HealthIndex (FTS_index_t*, uint64_t sizes[4]);
|
||||
int FTS_HealthIndex (FTS_index_t*, uint64_t[4]);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief clone an existing index
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
FTS_index_t* FTS_CloneIndex (FTS_index_t*,
|
||||
FTS_document_id_t,
|
||||
uint64_t[4]);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -159,6 +160,13 @@ int FTS_DeleteDocument (FTS_index_t*, FTS_document_id_t);
|
|||
|
||||
int FTS_UpdateDocument (FTS_index_t*, FTS_document_id_t);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief read index3 and remove handles of unused documents. stops after
|
||||
/// docs deletions. successive calls can be used to do an incremental scan & cleanup
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int FTS_BackgroundTask (FTS_index_t* ftx, int docs);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief perform a search in the index
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -952,10 +952,11 @@ void ZStrTuberStats(TUBER * t, uint64_t * stats)
|
|||
d2=100-d2;
|
||||
if(d2>d1) d1=d2;
|
||||
d2=((t->wct*8)*(d1+1))/50;
|
||||
/*
|
||||
printf("fuse %d freebit %d freekey %d kmax %d wct %d lenlen %d\n",
|
||||
(int)t->fuses, (int)t->freebit, (int)t->freekey, (int)t->kmax,
|
||||
(int)t->wct, (int)t->lenlen);
|
||||
|
||||
*/
|
||||
if(d2<72*t->fuses)d2=72*t->fuses;
|
||||
stats[0]=d1;
|
||||
stats[1]=d2;
|
||||
|
|
|
@ -61,6 +61,7 @@
|
|||
#include "VocBase/general-cursor.h"
|
||||
#include "VocBase/document-collection.h"
|
||||
#include "VocBase/edge-collection.h"
|
||||
#include "VocBase/fulltext-query.h"
|
||||
#include "VocBase/key-generator.h"
|
||||
#include "VocBase/voc-shaper.h"
|
||||
#include "v8.h"
|
||||
|
@ -614,8 +615,8 @@ static v8::Handle<v8::Value> EnsureFulltextIndex (v8::Arguments const& argv,
|
|||
const bool create) {
|
||||
v8::HandleScope scope;
|
||||
|
||||
if (argv.Length() != 1 && argv.Length() != 2) {
|
||||
return scope.Close(v8::ThrowException(TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION, "usage: ensureFulltext(<attribute>, <indexSubstrings>)")));
|
||||
if (argv.Length() < 1 || argv.Length() > 3) {
|
||||
return scope.Close(v8::ThrowException(TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION, "usage: ensureFulltext(<attribute>, <indexSubstrings>, <minWordLength>)")));
|
||||
}
|
||||
|
||||
string attributeName = TRI_ObjectToString(argv[0]);
|
||||
|
@ -624,10 +625,15 @@ static v8::Handle<v8::Value> EnsureFulltextIndex (v8::Arguments const& argv,
|
|||
}
|
||||
|
||||
bool indexSubstrings = false;
|
||||
if (argv.Length() == 2) {
|
||||
if (argv.Length() > 1) {
|
||||
indexSubstrings = TRI_ObjectToBoolean(argv[1]);
|
||||
}
|
||||
|
||||
int minWordLength = TRI_FULLTEXT_WORDLENGTH_DEFAULT;
|
||||
if (argv.Length() == 3) {
|
||||
minWordLength = (int) TRI_ObjectToInt64(argv[2]);
|
||||
}
|
||||
|
||||
// .............................................................................
|
||||
// Check that we have a valid collection
|
||||
// .............................................................................
|
||||
|
@ -661,14 +667,14 @@ static v8::Handle<v8::Value> EnsureFulltextIndex (v8::Arguments const& argv,
|
|||
TRI_document_collection_t* document = (TRI_document_collection_t*) primary;
|
||||
|
||||
if (create) {
|
||||
idx = TRI_EnsureFulltextIndexDocumentCollection(document, attributeName.c_str(), indexSubstrings, &created);
|
||||
idx = TRI_EnsureFulltextIndexDocumentCollection(document, attributeName.c_str(), indexSubstrings, minWordLength, &created);
|
||||
|
||||
if (idx == 0) {
|
||||
res = TRI_errno();
|
||||
}
|
||||
}
|
||||
else {
|
||||
idx = TRI_LookupFulltextIndexDocumentCollection(document, attributeName.c_str(), indexSubstrings);
|
||||
idx = TRI_LookupFulltextIndexDocumentCollection(document, attributeName.c_str(), indexSubstrings, minWordLength);
|
||||
}
|
||||
|
||||
if (idx == 0) {
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "BasicsC/strings.h"
|
||||
#include "ShapedJson/shape-accessor.h"
|
||||
#include "VocBase/edge-collection.h"
|
||||
#include "VocBase/fulltext-query.h"
|
||||
#include "VocBase/index.h"
|
||||
#include "VocBase/key-generator.h"
|
||||
#include "VocBase/voc-shaper.h"
|
||||
|
@ -4091,7 +4092,8 @@ TRI_index_t* TRI_EnsureSkiplistIndexDocumentCollection (TRI_document_collection_
|
|||
|
||||
static TRI_index_t* LookupFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
||||
const char* attributeName,
|
||||
const bool indexSubstrings) {
|
||||
const bool indexSubstrings,
|
||||
int minWordLength) {
|
||||
size_t i;
|
||||
|
||||
assert(attributeName);
|
||||
|
@ -4107,6 +4109,10 @@ static TRI_index_t* LookupFulltextIndexDocumentCollection (TRI_document_collecti
|
|||
continue;
|
||||
}
|
||||
|
||||
if (fulltext->_minWordLength != minWordLength) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fulltext->base._fields._length != 1) {
|
||||
continue;
|
||||
}
|
||||
|
@ -4129,6 +4135,7 @@ static TRI_index_t* LookupFulltextIndexDocumentCollection (TRI_document_collecti
|
|||
static TRI_index_t* CreateFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
||||
const char* attributeName,
|
||||
const bool indexSubstrings,
|
||||
int minWordLength,
|
||||
TRI_idx_iid_t iid,
|
||||
bool* created) {
|
||||
TRI_index_t* idx;
|
||||
|
@ -4140,7 +4147,7 @@ static TRI_index_t* CreateFulltextIndexDocumentCollection (TRI_document_collecti
|
|||
// a new one.
|
||||
// ...........................................................................
|
||||
|
||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, indexSubstrings);
|
||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, indexSubstrings, minWordLength);
|
||||
if (idx != NULL) {
|
||||
LOG_TRACE("fulltext-index already created");
|
||||
|
||||
|
@ -4151,7 +4158,7 @@ static TRI_index_t* CreateFulltextIndexDocumentCollection (TRI_document_collecti
|
|||
}
|
||||
|
||||
// Create the fulltext index
|
||||
idx = TRI_CreateFulltextIndex(&document->base, attributeName, indexSubstrings);
|
||||
idx = TRI_CreateFulltextIndex(&document->base, attributeName, indexSubstrings, minWordLength);
|
||||
|
||||
// If index id given, use it otherwise use the default.
|
||||
if (iid) {
|
||||
|
@ -4187,9 +4194,11 @@ static int FulltextIndexFromJson (TRI_document_collection_t* document,
|
|||
TRI_json_t* attribute;
|
||||
TRI_json_t* fld;
|
||||
TRI_json_t* indexSubstrings;
|
||||
TRI_json_t* minWordLength;
|
||||
char* attributeName;
|
||||
size_t fieldCount;
|
||||
bool doIndexSubstrings;
|
||||
int minWordLengthValue;
|
||||
|
||||
// extract fields
|
||||
fld = ExtractFields(definition, &fieldCount, iid);
|
||||
|
@ -4214,13 +4223,19 @@ static int FulltextIndexFromJson (TRI_document_collection_t* document,
|
|||
if (indexSubstrings != NULL && indexSubstrings->_type == TRI_JSON_BOOLEAN) {
|
||||
doIndexSubstrings = indexSubstrings->_value._boolean;
|
||||
}
|
||||
|
||||
// JsonFulltextIndex serializes this setting under the key "minWordLength", so
// that key is read first; the shorter "minLength" key is still accepted as a
// fallback. If neither is present (or not a number) the default is used.
minWordLength = TRI_LookupArrayJson(definition, "minWordLength");
|
||||
if (minWordLength == NULL) {
|
||||
minWordLength = TRI_LookupArrayJson(definition, "minLength");
|
||||
}
|
||||
minWordLengthValue = TRI_FULLTEXT_WORDLENGTH_DEFAULT;
|
||||
if (minWordLength != NULL && minWordLength->_type == TRI_JSON_NUMBER) {
|
||||
minWordLengthValue = (int) minWordLength->_value._number;
|
||||
}
|
||||
|
||||
// create the index
|
||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, doIndexSubstrings);
|
||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, doIndexSubstrings, minWordLengthValue);
|
||||
|
||||
if (idx == NULL) {
|
||||
bool created;
|
||||
idx = CreateFulltextIndexDocumentCollection(document, attributeName, doIndexSubstrings, iid, &created);
|
||||
idx = CreateFulltextIndexDocumentCollection(document, attributeName, doIndexSubstrings, minWordLengthValue, iid, &created);
|
||||
}
|
||||
|
||||
if (idx == NULL) {
|
||||
|
@ -4250,7 +4265,8 @@ static int FulltextIndexFromJson (TRI_document_collection_t* document,
|
|||
|
||||
TRI_index_t* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
||||
const char* attributeName,
|
||||
const bool indexSubstrings) {
|
||||
const bool indexSubstrings,
|
||||
int minWordLength) {
|
||||
TRI_index_t* idx;
|
||||
TRI_primary_collection_t* primary;
|
||||
|
||||
|
@ -4262,7 +4278,7 @@ TRI_index_t* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_
|
|||
|
||||
TRI_READ_LOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
||||
|
||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, indexSubstrings);
|
||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, indexSubstrings, minWordLength);
|
||||
|
||||
TRI_READ_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
||||
|
||||
|
@ -4280,6 +4296,7 @@ TRI_index_t* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_
|
|||
TRI_index_t* TRI_EnsureFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
||||
const char* attributeName,
|
||||
const bool indexSubstrings,
|
||||
int minWordLength,
|
||||
bool* created) {
|
||||
TRI_index_t* idx;
|
||||
TRI_primary_collection_t* primary;
|
||||
|
@ -4292,7 +4309,7 @@ TRI_index_t* TRI_EnsureFulltextIndexDocumentCollection (TRI_document_collection_
|
|||
|
||||
TRI_WRITE_LOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
||||
|
||||
idx = CreateFulltextIndexDocumentCollection(document, attributeName, indexSubstrings, 0, created);
|
||||
idx = CreateFulltextIndexDocumentCollection(document, attributeName, indexSubstrings, minWordLength, 0, created);
|
||||
|
||||
TRI_WRITE_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
||||
|
||||
|
|
|
@ -553,7 +553,8 @@ struct TRI_index_s* TRI_EnsureSkiplistIndexDocumentCollection (TRI_document_coll
|
|||
|
||||
struct TRI_index_s* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_t*,
|
||||
const char*,
|
||||
const bool);
|
||||
const bool,
|
||||
int);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief ensures that a fulltext index exists
|
||||
|
@ -562,6 +563,7 @@ struct TRI_index_s* TRI_LookupFulltextIndexDocumentCollection (TRI_document_coll
|
|||
struct TRI_index_s* TRI_EnsureFulltextIndexDocumentCollection (TRI_document_collection_t*,
|
||||
const char*,
|
||||
const bool,
|
||||
int,
|
||||
bool*);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -35,6 +35,25 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- public defines
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @addtogroup VocBase
|
||||
/// @{
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief default minimum word length for a fulltext index
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define TRI_FULLTEXT_WORDLENGTH_DEFAULT (2)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- public functions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
|
|
@ -4115,7 +4115,7 @@ static FTS_texts_t* GetTextsFulltextIndex (FTS_document_id_t document,
|
|||
}
|
||||
|
||||
// parse the document text
|
||||
words = TRI_get_words(text, textLength, 2, true);
|
||||
words = TRI_get_words(text, textLength, (uint8_t) fulltextIndex->_minWordLength, true);
|
||||
if (words == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -4149,23 +4149,31 @@ static FTS_texts_t* GetTextsFulltextIndex (FTS_document_id_t document,
|
|||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief rebuilds a fulltext index by resizing it and re-adding documents
|
||||
/// @brief creates a new fulltext index with the properties of an existing one,
|
||||
/// but with adjusted (potentially bigger) sizes. The documents from the old
|
||||
/// index will be added into the new index.
|
||||
/// The document doc is not copied into the new index; it is the caller's
|
||||
/// responsibility to add it afterwards. This prevents duplicate entries when
|
||||
/// a previous insertion of doc failed part-way: in that case doc might already
|
||||
/// be present in the old index, and copying the old index and then inserting
|
||||
/// doc again would lead to duplicates. So doc is excluded when copying the old
|
||||
/// documents, and the caller has to add doc later.
|
||||
/// The caller must have write-locked the index.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static int ResizeFulltextIndex (TRI_index_t* idx) {
|
||||
static int ResizeFulltextIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc) {
|
||||
TRI_fulltext_index_t* fulltextIndex;
|
||||
FTS_index_t* newIndex;
|
||||
uint64_t sizes[4];
|
||||
|
||||
LOG_INFO("fulltext index resize was triggered");
|
||||
LOG_DEBUG("resizing fulltext index");
|
||||
|
||||
fulltextIndex = (TRI_fulltext_index_t*) idx;
|
||||
|
||||
// this call will populate the sizes array
|
||||
FTS_HealthIndex(fulltextIndex->_fulltextIndex, sizes);
|
||||
|
||||
newIndex = FTS_CloneIndex(fulltextIndex->_fulltextIndex, sizes);
|
||||
newIndex = FTS_CloneIndex(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc), sizes);
|
||||
|
||||
if (newIndex == NULL) {
|
||||
return TRI_ERROR_OUT_OF_MEMORY;
|
||||
|
@ -4191,16 +4199,22 @@ static int InsertFulltextIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc) {
|
|||
LOG_WARNING("internal error in InsertFulltextIndex");
|
||||
return TRI_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
|
||||
TRI_WriteLockReadWriteLock(&fulltextIndex->_lock);
|
||||
res = FTS_AddDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc));
|
||||
|
||||
|
||||
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||
// rebuild the index with adjusted (bigger) size
|
||||
res = ResizeFulltextIndex(idx);
|
||||
// rebuild the index with adjusted (bigger) sizes
|
||||
res = ResizeFulltextIndex(idx, doc);
|
||||
if (res == TRI_ERROR_NO_ERROR) {
|
||||
// insert the document again because previous insert failed
|
||||
res = FTS_AddDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc));
|
||||
if (res != TRI_ERROR_NO_ERROR) {
|
||||
LOG_ERROR("adding document to fulltext index failed: %s", TRI_errno_string(res));
|
||||
}
|
||||
}
|
||||
else {
|
||||
LOG_ERROR("resizing fulltext index failed: %s", TRI_errno_string(res));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4245,6 +4259,7 @@ static TRI_json_t* JsonFulltextIndex (TRI_index_t* idx, TRI_primary_collection_t
|
|||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "unique", TRI_CreateBooleanJson(TRI_UNKNOWN_MEM_ZONE, idx->_unique));
|
||||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "type", TRI_CreateStringCopyJson(TRI_UNKNOWN_MEM_ZONE, "fulltext"));
|
||||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "indexSubstrings", TRI_CreateBooleanJson(TRI_UNKNOWN_MEM_ZONE, fulltextIndex->_indexSubstrings));
|
||||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "minWordLength", TRI_CreateNumberJson(TRI_UNKNOWN_MEM_ZONE, (double) fulltextIndex->_minWordLength));
|
||||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "fields", fields);
|
||||
|
||||
return json;
|
||||
|
@ -4273,11 +4288,7 @@ static int RemoveFulltextIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc) {
|
|||
|
||||
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||
// rebuild the index with adjusted (bigger) size
|
||||
res = ResizeFulltextIndex(idx);
|
||||
if (res == TRI_ERROR_NO_ERROR) {
|
||||
// delete the document again because previous delete failed
|
||||
res = FTS_DeleteDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc));
|
||||
}
|
||||
res = ResizeFulltextIndex(idx, doc);
|
||||
}
|
||||
|
||||
TRI_WriteUnlockReadWriteLock(&fulltextIndex->_lock);
|
||||
|
@ -4304,10 +4315,10 @@ static int UpdateFulltextIndex (TRI_index_t* idx,
|
|||
|
||||
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||
// rebuild the index with adjusted (bigger) size
|
||||
res = ResizeFulltextIndex(idx);
|
||||
res = ResizeFulltextIndex(idx, newDoc);
|
||||
if (res == TRI_ERROR_NO_ERROR) {
|
||||
// update the document again because previous update failed
|
||||
res = FTS_UpdateDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) newDoc));
|
||||
// insert just the new version of the document
|
||||
res = FTS_AddDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) newDoc));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4322,7 +4333,33 @@ static int UpdateFulltextIndex (TRI_index_t* idx,
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static int CleanupFulltextIndex (TRI_index_t* idx) {
// Performs one bounded cleanup pass on the fulltext index behind idx: takes
// the index's write lock, calls FTS_BackgroundTask once, releases the lock and
// returns TRI_ERROR_NO_ERROR regardless of what the background task reported.
|
||||
// NOTE(review): this span comes from a diff rendering; the LOG_DEBUG call
// below looks like the pre-change version of the LOG_TRACE call further
// down - confirm against the real file.
LOG_DEBUG("fulltext cleanup called");
|
||||
TRI_fulltext_index_t* fulltextIndex;
|
||||
int res;
|
||||
|
||||
LOG_TRACE("fulltext cleanup called");
|
||||
|
||||
// idx is treated as a TRI_fulltext_index_t
fulltextIndex = (TRI_fulltext_index_t*) idx;
|
||||
|
||||
// the write lock is held for the whole cleanup pass
TRI_WriteLockReadWriteLock(&fulltextIndex->_lock);
|
||||
|
||||
// the loop body currently always ends in a break, so it runs exactly once
while (1) {
|
||||
// this will scan 100,000 document/word pairs at a time
|
||||
// TODO: check if this number is reasonable
|
||||
res = FTS_BackgroundTask(fulltextIndex->_fulltextIndex, 100000);
|
||||
// result codes of FTS_BackgroundTask:
// 0 = ok, but unfinished
|
||||
// 1 = oom
|
||||
// 2 = needs resize
|
||||
// 3 = finished
// only 3 is checked here; 0, 1 and 2 are neither handled nor reported
|
||||
if (res == 3) {
|
||||
// finished cleaning
|
||||
break;
|
||||
}
|
||||
// TODO: maybe we want to clean more
// (unconditional break: an unfinished pass is not continued in this call)
|
||||
break;
|
||||
}
|
||||
TRI_WriteUnlockReadWriteLock(&fulltextIndex->_lock);
|
||||
|
||||
LOG_TRACE("finished cleaning up");
|
||||
|
||||
// res is not propagated to the caller
return TRI_ERROR_NO_ERROR;
|
||||
}
|
||||
|
@ -4346,7 +4383,8 @@ static int CleanupFulltextIndex (TRI_index_t* idx) {
|
|||
|
||||
TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s* collection,
|
||||
const char* attributeName,
|
||||
const bool indexSubstrings) {
|
||||
const bool indexSubstrings,
|
||||
int minWordLength) {
|
||||
TRI_fulltext_index_t* fulltextIndex;
|
||||
FTS_index_t* fts;
|
||||
TRI_shaper_t* shaper;
|
||||
|
@ -4354,8 +4392,8 @@ TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s* collectio
|
|||
TRI_shape_pid_t attribute;
|
||||
int options;
|
||||
// default sizes for index. TODO: adjust these
|
||||
//uint64_t sizes[4] = { 20050, 100000, 570000, 10000000 };
|
||||
uint64_t sizes[4] = { 50, 1000, 5700, 10000 };
|
||||
//uint64_t sizes[4] = { 50, 100000, 5000, 1000 };
|
||||
uint64_t sizes[4] = { 500, 1000000, 50000, 10000 };
|
||||
|
||||
// look up the attribute
|
||||
shaper = collection->_shaper;
|
||||
|
@ -4403,6 +4441,7 @@ TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s* collectio
|
|||
fulltextIndex->_fulltextIndex = fts;
|
||||
fulltextIndex->_indexSubstrings = indexSubstrings;
|
||||
fulltextIndex->_attribute = attribute;
|
||||
fulltextIndex->_minWordLength = minWordLength;
|
||||
|
||||
TRI_InitVectorString(&fulltextIndex->base._fields, TRI_UNKNOWN_MEM_ZONE);
|
||||
TRI_PushBackVectorString(&fulltextIndex->base._fields, copy);
|
||||
|
|
|
@ -235,6 +235,7 @@ typedef struct TRI_fulltext_index_s {
|
|||
FTS_index_t* _fulltextIndex;
|
||||
TRI_shape_pid_t _attribute;
|
||||
TRI_read_write_lock_t _lock;
|
||||
int _minWordLength;
|
||||
|
||||
bool _indexSubstrings;
|
||||
}
|
||||
|
@ -683,7 +684,8 @@ struct TRI_doc_mptr_s** TRI_LookupFulltextIndex (TRI_index_t*, const char* query
|
|||
|
||||
TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s*,
|
||||
const char*,
|
||||
const bool);
|
||||
const bool,
|
||||
int);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief frees the memory allocated, but does not free the pointer
|
||||
|
|
|
@ -384,7 +384,7 @@ function fulltextQuerySuite () {
|
|||
assertEqual(0, collection.FULLTEXT(idx, "no,cats,allowed").documents.length);
|
||||
assertEqual(0, collection.FULLTEXT(idx, "banana").documents.length);
|
||||
},
|
||||
/*
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test duplicate entries
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -407,7 +407,7 @@ function fulltextQuerySuite () {
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test duplicate entries
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
testDuplicatesDocuments: function () {
|
||||
var text1 = "this is a short document text";
|
||||
var text2 = "Some longer document text is put in here just to validate whats going on";
|
||||
|
@ -423,7 +423,7 @@ function fulltextQuerySuite () {
|
|||
assertEqual(10000, collection.FULLTEXT(idx, "some").documents.length);
|
||||
assertEqual(0, collection.FULLTEXT(idx, "banana").documents.length);
|
||||
},
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test similar entries
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -806,7 +806,6 @@ function fulltextQuerySuite () {
|
|||
}
|
||||
},
|
||||
|
||||
/*
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief 4 byte sequences
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -847,6 +846,7 @@ function fulltextQuerySuite () {
|
|||
assertEqual(1, collection.FULLTEXT(idx, "타이승려들은,수호사원으로서").documents.length);
|
||||
assertEqual(1, collection.FULLTEXT(idx, "prefix:타이승려,prefix:수호사원으").documents.length);
|
||||
assertEqual(1, collection.FULLTEXT(idx, "prefix:조상숭배로").documents.length);
|
||||
/*
|
||||
require("console").log(4);
|
||||
assertEqual(1, collection.FULLTEXT(idx, "教材教辅考试").documents.length);
|
||||
// "图书简介 亚马逊图书,中国最大的网上书店。拥有文学,经济管理,少儿,人文社科,生活,艺术,科技,进口原版,期刊杂志等大类,教材教辅考试,历史,国学古籍,法律,军事,宗教,心理学,哲学,健康与养生,旅游与地图,娱乐,两性婚恋,时尚,家居休闲,孕产育儿,文学,小说,传记,青春与动漫绘本,家庭百科,外语,工具书,教育,心理励志,心灵读物,建筑,计算机与网络,科学与自然等数十小类共计300多万种中外图书
|
||||
|
@ -880,8 +880,8 @@ function fulltextQuerySuite () {
|
|||
assertEqual(1, collection.FULLTEXT(idx, "síðu,rættar,ritstjórni").documents.length);
|
||||
require("console").log(73);
|
||||
assertEqual(1, collection.FULLTEXT(idx, "prefix:læt").documents.length);
|
||||
*/
|
||||
}
|
||||
*/
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -250,6 +250,9 @@ char* Utf8Helper::tolower (TRI_memory_zone_t* zone, const char *src, int32_t src
|
|||
}
|
||||
else {
|
||||
utf8_dest = (char*) TRI_Allocate(zone, (srcLength+1) * sizeof(char), false);
|
||||
if (utf8_dest == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
dstLength = ucasemap_utf8ToLower(csm.getAlias(),
|
||||
utf8_dest,
|
||||
|
@ -285,7 +288,9 @@ char* Utf8Helper::tolower (TRI_memory_zone_t* zone, const char *src, int32_t src
|
|||
#endif
|
||||
|
||||
utf8_dest = TRI_LowerAsciiStringZ(zone, src);
|
||||
dstLength = strlen(utf8_dest);
|
||||
if (utf8_dest != 0) {
|
||||
dstLength = strlen(utf8_dest);
|
||||
}
|
||||
return utf8_dest;
|
||||
}
|
||||
|
||||
|
@ -371,14 +376,16 @@ char* Utf8Helper::toupper (TRI_memory_zone_t* zone, const char *src, int32_t src
|
|||
#endif
|
||||
|
||||
utf8_dest = TRI_UpperAsciiStringZ(zone, src);
|
||||
dstLength = strlen(utf8_dest);
|
||||
if (utf8_dest != NULL) {
|
||||
dstLength = strlen(utf8_dest);
|
||||
}
|
||||
return utf8_dest;
|
||||
}
|
||||
|
||||
TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
|
||||
const size_t textLength,
|
||||
uint8_t minimalLength,
|
||||
bool lowerCase) {
|
||||
const size_t textLength,
|
||||
uint8_t minimalLength,
|
||||
bool lowerCase) {
|
||||
TRI_vector_string_t* words;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString word;
|
||||
|
@ -401,17 +408,29 @@ TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
|
|||
// lower case string
|
||||
int32_t lowerLength = 0;
|
||||
char* lower = tolower(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, lowerLength);
|
||||
|
||||
if (lowerLength == 0) {
|
||||
|
||||
if (lower == NULL) {
|
||||
// out of memory
|
||||
TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (lowerLength == 0) {
|
||||
TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
|
||||
TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, lower, lowerLength, &textUtf16Length);
|
||||
TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
|
||||
}
|
||||
else {
|
||||
textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, &textUtf16Length);
|
||||
}
|
||||
|
||||
if (textUtf16 == NULL) {
|
||||
// UTF-8 to UTF-16 conversion failed: release the result vector, as the
// earlier error paths in this function do, so that it is not leaked
TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ULocDataLocaleType type = ULOC_VALID_LOCALE;
|
||||
const Locale& locale = _coll->getLocale(type, status);
|
||||
|
@ -437,7 +456,9 @@ TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
|
|||
if (tempUtf16Length >= minimalLength) {
|
||||
utext.extractBetween(start, end, tempUtf16, 0);
|
||||
utf8Word = TRI_UCharToUtf8(TRI_UNKNOWN_MEM_ZONE, tempUtf16, tempUtf16Length, &utf8WordLength);
|
||||
TRI_PushBackVectorString(words, utf8Word);
|
||||
if (utf8Word != 0) {
|
||||
TRI_PushBackVectorString(words, utf8Word);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue