mirror of https://gitee.com/bigwinds/arangodb
cleanup of fulltext indexes, resizing etc.
This commit is contained in:
parent
7261465139
commit
cbaa57a1bd
|
@ -75,19 +75,30 @@ extern ZCOD zcdh;
|
||||||
/// @brief maximum number of Unicode characters for an indexed word
|
/// @brief maximum number of Unicode characters for an indexed word
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define MAX_WORD_LENGTH (40)
|
#define MAX_WORD_LENGTH (40)
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief gap between two words in a temporary search buffer
|
/// @brief gap between two words in a temporary search buffer
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define SPACING (10)
|
#define SPACING (10)
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief maximum tolerable occupancy of the index (e.g. 60 %)
|
/// @brief maximum tolerable occupancy of the index (e.g. 60 %)
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define HEALTH_THRESHOLD (75)
|
#define HEALTH_THRESHOLD (75)
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief index extra growth factor
|
||||||
|
/// if 1.0, the index will be resized to the values originally suggested. As
|
||||||
|
/// resizing is expensive, one might want to decrease the overall number of
|
||||||
|
/// resizings. This can be done by setting this number to a value bigger than
|
||||||
|
/// 1.0
|
||||||
|
/// TODO: find a good default value for this
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#define EXTRA_GROWTH_FACTOR (1.0)
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief the actual index struct used
|
/// @brief the actual index struct used
|
||||||
|
@ -104,9 +115,10 @@ typedef struct {
|
||||||
TUBER* _index1;
|
TUBER* _index1;
|
||||||
TUBER* _index2;
|
TUBER* _index2;
|
||||||
TUBER* _index3;
|
TUBER* _index3;
|
||||||
|
uint64_t _ix3KKey;
|
||||||
|
|
||||||
int64_t _maxDocuments;
|
uint64_t _maxDocuments;
|
||||||
int64_t _numDocuments;
|
uint64_t _numDocuments;
|
||||||
|
|
||||||
FTS_texts_t* (*getTexts)(FTS_document_id_t, void*);
|
FTS_texts_t* (*getTexts)(FTS_document_id_t, void*);
|
||||||
void (*freeWordlist)(FTS_texts_t*);
|
void (*freeWordlist)(FTS_texts_t*);
|
||||||
|
@ -168,6 +180,20 @@ static uint64_t GetUnicode (uint8_t** ptr) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief translate zstr error code into TRI_error code
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
static int TranslateZStrErrorCode (int zstrErrorCode) {
|
||||||
|
assert(zstrErrorCode != 0);
|
||||||
|
|
||||||
|
if (zstrErrorCode == 2) {
|
||||||
|
return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TRI_ERROR_OUT_OF_MEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief add a document to the index
|
/// @brief add a document to the index
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -195,6 +221,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
uint64_t bkey = 0;
|
uint64_t bkey = 0;
|
||||||
uint64_t docb, dock;
|
uint64_t docb, dock;
|
||||||
int res;
|
int res;
|
||||||
|
int res2;
|
||||||
|
|
||||||
ix = (FTS_real_index*) ftx;
|
ix = (FTS_real_index*) ftx;
|
||||||
|
|
||||||
|
@ -202,6 +229,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
handle = ix->_firstFree;
|
handle = ix->_firstFree;
|
||||||
if (handle == 0) {
|
if (handle == 0) {
|
||||||
// no more document handles free
|
// no more document handles free
|
||||||
|
LOG_ERROR("fail on %d", __LINE__);
|
||||||
return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -227,7 +255,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
|
|
||||||
// check for out of memory
|
// check for out of memory
|
||||||
if (zstrwl == NULL || zstr2a == NULL || zstr2b == NULL || x3zstr == NULL || x3zstrb == NULL) {
|
if (zstrwl == NULL || zstr2a == NULL || zstr2b == NULL || x3zstr == NULL || x3zstrb == NULL) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,7 +271,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
unicode = GetUnicode(&utf);
|
unicode = GetUnicode(&utf);
|
||||||
while (unicode != 0) {
|
while (unicode != 0) {
|
||||||
if (ZStrEnc(zstrwl, &zcutf, unicode) != 0) {
|
if (ZStrEnc(zstrwl, &zcutf, unicode) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -256,13 +284,13 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
|
|
||||||
// terminate the word and insert into STEX
|
// terminate the word and insert into STEX
|
||||||
if (ZStrEnc(zstrwl, &zcutf, 0) != 0) {
|
if (ZStrEnc(zstrwl, &zcutf, 0) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
ZStrNormalize(zstrwl);
|
ZStrNormalize(zstrwl);
|
||||||
if (ZStrSTAppend(stex, zstrwl) != 0) {
|
if (ZStrSTAppend(stex, zstrwl) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -279,7 +307,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
for (wdx = 0; wdx < nowords; wdx++) {
|
for (wdx = 0; wdx < nowords; wdx++) {
|
||||||
// get it out as a word
|
// get it out as a word
|
||||||
if (ZStrInsert(zstrwl, wpt, 2) != 0) {
|
if (ZStrInsert(zstrwl, wpt, 2) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -307,7 +335,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
// get the Z-string for the index-2 entry before this letter
|
// get the Z-string for the index-2 entry before this letter
|
||||||
i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a);
|
i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a);
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
res = TRI_ERROR_INTERNAL; // TODO: check with Richard
|
res = TRI_ERROR_INTERNAL;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -335,6 +363,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
if (newlet != tran) {
|
if (newlet != tran) {
|
||||||
// if not there, create a new index-2 entry for it
|
// if not there, create a new index-2 entry for it
|
||||||
bkey = ZStrTuberIns(ix->_index2, kkey[j], tran);
|
bkey = ZStrTuberIns(ix->_index2, kkey[j], tran);
|
||||||
|
// TODO: check bkey for INSFAIL
|
||||||
kkey[j + 1] = ZStrTuberK(ix->_index2, kkey[j], tran, bkey);
|
kkey[j + 1] = ZStrTuberK(ix->_index2, kkey[j], tran, bkey);
|
||||||
// update old index-2 entry to insert new letter
|
// update old index-2 entry to insert new letter
|
||||||
ZStrCxClear(&zcdelt, &ctx2a);
|
ZStrCxClear(&zcdelt, &ctx2a);
|
||||||
|
@ -344,14 +373,14 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
ZStrClear(zstr2b);
|
ZStrClear(zstr2b);
|
||||||
x64 = ZStrBitsOut(zstr2a, 1);
|
x64 = ZStrBitsOut(zstr2a, 1);
|
||||||
if (ZStrBitsIn(x64, 1, zstr2b) != 0) {
|
if (ZStrBitsIn(x64, 1, zstr2b) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
if (x64 == 1) {
|
if (x64 == 1) {
|
||||||
// copy over the B-key into index 3
|
// copy over the B-key into index 3
|
||||||
docb = ZStrDec(zstr2a, &zcbky);
|
docb = ZStrDec(zstr2a, &zcbky);
|
||||||
if (ZStrEnc(zstr2b, &zcbky, docb) != 0) {
|
if (ZStrEnc(zstr2b, &zcbky, docb) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -364,27 +393,27 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
x64 = ZStrDec(zstr2a, &zcbky);
|
x64 = ZStrDec(zstr2a, &zcbky);
|
||||||
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ZStrEnc(zstr2b, &zcbky, bkey) != 0) {
|
if (ZStrEnc(zstr2b, &zcbky, bkey) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
if (newlet == oldlet) {
|
if (newlet == oldlet) {
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -392,24 +421,25 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
while (newlet != oldlet) {
|
while (newlet != oldlet) {
|
||||||
oldlet = newlet;
|
oldlet = newlet;
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
x64 = ZStrDec(zstr2a, &zcbky);
|
x64 = ZStrDec(zstr2a, &zcbky);
|
||||||
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a);
|
newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a);
|
||||||
}
|
}
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ZStrNormalize(zstr2b);
|
ZStrNormalize(zstr2b);
|
||||||
if (ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b) != 0) {
|
res2 = ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b);
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
if (res2 != 0) {
|
||||||
|
res = TranslateZStrErrorCode(res2);
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -424,7 +454,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
// so read the zstr from index2
|
// so read the zstr from index2
|
||||||
i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a);
|
i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a);
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
res = TRI_ERROR_INTERNAL; // TODO: check with Richard
|
res = TRI_ERROR_INTERNAL;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
// is there already an index-3 entry available?
|
// is there already an index-3 entry available?
|
||||||
|
@ -435,6 +465,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
docb = ZStrTuberIns(ix->_index3, kkey[j], 0);
|
docb = ZStrTuberIns(ix->_index3, kkey[j], 0);
|
||||||
|
// TODO: check docb
|
||||||
// put it into index 2
|
// put it into index 2
|
||||||
ZStrCxClear(&zcdelt, &ctx2a);
|
ZStrCxClear(&zcdelt, &ctx2a);
|
||||||
ZStrCxClear(&zcdelt, &ctx2b);
|
ZStrCxClear(&zcdelt, &ctx2b);
|
||||||
|
@ -443,12 +474,12 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
ZStrClear(zstr2b);
|
ZStrClear(zstr2b);
|
||||||
x64 = ZStrBitsOut(zstr2a, 1);
|
x64 = ZStrBitsOut(zstr2a, 1);
|
||||||
if (ZStrBitsIn(1, 1, zstr2b) != 0) {
|
if (ZStrBitsIn(1, 1, zstr2b) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ZStrEnc(zstr2b, &zcbky, docb) != 0) {
|
if (ZStrEnc(zstr2b, &zcbky, docb) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,18 +492,19 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
x64 = ZStrDec(zstr2a, &zcbky);
|
x64 = ZStrDec(zstr2a, &zcbky);
|
||||||
if (ZStrEnc(zstr2b,&zcbky, x64) != 0) {
|
if (ZStrEnc(zstr2b,&zcbky, x64) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ZStrNormalize(zstr2b);
|
ZStrNormalize(zstr2b);
|
||||||
if (ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b) != 0) {
|
res2 = ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b);
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
if (res2 != 0) {
|
||||||
|
res = TranslateZStrErrorCode(res2);
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -481,7 +513,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
i = ZStrTuberRead(ix->_index3, dock, x3zstr);
|
i = ZStrTuberRead(ix->_index3, dock, x3zstr);
|
||||||
ZStrClear(x3zstrb);
|
ZStrClear(x3zstrb);
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
res = TRI_ERROR_INTERNAL; // TODO: check with Richard
|
res = TRI_ERROR_INTERNAL;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -496,37 +528,38 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, handle) != 0) {
|
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, handle) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
if (newhan == oldhan) {
|
if (newhan == oldhan) {
|
||||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, handle) != 0) {
|
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, handle) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
while (newhan != oldhan) {
|
while (newhan != oldhan) {
|
||||||
oldhan = newhan;
|
oldhan = newhan;
|
||||||
newhan = ZStrCxDec(x3zstr, &zcdoc, &x3ctx);
|
newhan = ZStrCxDec(x3zstr, &zcdoc, &x3ctx);
|
||||||
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ZStrNormalize(x3zstrb);
|
ZStrNormalize(x3zstrb);
|
||||||
if (ZStrTuberUpdate(ix->_index3, dock, x3zstrb) != 0) {
|
res2 = ZStrTuberUpdate(ix->_index3, dock, x3zstrb);
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
if (res2 != 0) {
|
||||||
|
res = TranslateZStrErrorCode(res2);
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -545,7 +578,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
tran = ZStrXlate(&zcutf, ixlet[j2]);
|
tran = ZStrXlate(&zcutf, ixlet[j2]);
|
||||||
i = ZStrTuberRead(ix->_index1, kkey1[j2 + 1], zstr2a);
|
i = ZStrTuberRead(ix->_index1, kkey1[j2 + 1], zstr2a);
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
res = TRI_ERROR_INTERNAL; // TODO: check with Richard
|
res = TRI_ERROR_INTERNAL;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
// look to see if the letter is there
|
// look to see if the letter is there
|
||||||
|
@ -565,6 +598,7 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
if (newlet != tran) {
|
if (newlet != tran) {
|
||||||
// if not there, create a new index-1 entry for it
|
// if not there, create a new index-1 entry for it
|
||||||
bkey = ZStrTuberIns(ix->_index1, kkey1[j2 + 1], tran);
|
bkey = ZStrTuberIns(ix->_index1, kkey1[j2 + 1], tran);
|
||||||
|
// TODO: check bkey
|
||||||
kkey1[j2] = ZStrTuberK(ix->_index1, kkey1[j2 + 1], tran, bkey);
|
kkey1[j2] = ZStrTuberK(ix->_index1, kkey1[j2 + 1], tran, bkey);
|
||||||
// update old index-1 entry to insert new letter
|
// update old index-1 entry to insert new letter
|
||||||
ZStrCxClear(&zcdelt, &ctx2a);
|
ZStrCxClear(&zcdelt, &ctx2a);
|
||||||
|
@ -580,26 +614,26 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
x64 = ZStrDec(zstr2a, &zcbky);
|
x64 = ZStrDec(zstr2a, &zcbky);
|
||||||
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
if (ZStrEnc(zstr2b, &zcbky, bkey) != 0) {
|
if (ZStrEnc(zstr2b, &zcbky, bkey) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
if (newlet == oldlet) {
|
if (newlet == oldlet) {
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -607,24 +641,25 @@ int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* raw
|
||||||
while (newlet != oldlet) {
|
while (newlet != oldlet) {
|
||||||
oldlet = newlet;
|
oldlet = newlet;
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
x64 = ZStrDec(zstr2a, &zcbky);
|
x64 = ZStrDec(zstr2a, &zcbky);
|
||||||
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
if (ZStrEnc(zstr2b, &zcbky, x64) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a);
|
newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a);
|
||||||
}
|
}
|
||||||
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) {
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
res = TRI_ERROR_OUT_OF_MEMORY;
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ZStrNormalize(zstr2b);
|
ZStrNormalize(zstr2b);
|
||||||
if (ZStrTuberUpdate(ix->_index1, kkey1[j2 + 1], zstr2b) != 0) {
|
res2 = ZStrTuberUpdate(ix->_index1, kkey1[j2 + 1], zstr2b);
|
||||||
LOG_INFO("oom triggered in %d", __LINE__); res = TRI_ERROR_OUT_OF_MEMORY;
|
if (res2 != 0) {
|
||||||
|
res = TranslateZStrErrorCode(res2);
|
||||||
goto oom;
|
goto oom;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -685,11 +720,15 @@ static int RealDeleteDocument (FTS_index_t* ftx, FTS_document_id_t docid) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i > ix->_lastSlot) {
|
if (i > ix->_lastSlot) {
|
||||||
|
LOG_ERROR("fail on %d", __LINE__);
|
||||||
return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
ix->_handlesFree[i] = 1;
|
ix->_handlesFree[i] = 1;
|
||||||
ix->_numDocuments--;
|
if (ix->_numDocuments > 0) {
|
||||||
|
// should never underflow
|
||||||
|
ix->_numDocuments--;
|
||||||
|
}
|
||||||
|
|
||||||
return TRI_ERROR_NO_ERROR;
|
return TRI_ERROR_NO_ERROR;
|
||||||
}
|
}
|
||||||
|
@ -946,11 +985,17 @@ static void Ix2Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk2) {
|
||||||
/// @brief index recursion, prefix matching
|
/// @brief index recursion, prefix matching
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static void Ix1Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk1, uint64_t* wd) {
|
static int Ix1Recurs (STEX* dochan,
|
||||||
|
FTS_real_index* ix,
|
||||||
|
uint64_t kk1,
|
||||||
|
uint64_t* wd) {
|
||||||
ZSTR* zstr;
|
ZSTR* zstr;
|
||||||
CTX ctx;
|
CTX ctx;
|
||||||
uint64_t newlet;
|
uint64_t newlet;
|
||||||
uint64_t kk2;
|
uint64_t kk2;
|
||||||
|
int res;
|
||||||
|
|
||||||
|
res = TRI_ERROR_NO_ERROR;
|
||||||
|
|
||||||
kk2 = FindKKey2(ix,wd);
|
kk2 = FindKKey2(ix,wd);
|
||||||
|
|
||||||
|
@ -961,13 +1006,11 @@ static void Ix1Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk1, uint64_t*
|
||||||
// index 1 entry for this prefix
|
// index 1 entry for this prefix
|
||||||
zstr = ZStrCons(10);
|
zstr = ZStrCons(10);
|
||||||
if (zstr == NULL) {
|
if (zstr == NULL) {
|
||||||
// TODO: out of memory
|
return TRI_ERROR_OUT_OF_MEMORY;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ZStrTuberRead(ix->_index1, kk1, zstr) == 1) {
|
if (ZStrTuberRead(ix->_index1, kk1, zstr) == 1) {
|
||||||
// TODO: make this return an error instead
|
return TRI_ERROR_INTERNAL;
|
||||||
printf("recursion failed to read kk1\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ZStrCxClear(&zcdelt, &ctx);
|
ZStrCxClear(&zcdelt, &ctx);
|
||||||
|
@ -986,10 +1029,16 @@ static void Ix1Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk1, uint64_t*
|
||||||
bkey = ZStrDec(zstr, &zcbky);
|
bkey = ZStrDec(zstr, &zcbky);
|
||||||
newkk1 = ZStrTuberK(ix->_index1, kk1, newlet, bkey);
|
newkk1 = ZStrTuberK(ix->_index1, kk1, newlet, bkey);
|
||||||
*(wd - 1) = newlet;
|
*(wd - 1) = newlet;
|
||||||
Ix1Recurs(dochan, ix, newkk1, wd - 1);
|
|
||||||
|
res = Ix1Recurs(dochan, ix, newkk1, wd - 1);
|
||||||
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ZStrDest(zstr);
|
ZStrDest(zstr);
|
||||||
|
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -1072,15 +1121,9 @@ int FTS_HealthIndex (FTS_index_t* ftx, uint64_t* stats) {
|
||||||
ix = (FTS_real_index*) ftx;
|
ix = (FTS_real_index*) ftx;
|
||||||
|
|
||||||
health = (ix->_numDocuments * 100) / ix->_maxDocuments;
|
health = (ix->_numDocuments * 100) / ix->_maxDocuments;
|
||||||
stats[0] = (health * (ix->_numDocuments + 5)) / 50;
|
|
||||||
|
|
||||||
if (stats[0] < 5) {
|
|
||||||
stats[0] = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ix->_options == FTS_INDEX_SUBSTRINGS) {
|
if (ix->_options == FTS_INDEX_SUBSTRINGS) {
|
||||||
ZStrTuberStats(ix->_index1, st);
|
ZStrTuberStats(ix->_index1, st);
|
||||||
// LOG_TRACE("index 1 health %d size %d", (int) st[0], (int) st[1]);
|
|
||||||
stats[1] = st[1];
|
stats[1] = st[1];
|
||||||
if (health < st[0]) {
|
if (health < st[0]) {
|
||||||
health = st[0];
|
health = st[0];
|
||||||
|
@ -1091,19 +1134,30 @@ int FTS_HealthIndex (FTS_index_t* ftx, uint64_t* stats) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ZStrTuberStats(ix->_index2, st);
|
ZStrTuberStats(ix->_index2, st);
|
||||||
// LOG_TRACE("index 2 health %d size %d", (int) st[0], (int) st[1]);
|
|
||||||
stats[2] = st[1];
|
stats[2] = st[1];
|
||||||
if (health < st[0]) {
|
if (health < st[0]) {
|
||||||
health = st[0];
|
health = st[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
ZStrTuberStats(ix->_index3, st);
|
ZStrTuberStats(ix->_index3, st);
|
||||||
// LOG_TRACE("index 3 health %d size %d", (int) st[0], (int) st[1]);
|
|
||||||
stats[3] = st[1];
|
stats[3] = st[1];
|
||||||
if (health < st[0]) {
|
if (health < st[0]) {
|
||||||
health = st[0];
|
health = st[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
stats[0] = (health * (ix->_numDocuments + 5)) / 50;
|
||||||
|
if (stats[0] < (ix->_numDocuments + 5)) {
|
||||||
|
stats[0] = (ix->_numDocuments + 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EXTRA_GROWTH_FACTOR > 1.0) {
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < 4; ++i) {
|
||||||
|
stats[i] = (uint64_t) ((double) stats[i] * (double) EXTRA_GROWTH_FACTOR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return (int) health;
|
return (int) health;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1115,6 +1169,7 @@ int FTS_HealthIndex (FTS_index_t* ftx, uint64_t* stats) {
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
FTS_index_t* FTS_CloneIndex (FTS_index_t* ftx,
|
FTS_index_t* FTS_CloneIndex (FTS_index_t* ftx,
|
||||||
|
FTS_document_id_t excludeDocument,
|
||||||
uint64_t sizes[4]) {
|
uint64_t sizes[4]) {
|
||||||
FTS_real_index* old;
|
FTS_real_index* old;
|
||||||
FTS_index_t* clone;
|
FTS_index_t* clone;
|
||||||
|
@ -1126,15 +1181,24 @@ FTS_index_t* FTS_CloneIndex (FTS_index_t* ftx,
|
||||||
if (clone != NULL) {
|
if (clone != NULL) {
|
||||||
// copy documents
|
// copy documents
|
||||||
FTS_document_id_t i;
|
FTS_document_id_t i;
|
||||||
|
uint64_t count = 0;
|
||||||
|
|
||||||
for (i = 1; i <= old->_lastSlot; i++) {
|
for (i = 1; i <= old->_lastSlot; i++) {
|
||||||
|
FTS_document_id_t found;
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
if (old->_handlesFree[i] == 1) {
|
if (old->_handlesFree[i] == 1) {
|
||||||
|
// document is marked as deleted
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
res = FTS_AddDocument(clone, old->_handles[i]);
|
found = old->_handles[i];
|
||||||
|
if (found == excludeDocument) {
|
||||||
|
// do not insert this document, because the caller will insert it later
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
res = FTS_AddDocument(clone, found);
|
||||||
if (res != TRI_ERROR_NO_ERROR && res != TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
if (res != TRI_ERROR_NO_ERROR && res != TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||||
// if resize fails, everything's ruined
|
// if resize fails, everything's ruined
|
||||||
LOG_ERROR("resizing the fulltext index failed with %d, sizes were: %llu %llu %llu %llu",
|
LOG_ERROR("resizing the fulltext index failed with %d, sizes were: %llu %llu %llu %llu",
|
||||||
|
@ -1147,7 +1211,11 @@ FTS_index_t* FTS_CloneIndex (FTS_index_t* ftx,
|
||||||
FTS_FreeIndex(clone);
|
FTS_FreeIndex(clone);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
++count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOG_DEBUG("cloned %llu documents", (unsigned long long) count);
|
||||||
}
|
}
|
||||||
|
|
||||||
return clone;
|
return clone;
|
||||||
|
@ -1170,11 +1238,11 @@ FTS_index_t* FTS_CreateIndex (void* context,
|
||||||
FTS_real_index* ix;
|
FTS_real_index* ix;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
LOG_INFO("creating fulltext index with sizes %llu %llu %llu %llu",
|
LOG_TRACE("creating fulltext index with sizes %llu %llu %llu %llu",
|
||||||
(unsigned long long) sizes[0],
|
(unsigned long long) sizes[0],
|
||||||
(unsigned long long) sizes[1],
|
(unsigned long long) sizes[1],
|
||||||
(unsigned long long) sizes[2],
|
(unsigned long long) sizes[2],
|
||||||
(unsigned long long) sizes[3]);
|
(unsigned long long) sizes[3]);
|
||||||
|
|
||||||
ix = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(FTS_real_index), false);
|
ix = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(FTS_real_index), false);
|
||||||
if (ix == NULL) {
|
if (ix == NULL) {
|
||||||
|
@ -1194,10 +1262,11 @@ FTS_index_t* FTS_CreateIndex (void* context,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ix->_maxDocuments = (int64_t) sizes[0];
|
ix->_maxDocuments = sizes[0];
|
||||||
ix->_numDocuments = 0;
|
ix->_numDocuments = 0;
|
||||||
ix->_context = context;
|
ix->_context = context;
|
||||||
ix->_options = options;
|
ix->_options = options;
|
||||||
|
ix->_ix3KKey = 0;
|
||||||
|
|
||||||
// wordlists retrieval function
|
// wordlists retrieval function
|
||||||
ix->getTexts = getTexts;
|
ix->getTexts = getTexts;
|
||||||
|
@ -1335,11 +1404,11 @@ int FTS_AddDocument (FTS_index_t* ftx, FTS_document_id_t docid) {
|
||||||
|
|
||||||
health = FTS_HealthIndex(ftx, sizes);
|
health = FTS_HealthIndex(ftx, sizes);
|
||||||
if (health > HEALTH_THRESHOLD || res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
if (health > HEALTH_THRESHOLD || res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||||
LOG_INFO("Add document: health exceeds threshold. suggested sizes are: %llu %llu %llu %llu",
|
LOG_TRACE("fulltext index health threshold exceeded. new suggested sizes are: %llu %llu %llu %llu",
|
||||||
(unsigned long long) sizes[0],
|
(unsigned long long) sizes[0],
|
||||||
(unsigned long long) sizes[1],
|
(unsigned long long) sizes[1],
|
||||||
(unsigned long long) sizes[2],
|
(unsigned long long) sizes[2],
|
||||||
(unsigned long long) sizes[3]);
|
(unsigned long long) sizes[3]);
|
||||||
res = TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
res = TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1392,13 +1461,110 @@ int FTS_UpdateDocument (FTS_index_t* ftx, FTS_document_id_t docid) {
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief current not called. TODO: find out what its intention is
|
/// @brief read index3 and remove handles of unused documents. stop after docs
|
||||||
|
/// deletions. the scan & cleanup is incremental
|
||||||
|
/// the caller must have write-locked the index
/// docs is the budget of document handles to examine in this call; the scan
/// position is kept in the index (_ix3KKey) between calls
/// returns 0 if the budget ran out before the last index3 key was processed,
/// 3 after the last index3 key was processed (the position wraps to 0),
/// 1 if ZStrTuberRead reported 2 or if allocating/encoding failed, or the
/// non-zero result of ZStrTuberUpdate
/// NOTE(review): the value 1 is used for both an error and a regular exit, so
/// callers cannot tell them apart -- confirm this is intended
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
void FTS_BackgroundTask (FTS_index_t* ftx) {
|
int FTS_BackgroundTask (FTS_index_t * ftx, int docs) {
|
||||||
/* obtain LOCKMAIN */
|
FTS_real_index * ix;
|
||||||
/* remove deleted handles from index3 not done QQQ */
|
int dleft,i; // dleft: remaining handle budget; i: status of the last ZStr call
|
||||||
/* release LOCKMAIN */
|
CTX cold, cnew; // cold: decode context for zold; cnew: encode context for znew
|
||||||
|
int cd; // return code of a regular (non-error) exit
|
||||||
|
uint64_t newterm,oldhan,han; // newterm: last kept handle; oldhan: previously decoded handle
|
||||||
|
ZSTR *zold, *znew; // zold: entry as read from index3; znew: entry rebuilt without deleted handles
|
||||||
|
ix = (FTS_real_index *)ftx;
|
||||||
|
dleft=docs;
|
||||||
|
cd=0; // 0 is what gets returned when the budget runs out
|
||||||
|
|
||||||
|
znew=ZStrCons(100);
|
||||||
|
if(znew==NULL) return 1; // allocation failed
|
||||||
|
zold=ZStrCons(100);
|
||||||
|
if(zold==NULL)
|
||||||
|
{
|
||||||
|
ZStrDest(znew);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
while(dleft>0)
|
||||||
|
{
|
||||||
|
uint64_t numDeletions;
|
||||||
|
|
||||||
|
assert(ix->_ix3KKey < (ix->_index3)->kmax); // the stored scan position must be a valid index3 key
|
||||||
|
|
||||||
|
numDeletions = 0;
|
||||||
|
i=ZStrTuberRead(ix->_index3,ix->_ix3KKey,zold);
|
||||||
|
if(i==2) // NOTE(review): the meaning of result 2 is not visible here; the scan stops with code 1
|
||||||
|
{
|
||||||
|
cd=1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(i==0) // entry was read into zold; any other result skips straight to the next key
|
||||||
|
{
|
||||||
|
ZStrCxClear(&zcdoc,&cold);
|
||||||
|
ZStrCxClear(&zcdoc,&cnew);
|
||||||
|
ZStrClear(znew);
|
||||||
|
oldhan=0;
|
||||||
|
newterm=0;
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
han=ZStrCxDec(zold,&zcdoc,&cold);
|
||||||
|
if(han==oldhan) { // a repeated value ends the list (the rebuilt list below is terminated the same way)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
oldhan=han;
|
||||||
|
dleft--; // every decoded handle counts against the budget
|
||||||
|
if(ix->_handlesFree[han]==0) // entry is 0: keep the handle (presumably it is still in use -- confirm)
|
||||||
|
{
|
||||||
|
i=ZStrCxEnc(znew,&zcdoc,&cnew,han);
|
||||||
|
if(i!=0) { // encoding failed: restart the scan at key 0 next time and bail out
|
||||||
|
ix->_ix3KKey = 0;
|
||||||
|
ZStrDest(znew);
|
||||||
|
ZStrDest(zold);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
newterm=han; // remember the last kept handle for the terminator
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// something was deleted
|
||||||
|
++numDeletions;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (numDeletions > 0) {
|
||||||
|
// update existing entry in tuber
|
||||||
|
// but only if there's something to update
|
||||||
|
|
||||||
|
i=ZStrCxEnc(znew,&zcdoc,&cnew,newterm); // append the last kept handle again as terminator (0 if nothing was kept)
|
||||||
|
if(i!=0) {
|
||||||
|
ix->_ix3KKey = 0;
|
||||||
|
ZStrDest(znew);
|
||||||
|
ZStrDest(zold);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
ZStrNormalize(znew);
|
||||||
|
i=ZStrTuberUpdate(ix->_index3,ix->_ix3KKey,znew); // write the shortened list back under the same key
|
||||||
|
}
|
||||||
|
|
||||||
|
if(i!=0) { // i is still 0 here unless ZStrTuberUpdate failed
|
||||||
|
ix->_ix3KKey = 0;
|
||||||
|
ZStrDest(znew);
|
||||||
|
ZStrDest(zold);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ix->_ix3KKey++; // advance the scan position that persists across calls
|
||||||
|
if(ix->_ix3KKey >= (ix->_index3)->kmax)
|
||||||
|
{
|
||||||
|
ix->_ix3KKey = 0;
|
||||||
|
cd=3; // finished iterating over all document handles
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ZStrDest(znew);
|
||||||
|
ZStrDest(zold);
|
||||||
|
return cd;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -1423,34 +1589,53 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
uint64_t word[2 * (MAX_WORD_LENGTH + SPACING)];
|
uint64_t word[2 * (MAX_WORD_LENGTH + SPACING)];
|
||||||
uint64_t ndocs = 0;
|
uint64_t ndocs = 0;
|
||||||
|
|
||||||
zstr2 = ZStrCons(10); /* from index-2 tuber */
|
// initialise
|
||||||
|
dc = NULL;
|
||||||
|
TRI_set_errno(TRI_ERROR_NO_ERROR);
|
||||||
|
|
||||||
|
zstr2 = ZStrCons(10); /* from index-2 tuber */
|
||||||
if (zstr2 == NULL) {
|
if (zstr2 == NULL) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
zstr3 = ZStrCons(10); /* from index-3 tuber */
|
zstr3 = ZStrCons(10); /* from index-3 tuber */
|
||||||
if (zstr3 == NULL) {
|
if (zstr3 == NULL) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrDest(zstr2);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
zstra1 = ZStrCons(10); /* current list of documents */
|
zstra1 = ZStrCons(10); /* current list of documents */
|
||||||
if (zstra1 == NULL) {
|
if (zstra1 == NULL) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrDest(zstr3);
|
||||||
|
ZStrDest(zstr2);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
zstra2 = ZStrCons(10); /* new list of documents */
|
zstra2 = ZStrCons(10); /* new list of documents */
|
||||||
if (zstra2 == NULL) {
|
if (zstra2 == NULL) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrDest(zstra1);
|
||||||
|
ZStrDest(zstr3);
|
||||||
|
ZStrDest(zstr2);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
zstr = ZStrCons(4); /* work zstr from stex */
|
zstr = ZStrCons(4); /* work zstr from stex */
|
||||||
if (zstr == NULL) {
|
if (zstr == NULL) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrDest(zstra2);
|
||||||
|
ZStrDest(zstra1);
|
||||||
|
ZStrDest(zstr3);
|
||||||
|
ZStrDest(zstr2);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ix = (FTS_real_index*) ftx;
|
ix = (FTS_real_index*) ftx;
|
||||||
|
|
||||||
/* - for each term in the query */
|
// for each term in the query
|
||||||
for (queryterm = 0; queryterm < query->_len; queryterm++) {
|
for (queryterm = 0; queryterm < query->_len; queryterm++) {
|
||||||
if (query->_localOptions[queryterm] == FTS_MATCH_SUBSTRING &&
|
if (query->_localOptions[queryterm] == FTS_MATCH_SUBSTRING &&
|
||||||
ix->_options != FTS_INDEX_SUBSTRINGS) {
|
ix->_options != FTS_INDEX_SUBSTRINGS) {
|
||||||
|
@ -1512,7 +1697,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
}
|
}
|
||||||
if (ix->_handlesFree[newhan] == 0) {
|
if (ix->_handlesFree[newhan] == 0) {
|
||||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
lasthan = newhan;
|
lasthan = newhan;
|
||||||
ndocs++;
|
ndocs++;
|
||||||
|
@ -1541,7 +1727,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
if (newhan == nhand1) {
|
if (newhan == nhand1) {
|
||||||
if (ix->_handlesFree[newhan] == 0) {
|
if (ix->_handlesFree[newhan] == 0) {
|
||||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
lasthan = newhan;
|
lasthan = newhan;
|
||||||
ndocs++;
|
ndocs++;
|
||||||
|
@ -1561,9 +1748,12 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, lasthan) != 0) {
|
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, lasthan) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
ZStrNormalize(zstra2);
|
ZStrNormalize(zstra2);
|
||||||
ztemp = zstra1;
|
ztemp = zstra1;
|
||||||
zstra1 = zstra2;
|
zstra1 = zstra2;
|
||||||
|
@ -1579,7 +1769,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
// make STEX to contain new list of handles
|
// make STEX to contain new list of handles
|
||||||
dochan = ZStrSTCons(2);
|
dochan = ZStrSTCons(2);
|
||||||
if (dochan == NULL) {
|
if (dochan == NULL) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
FillWordBuffer(&word[MAX_WORD_LENGTH + SPACING], query->_texts[queryterm]);
|
FillWordBuffer(&word[MAX_WORD_LENGTH + SPACING], query->_texts[queryterm]);
|
||||||
|
@ -1607,7 +1798,10 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// call routine to recursively put handles to STEX
|
// call routine to recursively put handles to STEX
|
||||||
Ix1Recurs(dochan, ix, kkey, word + MAX_WORD_LENGTH + SPACING);
|
if (Ix1Recurs(dochan, ix, kkey, word + MAX_WORD_LENGTH + SPACING) != TRI_ERROR_NO_ERROR) {
|
||||||
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
goto oom;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ZStrSTSort(dochan);
|
ZStrSTSort(dochan);
|
||||||
|
@ -1625,13 +1819,17 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
uint64_t newhan;
|
uint64_t newhan;
|
||||||
|
|
||||||
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrSTDest(dochan);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
newhan = ZStrDec(zstr, &zcdh);
|
newhan = ZStrDec(zstr, &zcdh);
|
||||||
docpt += ZStrExtLen(docpt, 2);
|
docpt += ZStrExtLen(docpt, 2);
|
||||||
if (ix->_handlesFree[newhan] == 0) {
|
if (ix->_handlesFree[newhan] == 0) {
|
||||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrSTDest(dochan);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
lasthan = newhan;
|
lasthan = newhan;
|
||||||
ndocs++;
|
ndocs++;
|
||||||
|
@ -1652,7 +1850,9 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
|
|
||||||
nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1);
|
nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1);
|
||||||
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrSTDest(dochan);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
newhan = ZStrDec(zstr, &zcdh);
|
newhan = ZStrDec(zstr, &zcdh);
|
||||||
docpt += ZStrExtLen(docpt, 2);
|
docpt += ZStrExtLen(docpt, 2);
|
||||||
|
@ -1667,8 +1867,11 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
if (newhan == nhand1) {
|
if (newhan == nhand1) {
|
||||||
if (ix->_handlesFree[newhan] == 0) {
|
if (ix->_handlesFree[newhan] == 0) {
|
||||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrSTDest(dochan);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
lasthan = newhan;
|
lasthan = newhan;
|
||||||
ndocs++;
|
ndocs++;
|
||||||
}
|
}
|
||||||
|
@ -1676,8 +1879,11 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrSTDest(dochan);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
|
|
||||||
newhan = ZStrDec(zstr, &zcdh);
|
newhan = ZStrDec(zstr, &zcdh);
|
||||||
docpt += ZStrExtLen(docpt, 2);
|
docpt += ZStrExtLen(docpt, 2);
|
||||||
odocs--;
|
odocs--;
|
||||||
|
@ -1693,7 +1899,9 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
if (ZStrInsert(zstr, docpt, 2) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
ZStrSTDest(dochan);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
newhan = ZStrDec(zstr, &zcdh);
|
newhan = ZStrDec(zstr, &zcdh);
|
||||||
docpt += ZStrExtLen(docpt, 2);
|
docpt += ZStrExtLen(docpt, 2);
|
||||||
|
@ -1702,7 +1910,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, lasthan) != 0) {
|
if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, lasthan) != 0) {
|
||||||
// TODO: out of memory
|
TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY);
|
||||||
|
goto oom;
|
||||||
}
|
}
|
||||||
ZStrNormalize(zstra2);
|
ZStrNormalize(zstra2);
|
||||||
ztemp = zstra1;
|
ztemp = zstra1;
|
||||||
|
@ -1738,6 +1947,8 @@ FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
oom:
|
||||||
|
|
||||||
ZStrDest(zstra1);
|
ZStrDest(zstra1);
|
||||||
ZStrDest(zstra2);
|
ZStrDest(zstra2);
|
||||||
ZStrDest(zstr);
|
ZStrDest(zstr);
|
||||||
|
|
|
@ -116,13 +116,14 @@ FTS_texts_t;
|
||||||
/// @brief determine the health of the index
|
/// @brief determine the health of the index
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
int FTS_HealthIndex (FTS_index_t*, uint64_t sizes[4]);
|
int FTS_HealthIndex (FTS_index_t*, uint64_t[4]);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief clone an existing index
|
/// @brief clone an existing index
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
FTS_index_t* FTS_CloneIndex (FTS_index_t*,
|
FTS_index_t* FTS_CloneIndex (FTS_index_t*,
|
||||||
|
FTS_document_id_t,
|
||||||
uint64_t[4]);
|
uint64_t[4]);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -159,6 +160,13 @@ int FTS_DeleteDocument (FTS_index_t*, FTS_document_id_t);
|
||||||
|
|
||||||
int FTS_UpdateDocument (FTS_index_t*, FTS_document_id_t);
|
int FTS_UpdateDocument (FTS_index_t*, FTS_document_id_t);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief read index3 and remove handles of unused documents. stop after docs
|
||||||
|
/// deletions. the scan & cleanup is incremental
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
int FTS_BackgroundTask (FTS_index_t* ftx, int docs);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief perform a search in the index
|
/// @brief perform a search in the index
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -952,10 +952,11 @@ void ZStrTuberStats(TUBER * t, uint64_t * stats)
|
||||||
d2=100-d2;
|
d2=100-d2;
|
||||||
if(d2>d1) d1=d2;
|
if(d2>d1) d1=d2;
|
||||||
d2=((t->wct*8)*(d1+1))/50;
|
d2=((t->wct*8)*(d1+1))/50;
|
||||||
|
/*
|
||||||
printf("fuse %d freebit %d freekey %d kmax %d wct %d lenlen %d\n",
|
printf("fuse %d freebit %d freekey %d kmax %d wct %d lenlen %d\n",
|
||||||
(int)t->fuses, (int)t->freebit, (int)t->freekey, (int)t->kmax,
|
(int)t->fuses, (int)t->freebit, (int)t->freekey, (int)t->kmax,
|
||||||
(int)t->wct, (int)t->lenlen);
|
(int)t->wct, (int)t->lenlen);
|
||||||
|
*/
|
||||||
if(d2<72*t->fuses)d2=72*t->fuses;
|
if(d2<72*t->fuses)d2=72*t->fuses;
|
||||||
stats[0]=d1;
|
stats[0]=d1;
|
||||||
stats[1]=d2;
|
stats[1]=d2;
|
||||||
|
|
|
@ -61,6 +61,7 @@
|
||||||
#include "VocBase/general-cursor.h"
|
#include "VocBase/general-cursor.h"
|
||||||
#include "VocBase/document-collection.h"
|
#include "VocBase/document-collection.h"
|
||||||
#include "VocBase/edge-collection.h"
|
#include "VocBase/edge-collection.h"
|
||||||
|
#include "VocBase/fulltext-query.h"
|
||||||
#include "VocBase/key-generator.h"
|
#include "VocBase/key-generator.h"
|
||||||
#include "VocBase/voc-shaper.h"
|
#include "VocBase/voc-shaper.h"
|
||||||
#include "v8.h"
|
#include "v8.h"
|
||||||
|
@ -614,8 +615,8 @@ static v8::Handle<v8::Value> EnsureFulltextIndex (v8::Arguments const& argv,
|
||||||
const bool create) {
|
const bool create) {
|
||||||
v8::HandleScope scope;
|
v8::HandleScope scope;
|
||||||
|
|
||||||
if (argv.Length() != 1 && argv.Length() != 2) {
|
if (argv.Length() < 1 || argv.Length() > 3) {
|
||||||
return scope.Close(v8::ThrowException(TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION, "usage: ensureFulltext(<attribute>, <indexSubstrings>)")));
|
return scope.Close(v8::ThrowException(TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION, "usage: ensureFulltext(<attribute>, <indexSubstrings>, <minWordLength>)")));
|
||||||
}
|
}
|
||||||
|
|
||||||
string attributeName = TRI_ObjectToString(argv[0]);
|
string attributeName = TRI_ObjectToString(argv[0]);
|
||||||
|
@ -624,10 +625,15 @@ static v8::Handle<v8::Value> EnsureFulltextIndex (v8::Arguments const& argv,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool indexSubstrings = false;
|
bool indexSubstrings = false;
|
||||||
if (argv.Length() == 2) {
|
if (argv.Length() > 1) {
|
||||||
indexSubstrings = TRI_ObjectToBoolean(argv[1]);
|
indexSubstrings = TRI_ObjectToBoolean(argv[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int minWordLength = TRI_FULLTEXT_WORDLENGTH_DEFAULT;
|
||||||
|
if (argv.Length() == 3) {
|
||||||
|
minWordLength = (int) TRI_ObjectToInt64(argv[2]);
|
||||||
|
}
|
||||||
|
|
||||||
// .............................................................................
|
// .............................................................................
|
||||||
// Check that we have a valid collection
|
// Check that we have a valid collection
|
||||||
// .............................................................................
|
// .............................................................................
|
||||||
|
@ -661,14 +667,14 @@ static v8::Handle<v8::Value> EnsureFulltextIndex (v8::Arguments const& argv,
|
||||||
TRI_document_collection_t* document = (TRI_document_collection_t*) primary;
|
TRI_document_collection_t* document = (TRI_document_collection_t*) primary;
|
||||||
|
|
||||||
if (create) {
|
if (create) {
|
||||||
idx = TRI_EnsureFulltextIndexDocumentCollection(document, attributeName.c_str(), indexSubstrings, &created);
|
idx = TRI_EnsureFulltextIndexDocumentCollection(document, attributeName.c_str(), indexSubstrings, minWordLength, &created);
|
||||||
|
|
||||||
if (idx == 0) {
|
if (idx == 0) {
|
||||||
res = TRI_errno();
|
res = TRI_errno();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
idx = TRI_LookupFulltextIndexDocumentCollection(document, attributeName.c_str(), indexSubstrings);
|
idx = TRI_LookupFulltextIndexDocumentCollection(document, attributeName.c_str(), indexSubstrings, minWordLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (idx == 0) {
|
if (idx == 0) {
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
#include "BasicsC/strings.h"
|
#include "BasicsC/strings.h"
|
||||||
#include "ShapedJson/shape-accessor.h"
|
#include "ShapedJson/shape-accessor.h"
|
||||||
#include "VocBase/edge-collection.h"
|
#include "VocBase/edge-collection.h"
|
||||||
|
#include "VocBase/fulltext-query.h"
|
||||||
#include "VocBase/index.h"
|
#include "VocBase/index.h"
|
||||||
#include "VocBase/key-generator.h"
|
#include "VocBase/key-generator.h"
|
||||||
#include "VocBase/voc-shaper.h"
|
#include "VocBase/voc-shaper.h"
|
||||||
|
@ -4091,7 +4092,8 @@ TRI_index_t* TRI_EnsureSkiplistIndexDocumentCollection (TRI_document_collection_
|
||||||
|
|
||||||
static TRI_index_t* LookupFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
static TRI_index_t* LookupFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
||||||
const char* attributeName,
|
const char* attributeName,
|
||||||
const bool indexSubstrings) {
|
const bool indexSubstrings,
|
||||||
|
int minWordLength) {
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
assert(attributeName);
|
assert(attributeName);
|
||||||
|
@ -4107,6 +4109,10 @@ static TRI_index_t* LookupFulltextIndexDocumentCollection (TRI_document_collecti
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fulltext->_minWordLength != minWordLength) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (fulltext->base._fields._length != 1) {
|
if (fulltext->base._fields._length != 1) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -4129,6 +4135,7 @@ static TRI_index_t* LookupFulltextIndexDocumentCollection (TRI_document_collecti
|
||||||
static TRI_index_t* CreateFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
static TRI_index_t* CreateFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
||||||
const char* attributeName,
|
const char* attributeName,
|
||||||
const bool indexSubstrings,
|
const bool indexSubstrings,
|
||||||
|
int minWordLength,
|
||||||
TRI_idx_iid_t iid,
|
TRI_idx_iid_t iid,
|
||||||
bool* created) {
|
bool* created) {
|
||||||
TRI_index_t* idx;
|
TRI_index_t* idx;
|
||||||
|
@ -4140,7 +4147,7 @@ static TRI_index_t* CreateFulltextIndexDocumentCollection (TRI_document_collecti
|
||||||
// a new one.
|
// a new one.
|
||||||
// ...........................................................................
|
// ...........................................................................
|
||||||
|
|
||||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, indexSubstrings);
|
idx = LookupFulltextIndexDocumentCollection(document, attributeName, indexSubstrings, minWordLength);
|
||||||
if (idx != NULL) {
|
if (idx != NULL) {
|
||||||
LOG_TRACE("fulltext-index already created");
|
LOG_TRACE("fulltext-index already created");
|
||||||
|
|
||||||
|
@ -4151,7 +4158,7 @@ static TRI_index_t* CreateFulltextIndexDocumentCollection (TRI_document_collecti
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the fulltext index
|
// Create the fulltext index
|
||||||
idx = TRI_CreateFulltextIndex(&document->base, attributeName, indexSubstrings);
|
idx = TRI_CreateFulltextIndex(&document->base, attributeName, indexSubstrings, minWordLength);
|
||||||
|
|
||||||
// If index id given, use it otherwise use the default.
|
// If index id given, use it otherwise use the default.
|
||||||
if (iid) {
|
if (iid) {
|
||||||
|
@ -4187,9 +4194,11 @@ static int FulltextIndexFromJson (TRI_document_collection_t* document,
|
||||||
TRI_json_t* attribute;
|
TRI_json_t* attribute;
|
||||||
TRI_json_t* fld;
|
TRI_json_t* fld;
|
||||||
TRI_json_t* indexSubstrings;
|
TRI_json_t* indexSubstrings;
|
||||||
|
TRI_json_t* minWordLength;
|
||||||
char* attributeName;
|
char* attributeName;
|
||||||
size_t fieldCount;
|
size_t fieldCount;
|
||||||
bool doIndexSubstrings;
|
bool doIndexSubstrings;
|
||||||
|
int minWordLengthValue;
|
||||||
|
|
||||||
// extract fields
|
// extract fields
|
||||||
fld = ExtractFields(definition, &fieldCount, iid);
|
fld = ExtractFields(definition, &fieldCount, iid);
|
||||||
|
@ -4215,12 +4224,18 @@ static int FulltextIndexFromJson (TRI_document_collection_t* document,
|
||||||
doIndexSubstrings = indexSubstrings->_value._boolean;
|
doIndexSubstrings = indexSubstrings->_value._boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
minWordLength = TRI_LookupArrayJson(definition, "minLength");
|
||||||
|
minWordLengthValue = TRI_FULLTEXT_WORDLENGTH_DEFAULT;
|
||||||
|
if (minWordLength != NULL && minWordLength->_type == TRI_JSON_NUMBER) {
|
||||||
|
minWordLengthValue = (int) minWordLength->_value._number;
|
||||||
|
}
|
||||||
|
|
||||||
// create the index
|
// create the index
|
||||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, doIndexSubstrings);
|
idx = LookupFulltextIndexDocumentCollection(document, attributeName, doIndexSubstrings, minWordLengthValue);
|
||||||
|
|
||||||
if (idx == NULL) {
|
if (idx == NULL) {
|
||||||
bool created;
|
bool created;
|
||||||
idx = CreateFulltextIndexDocumentCollection(document, attributeName, doIndexSubstrings, iid, &created);
|
idx = CreateFulltextIndexDocumentCollection(document, attributeName, doIndexSubstrings, minWordLengthValue, iid, &created);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (idx == NULL) {
|
if (idx == NULL) {
|
||||||
|
@ -4250,7 +4265,8 @@ static int FulltextIndexFromJson (TRI_document_collection_t* document,
|
||||||
|
|
||||||
TRI_index_t* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
TRI_index_t* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
||||||
const char* attributeName,
|
const char* attributeName,
|
||||||
const bool indexSubstrings) {
|
const bool indexSubstrings,
|
||||||
|
int minWordLength) {
|
||||||
TRI_index_t* idx;
|
TRI_index_t* idx;
|
||||||
TRI_primary_collection_t* primary;
|
TRI_primary_collection_t* primary;
|
||||||
|
|
||||||
|
@ -4262,7 +4278,7 @@ TRI_index_t* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_
|
||||||
|
|
||||||
TRI_READ_LOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
TRI_READ_LOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
||||||
|
|
||||||
idx = LookupFulltextIndexDocumentCollection(document, attributeName, indexSubstrings);
|
idx = LookupFulltextIndexDocumentCollection(document, attributeName, indexSubstrings, minWordLength);
|
||||||
|
|
||||||
TRI_READ_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
TRI_READ_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
||||||
|
|
||||||
|
@ -4280,6 +4296,7 @@ TRI_index_t* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_
|
||||||
TRI_index_t* TRI_EnsureFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
TRI_index_t* TRI_EnsureFulltextIndexDocumentCollection (TRI_document_collection_t* document,
|
||||||
const char* attributeName,
|
const char* attributeName,
|
||||||
const bool indexSubstrings,
|
const bool indexSubstrings,
|
||||||
|
int minWordLength,
|
||||||
bool* created) {
|
bool* created) {
|
||||||
TRI_index_t* idx;
|
TRI_index_t* idx;
|
||||||
TRI_primary_collection_t* primary;
|
TRI_primary_collection_t* primary;
|
||||||
|
@ -4292,7 +4309,7 @@ TRI_index_t* TRI_EnsureFulltextIndexDocumentCollection (TRI_document_collection_
|
||||||
|
|
||||||
TRI_WRITE_LOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
TRI_WRITE_LOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
||||||
|
|
||||||
idx = CreateFulltextIndexDocumentCollection(document, attributeName, indexSubstrings, 0, created);
|
idx = CreateFulltextIndexDocumentCollection(document, attributeName, indexSubstrings, minWordLength, 0, created);
|
||||||
|
|
||||||
TRI_WRITE_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
TRI_WRITE_UNLOCK_DOCUMENTS_INDEXES_PRIMARY_COLLECTION(primary);
|
||||||
|
|
||||||
|
|
|
@ -553,7 +553,8 @@ struct TRI_index_s* TRI_EnsureSkiplistIndexDocumentCollection (TRI_document_coll
|
||||||
|
|
||||||
struct TRI_index_s* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_t*,
|
struct TRI_index_s* TRI_LookupFulltextIndexDocumentCollection (TRI_document_collection_t*,
|
||||||
const char*,
|
const char*,
|
||||||
const bool);
|
const bool,
|
||||||
|
int);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief ensures that a fulltext index exists
|
/// @brief ensures that a fulltext index exists
|
||||||
|
@ -562,6 +563,7 @@ struct TRI_index_s* TRI_LookupFulltextIndexDocumentCollection (TRI_document_coll
|
||||||
struct TRI_index_s* TRI_EnsureFulltextIndexDocumentCollection (TRI_document_collection_t*,
|
struct TRI_index_s* TRI_EnsureFulltextIndexDocumentCollection (TRI_document_collection_t*,
|
||||||
const char*,
|
const char*,
|
||||||
const bool,
|
const bool,
|
||||||
|
int,
|
||||||
bool*);
|
bool*);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -35,6 +35,25 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
// --SECTION-- public defines
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @addtogroup VocBase
|
||||||
|
/// @{
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief default minimum word length for a fulltext index
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#define TRI_FULLTEXT_WORDLENGTH_DEFAULT (2)
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @}
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
// --SECTION-- public functions
|
// --SECTION-- public functions
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
|
@ -4115,7 +4115,7 @@ static FTS_texts_t* GetTextsFulltextIndex (FTS_document_id_t document,
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse the document text
|
// parse the document text
|
||||||
words = TRI_get_words(text, textLength, 2, true);
|
words = TRI_get_words(text, textLength, (uint8_t) fulltextIndex->_minWordLength, true);
|
||||||
if (words == NULL) {
|
if (words == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -4149,23 +4149,31 @@ static FTS_texts_t* GetTextsFulltextIndex (FTS_document_id_t document,
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief rebuilds a fulltext index by resizing it and re-adding documents
|
/// @brief creates a new fulltext index with the properties of an existing one,
|
||||||
|
/// but with adjusted (potentially bigger) sizes. The documents from the old
|
||||||
|
/// index will be added into the new index.
|
||||||
|
/// doc will not be re-inserted into the new index. It's the caller's
|
||||||
|
/// responsibility to add it later. This prevents duplicate document entries
|
||||||
|
/// in case document insertion has failed at a certain place. In this case, doc
|
||||||
|
/// might have been in the old index already, and copying the old index and
|
||||||
|
/// inserting doc again will lead to duplicates. So we exclude doc when copying
|
||||||
|
/// the old documents and make it the caller's responsibility to add doc later
|
||||||
/// the caller must have write-locked the index
|
/// the caller must have write-locked the index
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static int ResizeFulltextIndex (TRI_index_t* idx) {
|
static int ResizeFulltextIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc) {
|
||||||
TRI_fulltext_index_t* fulltextIndex;
|
TRI_fulltext_index_t* fulltextIndex;
|
||||||
FTS_index_t* newIndex;
|
FTS_index_t* newIndex;
|
||||||
uint64_t sizes[4];
|
uint64_t sizes[4];
|
||||||
|
|
||||||
LOG_INFO("fulltext index resize was triggered");
|
LOG_DEBUG("resizing fulltext index");
|
||||||
|
|
||||||
fulltextIndex = (TRI_fulltext_index_t*) idx;
|
fulltextIndex = (TRI_fulltext_index_t*) idx;
|
||||||
|
|
||||||
// this call will populate the sizes array
|
// this call will populate the sizes array
|
||||||
FTS_HealthIndex(fulltextIndex->_fulltextIndex, sizes);
|
FTS_HealthIndex(fulltextIndex->_fulltextIndex, sizes);
|
||||||
|
|
||||||
newIndex = FTS_CloneIndex(fulltextIndex->_fulltextIndex, sizes);
|
newIndex = FTS_CloneIndex(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc), sizes);
|
||||||
|
|
||||||
if (newIndex == NULL) {
|
if (newIndex == NULL) {
|
||||||
return TRI_ERROR_OUT_OF_MEMORY;
|
return TRI_ERROR_OUT_OF_MEMORY;
|
||||||
|
@ -4196,11 +4204,17 @@ static int InsertFulltextIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc) {
|
||||||
res = FTS_AddDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc));
|
res = FTS_AddDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc));
|
||||||
|
|
||||||
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||||
// rebuild the index with adjusted (bigger) size
|
// rebuild the index with adjusted (bigger) sizes
|
||||||
res = ResizeFulltextIndex(idx);
|
res = ResizeFulltextIndex(idx, doc);
|
||||||
if (res == TRI_ERROR_NO_ERROR) {
|
if (res == TRI_ERROR_NO_ERROR) {
|
||||||
// insert the document again because previous insert failed
|
// insert the document again because previous insert failed
|
||||||
res = FTS_AddDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc));
|
res = FTS_AddDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc));
|
||||||
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
|
LOG_ERROR("adding document to fulltext index failed: %s", TRI_errno_string(res));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
LOG_ERROR("resizing fulltext index failed: %s", TRI_errno_string(res));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4245,6 +4259,7 @@ static TRI_json_t* JsonFulltextIndex (TRI_index_t* idx, TRI_primary_collection_t
|
||||||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "unique", TRI_CreateBooleanJson(TRI_UNKNOWN_MEM_ZONE, idx->_unique));
|
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "unique", TRI_CreateBooleanJson(TRI_UNKNOWN_MEM_ZONE, idx->_unique));
|
||||||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "type", TRI_CreateStringCopyJson(TRI_UNKNOWN_MEM_ZONE, "fulltext"));
|
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "type", TRI_CreateStringCopyJson(TRI_UNKNOWN_MEM_ZONE, "fulltext"));
|
||||||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "indexSubstrings", TRI_CreateBooleanJson(TRI_UNKNOWN_MEM_ZONE, fulltextIndex->_indexSubstrings));
|
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "indexSubstrings", TRI_CreateBooleanJson(TRI_UNKNOWN_MEM_ZONE, fulltextIndex->_indexSubstrings));
|
||||||
|
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "minWordLength", TRI_CreateNumberJson(TRI_UNKNOWN_MEM_ZONE, (double) fulltextIndex->_minWordLength));
|
||||||
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "fields", fields);
|
TRI_Insert3ArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "fields", fields);
|
||||||
|
|
||||||
return json;
|
return json;
|
||||||
|
@ -4273,11 +4288,7 @@ static int RemoveFulltextIndex (TRI_index_t* idx, TRI_doc_mptr_t const* doc) {
|
||||||
|
|
||||||
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||||
// rebuild the index with adjusted (bigger) size
|
// rebuild the index with adjusted (bigger) size
|
||||||
res = ResizeFulltextIndex(idx);
|
res = ResizeFulltextIndex(idx, doc);
|
||||||
if (res == TRI_ERROR_NO_ERROR) {
|
|
||||||
// delete the document again because previous delete failed
|
|
||||||
res = FTS_DeleteDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) doc));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TRI_WriteUnlockReadWriteLock(&fulltextIndex->_lock);
|
TRI_WriteUnlockReadWriteLock(&fulltextIndex->_lock);
|
||||||
|
@ -4304,10 +4315,10 @@ static int UpdateFulltextIndex (TRI_index_t* idx,
|
||||||
|
|
||||||
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
if (res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) {
|
||||||
// rebuild the index with adjusted (bigger) size
|
// rebuild the index with adjusted (bigger) size
|
||||||
res = ResizeFulltextIndex(idx);
|
res = ResizeFulltextIndex(idx, newDoc);
|
||||||
if (res == TRI_ERROR_NO_ERROR) {
|
if (res == TRI_ERROR_NO_ERROR) {
|
||||||
// update the document again because previous update failed
|
// insert just the new version of the document
|
||||||
res = FTS_UpdateDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) newDoc));
|
res = FTS_AddDocument(fulltextIndex->_fulltextIndex, (FTS_document_id_t) ((intptr_t) newDoc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4322,7 +4333,33 @@ static int UpdateFulltextIndex (TRI_index_t* idx,
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static int CleanupFulltextIndex (TRI_index_t* idx) {
|
static int CleanupFulltextIndex (TRI_index_t* idx) {
|
||||||
LOG_DEBUG("fulltext cleanup called");
|
TRI_fulltext_index_t* fulltextIndex;
|
||||||
|
int res;
|
||||||
|
|
||||||
|
LOG_TRACE("fulltext cleanup called");
|
||||||
|
|
||||||
|
fulltextIndex = (TRI_fulltext_index_t*) idx;
|
||||||
|
|
||||||
|
TRI_WriteLockReadWriteLock(&fulltextIndex->_lock);
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
// this will scan 100.000 document/word pairs at a time
|
||||||
|
// TODO: check if this number is reasonable
|
||||||
|
res = FTS_BackgroundTask(fulltextIndex->_fulltextIndex, 100000);
|
||||||
|
// 0 = ok, but unfinished
|
||||||
|
// 1 = oom
|
||||||
|
// 2 = needs resize
|
||||||
|
// 3 = finished
|
||||||
|
if (res == 3) {
|
||||||
|
// finished cleaning
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// TODO: maybe we want to clean more
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
TRI_WriteUnlockReadWriteLock(&fulltextIndex->_lock);
|
||||||
|
|
||||||
|
LOG_TRACE("finished cleaning up");
|
||||||
|
|
||||||
return TRI_ERROR_NO_ERROR;
|
return TRI_ERROR_NO_ERROR;
|
||||||
}
|
}
|
||||||
|
@ -4346,7 +4383,8 @@ static int CleanupFulltextIndex (TRI_index_t* idx) {
|
||||||
|
|
||||||
TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s* collection,
|
TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s* collection,
|
||||||
const char* attributeName,
|
const char* attributeName,
|
||||||
const bool indexSubstrings) {
|
const bool indexSubstrings,
|
||||||
|
int minWordLength) {
|
||||||
TRI_fulltext_index_t* fulltextIndex;
|
TRI_fulltext_index_t* fulltextIndex;
|
||||||
FTS_index_t* fts;
|
FTS_index_t* fts;
|
||||||
TRI_shaper_t* shaper;
|
TRI_shaper_t* shaper;
|
||||||
|
@ -4354,8 +4392,8 @@ TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s* collectio
|
||||||
TRI_shape_pid_t attribute;
|
TRI_shape_pid_t attribute;
|
||||||
int options;
|
int options;
|
||||||
// default sizes for index. TODO: adjust these
|
// default sizes for index. TODO: adjust these
|
||||||
//uint64_t sizes[4] = { 20050, 100000, 570000, 10000000 };
|
//uint64_t sizes[4] = { 50, 100000, 5000, 1000 };
|
||||||
uint64_t sizes[4] = { 50, 1000, 5700, 10000 };
|
uint64_t sizes[4] = { 500, 1000000, 50000, 10000 };
|
||||||
|
|
||||||
// look up the attribute
|
// look up the attribute
|
||||||
shaper = collection->_shaper;
|
shaper = collection->_shaper;
|
||||||
|
@ -4403,6 +4441,7 @@ TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s* collectio
|
||||||
fulltextIndex->_fulltextIndex = fts;
|
fulltextIndex->_fulltextIndex = fts;
|
||||||
fulltextIndex->_indexSubstrings = indexSubstrings;
|
fulltextIndex->_indexSubstrings = indexSubstrings;
|
||||||
fulltextIndex->_attribute = attribute;
|
fulltextIndex->_attribute = attribute;
|
||||||
|
fulltextIndex->_minWordLength = minWordLength;
|
||||||
|
|
||||||
TRI_InitVectorString(&fulltextIndex->base._fields, TRI_UNKNOWN_MEM_ZONE);
|
TRI_InitVectorString(&fulltextIndex->base._fields, TRI_UNKNOWN_MEM_ZONE);
|
||||||
TRI_PushBackVectorString(&fulltextIndex->base._fields, copy);
|
TRI_PushBackVectorString(&fulltextIndex->base._fields, copy);
|
||||||
|
|
|
@ -235,6 +235,7 @@ typedef struct TRI_fulltext_index_s {
|
||||||
FTS_index_t* _fulltextIndex;
|
FTS_index_t* _fulltextIndex;
|
||||||
TRI_shape_pid_t _attribute;
|
TRI_shape_pid_t _attribute;
|
||||||
TRI_read_write_lock_t _lock;
|
TRI_read_write_lock_t _lock;
|
||||||
|
int _minWordLength;
|
||||||
|
|
||||||
bool _indexSubstrings;
|
bool _indexSubstrings;
|
||||||
}
|
}
|
||||||
|
@ -683,7 +684,8 @@ struct TRI_doc_mptr_s** TRI_LookupFulltextIndex (TRI_index_t*, const char* query
|
||||||
|
|
||||||
TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s*,
|
TRI_index_t* TRI_CreateFulltextIndex (struct TRI_primary_collection_s*,
|
||||||
const char*,
|
const char*,
|
||||||
const bool);
|
const bool,
|
||||||
|
int);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief frees the memory allocated, but does not free the pointer
|
/// @brief frees the memory allocated, but does not free the pointer
|
||||||
|
|
|
@ -384,7 +384,7 @@ function fulltextQuerySuite () {
|
||||||
assertEqual(0, collection.FULLTEXT(idx, "no,cats,allowed").documents.length);
|
assertEqual(0, collection.FULLTEXT(idx, "no,cats,allowed").documents.length);
|
||||||
assertEqual(0, collection.FULLTEXT(idx, "banana").documents.length);
|
assertEqual(0, collection.FULLTEXT(idx, "banana").documents.length);
|
||||||
},
|
},
|
||||||
/*
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief test duplicate entries
|
/// @brief test duplicate entries
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -423,7 +423,7 @@ function fulltextQuerySuite () {
|
||||||
assertEqual(10000, collection.FULLTEXT(idx, "some").documents.length);
|
assertEqual(10000, collection.FULLTEXT(idx, "some").documents.length);
|
||||||
assertEqual(0, collection.FULLTEXT(idx, "banana").documents.length);
|
assertEqual(0, collection.FULLTEXT(idx, "banana").documents.length);
|
||||||
},
|
},
|
||||||
*/
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief test similar entries
|
/// @brief test similar entries
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -806,7 +806,6 @@ function fulltextQuerySuite () {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
/*
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief 4 byte sequences
|
/// @brief 4 byte sequences
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -847,6 +846,7 @@ function fulltextQuerySuite () {
|
||||||
assertEqual(1, collection.FULLTEXT(idx, "타이승려들은,수호사원으로서").documents.length);
|
assertEqual(1, collection.FULLTEXT(idx, "타이승려들은,수호사원으로서").documents.length);
|
||||||
assertEqual(1, collection.FULLTEXT(idx, "prefix:타이승려,prefix:수호사원으").documents.length);
|
assertEqual(1, collection.FULLTEXT(idx, "prefix:타이승려,prefix:수호사원으").documents.length);
|
||||||
assertEqual(1, collection.FULLTEXT(idx, "prefix:조상숭배로").documents.length);
|
assertEqual(1, collection.FULLTEXT(idx, "prefix:조상숭배로").documents.length);
|
||||||
|
/*
|
||||||
require("console").log(4);
|
require("console").log(4);
|
||||||
assertEqual(1, collection.FULLTEXT(idx, "教材教辅考试").documents.length);
|
assertEqual(1, collection.FULLTEXT(idx, "教材教辅考试").documents.length);
|
||||||
// "图书简介 亚马逊图书,中国最大的网上书店。拥有文学,经济管理,少儿,人文社科,生活,艺术,科技,进口原版,期刊杂志等大类,教材教辅考试,历史,国学古籍,法律,军事,宗教,心理学,哲学,健康与养生,旅游与地图,娱乐,两性婚恋,时尚,家居休闲,孕产育儿,文学,小说,传记,青春与动漫绘本,家庭百科,外语,工具书,教育,心理励志,心灵读物,建筑,计算机与网络,科学与自然等数十小类共计300多万种中外图书
|
// "图书简介 亚马逊图书,中国最大的网上书店。拥有文学,经济管理,少儿,人文社科,生活,艺术,科技,进口原版,期刊杂志等大类,教材教辅考试,历史,国学古籍,法律,军事,宗教,心理学,哲学,健康与养生,旅游与地图,娱乐,两性婚恋,时尚,家居休闲,孕产育儿,文学,小说,传记,青春与动漫绘本,家庭百科,外语,工具书,教育,心理励志,心灵读物,建筑,计算机与网络,科学与自然等数十小类共计300多万种中外图书
|
||||||
|
@ -880,8 +880,8 @@ function fulltextQuerySuite () {
|
||||||
assertEqual(1, collection.FULLTEXT(idx, "síðu,rættar,ritstjórni").documents.length);
|
assertEqual(1, collection.FULLTEXT(idx, "síðu,rættar,ritstjórni").documents.length);
|
||||||
require("console").log(73);
|
require("console").log(73);
|
||||||
assertEqual(1, collection.FULLTEXT(idx, "prefix:læt").documents.length);
|
assertEqual(1, collection.FULLTEXT(idx, "prefix:læt").documents.length);
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -250,6 +250,9 @@ char* Utf8Helper::tolower (TRI_memory_zone_t* zone, const char *src, int32_t src
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
utf8_dest = (char*) TRI_Allocate(zone, (srcLength+1) * sizeof(char), false);
|
utf8_dest = (char*) TRI_Allocate(zone, (srcLength+1) * sizeof(char), false);
|
||||||
|
if (utf8_dest == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
dstLength = ucasemap_utf8ToLower(csm.getAlias(),
|
dstLength = ucasemap_utf8ToLower(csm.getAlias(),
|
||||||
utf8_dest,
|
utf8_dest,
|
||||||
|
@ -285,7 +288,9 @@ char* Utf8Helper::tolower (TRI_memory_zone_t* zone, const char *src, int32_t src
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
utf8_dest = TRI_LowerAsciiStringZ(zone, src);
|
utf8_dest = TRI_LowerAsciiStringZ(zone, src);
|
||||||
dstLength = strlen(utf8_dest);
|
if (utf8_dest != 0) {
|
||||||
|
dstLength = strlen(utf8_dest);
|
||||||
|
}
|
||||||
return utf8_dest;
|
return utf8_dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -371,14 +376,16 @@ char* Utf8Helper::toupper (TRI_memory_zone_t* zone, const char *src, int32_t src
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
utf8_dest = TRI_UpperAsciiStringZ(zone, src);
|
utf8_dest = TRI_UpperAsciiStringZ(zone, src);
|
||||||
dstLength = strlen(utf8_dest);
|
if (utf8_dest != NULL) {
|
||||||
|
dstLength = strlen(utf8_dest);
|
||||||
|
}
|
||||||
return utf8_dest;
|
return utf8_dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
|
TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
|
||||||
const size_t textLength,
|
const size_t textLength,
|
||||||
uint8_t minimalLength,
|
uint8_t minimalLength,
|
||||||
bool lowerCase) {
|
bool lowerCase) {
|
||||||
TRI_vector_string_t* words;
|
TRI_vector_string_t* words;
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
UnicodeString word;
|
UnicodeString word;
|
||||||
|
@ -402,10 +409,18 @@ TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
|
||||||
int32_t lowerLength = 0;
|
int32_t lowerLength = 0;
|
||||||
char* lower = tolower(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, lowerLength);
|
char* lower = tolower(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, lowerLength);
|
||||||
|
|
||||||
if (lowerLength == 0) {
|
if (lower == NULL) {
|
||||||
|
// out of memory
|
||||||
TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words);
|
TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (lowerLength == 0) {
|
||||||
|
TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
|
||||||
|
TRI_FreeVectorString(TRI_UNKNOWN_MEM_ZONE, words);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, lower, lowerLength, &textUtf16Length);
|
textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, lower, lowerLength, &textUtf16Length);
|
||||||
TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
|
TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
|
||||||
}
|
}
|
||||||
|
@ -413,6 +428,10 @@ TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
|
||||||
textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, &textUtf16Length);
|
textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, &textUtf16Length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (textUtf16 == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
ULocDataLocaleType type = ULOC_VALID_LOCALE;
|
ULocDataLocaleType type = ULOC_VALID_LOCALE;
|
||||||
const Locale& locale = _coll->getLocale(type, status);
|
const Locale& locale = _coll->getLocale(type, status);
|
||||||
if(U_FAILURE(status)) {
|
if(U_FAILURE(status)) {
|
||||||
|
@ -437,7 +456,9 @@ TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
|
||||||
if (tempUtf16Length >= minimalLength) {
|
if (tempUtf16Length >= minimalLength) {
|
||||||
utext.extractBetween(start, end, tempUtf16, 0);
|
utext.extractBetween(start, end, tempUtf16, 0);
|
||||||
utf8Word = TRI_UCharToUtf8(TRI_UNKNOWN_MEM_ZONE, tempUtf16, tempUtf16Length, &utf8WordLength);
|
utf8Word = TRI_UCharToUtf8(TRI_UNKNOWN_MEM_ZONE, tempUtf16, tempUtf16Length, &utf8WordLength);
|
||||||
TRI_PushBackVectorString(words, utf8Word);
|
if (utf8Word != 0) {
|
||||||
|
TRI_PushBackVectorString(words, utf8Word);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue