diff --git a/arangod/FulltextIndex2/fulltext-index.c b/arangod/FulltextIndex2/fulltext-index.c deleted file mode 100644 index 5801a29f52..0000000000 --- a/arangod/FulltextIndex2/fulltext-index.c +++ /dev/null @@ -1,2166 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -/// @brief full text search -/// -/// @file -/// -/// DISCLAIMER -/// -/// Copyright 2010-2011 triagens GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. -/// -/// Copyright holder is triAGENS GmbH, Cologne, Germany -/// -/// @author R. A. Parker -/// @author Copyright 2012, triagens GmbH, Cologne, Germany -//////////////////////////////////////////////////////////////////////////////// - -#include "fulltext-index.h" - -#include "BasicsC/locks.h" -#include "BasicsC/logging.h" - -#include "FulltextIndex/zstr-include.h" - -// ----------------------------------------------------------------------------- -// --SECTION-- externs -// ----------------------------------------------------------------------------- - -//////////////////////////////////////////////////////////////////////////////// -/// @addtogroup Fulltext -/// @{ -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -/// @brief codes, defined in zcode.c -//////////////////////////////////////////////////////////////////////////////// - -extern ZCOD zcutf; -extern ZCOD zcbky; -extern ZCOD zcdelt; -extern ZCOD zcdoc; -extern ZCOD zckk; -extern ZCOD zcdh; - -//////////////////////////////////////////////////////////////////////////////// -/// @} -//////////////////////////////////////////////////////////////////////////////// - -// ----------------------------------------------------------------------------- -// --SECTION-- private types -// ----------------------------------------------------------------------------- - -//////////////////////////////////////////////////////////////////////////////// -/// @addtogroup Fulltext -/// @{ -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -/// @brief not a valid kkey - 52 bits long! -//////////////////////////////////////////////////////////////////////////////// - -#define NOTFOUND 0xF777777777777 - -//////////////////////////////////////////////////////////////////////////////// -/// @brief maximum number of Unicode characters for an indexed word -//////////////////////////////////////////////////////////////////////////////// - -#define MAX_WORD_LENGTH (40) - -//////////////////////////////////////////////////////////////////////////////// -/// @brief gap between two words in a temporary search buffer -//////////////////////////////////////////////////////////////////////////////// - -#define SPACING (10) - -//////////////////////////////////////////////////////////////////////////////// -/// @brief maximum tolerable occupancy of the index (e.g. 60 %) -//////////////////////////////////////////////////////////////////////////////// - -#define HEALTH_THRESHOLD (75) - -//////////////////////////////////////////////////////////////////////////////// -/// @brief index extra growth factor -/// if 1.0, the index will be resized to the values originally suggested. As -/// resizing is expensive, one might want to decrease the overall number of -/// resizings. This can be done by setting this number to a value bigger than -/// 1.0 -//////////////////////////////////////////////////////////////////////////////// - -#define EXTRA_GROWTH_FACTOR (1.5) - -//////////////////////////////////////////////////////////////////////////////// -/// @brief the actual index struct used -//////////////////////////////////////////////////////////////////////////////// - -typedef struct { - void* _context; // arbitrary context info the index passed to getTexts - int _options; - - FTS_document_id_t* _handles; // array converting handles to docid - uint8_t* _handlesFree; - FTS_document_id_t _firstFree; // start of handle free chain - FTS_document_id_t _lastSlot; - TUBER* _index1; - TUBER* _index2; - TUBER* _index3; - uint64_t _ix3KKey; // current key in background cleanup iteration - - uint64_t _maxDocuments; - uint64_t _numDocuments; - uint64_t _numDeletions; - - FTS_texts_t* (*getTexts)(FTS_document_id_t, void*); - void (*freeWordlist)(FTS_texts_t*); -} -FTS_real_index; - -//////////////////////////////////////////////////////////////////////////////// -/// @} -//////////////////////////////////////////////////////////////////////////////// - -// ----------------------------------------------------------------------------- -// --SECTION-- private functions -// ----------------------------------------------------------------------------- - -//////////////////////////////////////////////////////////////////////////////// -/// @addtogroup Fulltext -/// @{ -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -/// @brief get a unicode character number from a UTF-8 string -//////////////////////////////////////////////////////////////////////////////// - -static uint64_t GetUnicode (uint8_t** ptr) { - uint64_t c1; - - c1 = **ptr; - if (c1 < 128) { - // single byte - (*ptr)++; - return c1; - } - - // multi-byte - if (c1 < 224) { - c1 = ((c1 - 192) << 6) + - (*((*ptr) + 1) - 128); - (*ptr) += 2; - return c1; - } - - if (c1 < 240) { - c1 = ((c1 - 224) << 12) + - ((*((*ptr) + 1) - 128) << 6) + - (*((*ptr) + 2) - 128); - (*ptr) += 3; - return c1; - } - - if (c1 < 248) { - c1 = ((c1 - 240) << 18) + - ((*((*ptr) + 1) - 128) << 12) + - ((*((*ptr) + 2) - 128) << 6) + - (*((*ptr) + 3) - 128); - (*ptr) += 4; - return c1; - } - - return 0; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief translate zstr error code into TRI_error code -//////////////////////////////////////////////////////////////////////////////// - -static int TranslateZStrErrorCode (int zstrErrorCode) { - assert(zstrErrorCode != 0); - - if (zstrErrorCode == 2) { - return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE; - } - - return TRI_ERROR_OUT_OF_MEMORY; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief add a document to the index -//////////////////////////////////////////////////////////////////////////////// - -int RealAddDocument (FTS_index_t* ftx, FTS_document_id_t docid, FTS_texts_t* rawwords) { - FTS_real_index* ix; - CTX ctx2a, ctx2b, x3ctx, x3ctxb; - STEX* stex; - ZSTR* zstrwl; - ZSTR* zstr2a; - ZSTR* zstr2b; - ZSTR* x3zstr; - ZSTR* x3zstrb; - uint64_t letters[MAX_WORD_LENGTH + 2]; - uint64_t ixlet[MAX_WORD_LENGTH + 2]; - uint64_t kkey[MAX_WORD_LENGTH + 2]; /* for word *without* this letter */ - uint64_t kkey1[MAX_WORD_LENGTH + 2]; /* ix1 word whose last letter is this */ - int ixlen; - uint16_t* wpt; - uint64_t handle, newhan, oldhan; - uint64_t kroot1 = 0; /* initialise even if unused. this will prevent compiler warnings */ - int nowords, wdx; - int i, j, len; - uint64_t tran, x64, oldlet, newlet; - uint64_t bkey = 0; - uint64_t docb, dock; - int res; - int res2; - - ix = (FTS_real_index*) ftx; - - // allocate the document handle - handle = ix->_firstFree; - if (handle == 0) { - // no more document handles free - LOG_ERROR("fail on %d", __LINE__); - return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE; - } - - stex = ZStrSTCons(2); /* format 2=uint16 is all that there is! */ - if (stex == NULL) { - return TRI_ERROR_OUT_OF_MEMORY; - } - - // origin of index 2 - kkey[0] = ZStrTuberK(ix->_index2, 0, 0, 0); - - if (ix->_options == FTS_INDEX_SUBSTRINGS) { - kroot1 = ZStrTuberK(ix->_index1, 0, 0, 0); - } - - res = TRI_ERROR_NO_ERROR; - - zstrwl = ZStrCons(25); /* 25 enough for word list */ - zstr2a = ZStrCons(30); /* 30 uint64's is always enough for ix2 */ - zstr2b = ZStrCons(30); - x3zstr = ZStrCons(35); - x3zstrb = ZStrCons(35); - - // check for out of memory - if (zstrwl == NULL || zstr2a == NULL || zstr2b == NULL || x3zstr == NULL || x3zstrb == NULL) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - - // put all words into a STEX - nowords = rawwords->_len; - for (i = 0; i < nowords; i++) { - uint64_t unicode; - uint8_t* utf; - - utf = rawwords->_texts[i]; - j = 0; - ZStrClear(zstrwl); - unicode = GetUnicode(&utf); - while (unicode != 0) { - if (ZStrEnc(zstrwl, &zcutf, unicode) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - - unicode = GetUnicode(&utf); - j++; - if (j > MAX_WORD_LENGTH) { - break; - } - } - - // terminate the word and insert into STEX - if (ZStrEnc(zstrwl, &zcutf, 0) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - - ZStrNormalize(zstrwl); - if (ZStrSTAppend(stex, zstrwl) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - - // sort them - ZStrSTSort(stex); - - // set current length of word = 0 - ixlen = 0; - - // for each word in the STEX - nowords = stex->cnt; - wpt = (uint16_t*) stex->list; - for (wdx = 0; wdx < nowords; wdx++) { - // get it out as a word - if (ZStrInsert(zstrwl, wpt, 2) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - - len = 0; - while (1) { - letters[len] = ZStrDec(zstrwl, &zcutf); - if (letters[len] == 0) { - break; - } - len++; - } - - wpt += ZStrExtLen(wpt, 2); - // find out where it first differs from previous one - for (j = 0; j < ixlen; j++) { - if (letters[j] != ixlet[j]) { - break; - } - } - - // for every new letter in the word, get its K-key into array - while (j < len) { - // obtain the translation of the letter - tran = ZStrXlate(&zcutf, letters[j]); - // get the Z-string for the index-2 entry before this letter - i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a); - if (i == 1) { - res = TRI_ERROR_INTERNAL; - goto oom; - } - - x64 = ZStrBitsOut(zstr2a, 1); - if (x64 == 1) { - // skip over the B-key into index 3 - docb = ZStrDec(zstr2a, &zcbky); - } - // look to see if the letter is there - ZStrCxClear(&zcdelt, &ctx2a); - newlet = 0; - while (1) { - oldlet = newlet; - newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a); - if (newlet == oldlet) { - break; - } - - bkey = ZStrDec(zstr2a, &zcbky); - if (newlet >= tran) { - break; - } - } - - if (newlet != tran) { - // if not there, create a new index-2 entry for it - bkey = ZStrTuberIns(ix->_index2, kkey[j], tran); - if (bkey == INSFAIL) { - res = TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE; - goto oom; - } - kkey[j + 1] = ZStrTuberK(ix->_index2, kkey[j], tran, bkey); - // update old index-2 entry to insert new letter - ZStrCxClear(&zcdelt, &ctx2a); - ZStrCxClear(&zcdelt, &ctx2b); - i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a); - if (i == 1) { - res = TRI_ERROR_INTERNAL; - goto oom; - } - ZStrClear(zstr2b); - x64 = ZStrBitsOut(zstr2a, 1); - if (ZStrBitsIn(x64, 1, zstr2b) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - if (x64 == 1) { - // copy over the B-key into index 3 - docb = ZStrDec(zstr2a, &zcbky); - if (ZStrEnc(zstr2b, &zcbky, docb) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - - newlet = 0; - while (1) { - oldlet = newlet; - newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a); - if (newlet == oldlet || newlet > tran) { - break; - } - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - x64 = ZStrDec(zstr2a, &zcbky); - if (ZStrEnc(zstr2b, &zcbky, x64) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - - if (ZStrEnc(zstr2b, &zcbky, bkey) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - if (newlet == oldlet) { - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - else { - while (newlet != oldlet) { - oldlet = newlet; - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - x64 = ZStrDec(zstr2a, &zcbky); - if (ZStrEnc(zstr2b, &zcbky, x64) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a); - } - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - ZStrNormalize(zstr2b); - res2 = ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b); - if (res2 != 0) { - res = TranslateZStrErrorCode(res2); - goto oom; - } - } - else { - // if it is, get its KKey and put in (next) slot - kkey[j + 1] = ZStrTuberK(ix->_index2, kkey[j], tran, bkey); - } - j++; - } - - // kkey[j] is kkey of whole word. - // so read the zstr from index2 - i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a); - if (i == 1) { - res = TRI_ERROR_INTERNAL; - goto oom; - } - // is there already an index-3 entry available? - x64 = ZStrBitsOut(zstr2a, 1); - // If so, get its b-key - if(x64 == 1) { - docb = ZStrDec(zstr2a, &zcbky); - } - else { - docb = ZStrTuberIns(ix->_index3, kkey[j], 0); - if (docb == INSFAIL) { - res = TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE; - goto oom; - } - // put it into index 2 - ZStrCxClear(&zcdelt, &ctx2a); - ZStrCxClear(&zcdelt, &ctx2b); - i = ZStrTuberRead(ix->_index2, kkey[j], zstr2a); - if (i == 1) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - ZStrClear(zstr2b); - x64 = ZStrBitsOut(zstr2a, 1); - if (ZStrBitsIn(1, 1, zstr2b) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - - if (ZStrEnc(zstr2b, &zcbky, docb) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - - newlet = 0; - while (1) { - oldlet = newlet; - newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a); - if (newlet == oldlet) { - break; - } - - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - x64 = ZStrDec(zstr2a, &zcbky); - if (ZStrEnc(zstr2b,&zcbky, x64) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - ZStrNormalize(zstr2b); - res2 = ZStrTuberUpdate(ix->_index2, kkey[j], zstr2b); - if (res2 != 0) { - res = TranslateZStrErrorCode(res2); - goto oom; - } - } - dock = ZStrTuberK(ix->_index3, kkey[j], 0, docb); - // insert doc handle into index 3 - i = ZStrTuberRead(ix->_index3, dock, x3zstr); - ZStrClear(x3zstrb); - if (i == 1) { - res = TRI_ERROR_INTERNAL; - goto oom; - } - - ZStrCxClear(&zcdoc, &x3ctx); - ZStrCxClear(&zcdoc, &x3ctxb); - newhan = 0; - while (1) { - oldhan = newhan; - newhan = ZStrCxDec(x3zstr, &zcdoc, &x3ctx); - if (newhan == oldhan || newhan > handle) { - break; - } - - if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, handle) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - if (newhan == oldhan) { - if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, handle) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - else { - if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - while (newhan != oldhan) { - oldhan = newhan; - newhan = ZStrCxDec(x3zstr, &zcdoc, &x3ctx); - if (ZStrCxEnc(x3zstrb, &zcdoc, &x3ctxb, newhan) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - } - ZStrNormalize(x3zstrb); - res2 = ZStrTuberUpdate(ix->_index3, dock, x3zstrb); - if (res2 != 0) { - res = TranslateZStrErrorCode(res2); - goto oom; - } - - // copy the word into ix - ixlen = len; - for (j = 0; j < len; j++) { - ixlet[j] = letters[j]; - } - - if (ix->_options == FTS_INDEX_SUBSTRINGS) { - int j1, j2; - - for (j1 = 0; j1 < len; j1++) { - kkey1[j1 + 1] = kroot1; - for (j2 = j1; j2 >= 0; j2--) { - tran = ZStrXlate(&zcutf, ixlet[j2]); - i = ZStrTuberRead(ix->_index1, kkey1[j2 + 1], zstr2a); - if (i == 1) { - res = TRI_ERROR_INTERNAL; - goto oom; - } - // look to see if the letter is there - ZStrCxClear(&zcdelt, &ctx2a); - newlet = 0; - while (1) { - oldlet = newlet; - newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a); - if (newlet == oldlet) { - break; - } - bkey = ZStrDec(zstr2a, &zcbky); - if (newlet >= tran) { - break; - } - } - if (newlet != tran) { - // if not there, create a new index-1 entry for it - bkey = ZStrTuberIns(ix->_index1, kkey1[j2 + 1], tran); - if (bkey == INSFAIL) { - res = TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE; - goto oom; - } - kkey1[j2] = ZStrTuberK(ix->_index1, kkey1[j2 + 1], tran, bkey); - // update old index-1 entry to insert new letter - ZStrCxClear(&zcdelt, &ctx2a); - ZStrCxClear(&zcdelt, &ctx2b); - i = ZStrTuberRead(ix->_index1, kkey1[j2 + 1], zstr2a); - if (i == 1) { - res = TRI_ERROR_INTERNAL; - goto oom; - } - ZStrClear(zstr2b); - newlet = 0; - while (1) { - oldlet = newlet; - newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a); - if (newlet == oldlet || newlet > tran) { - break; - } - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - x64 = ZStrDec(zstr2a, &zcbky); - if (ZStrEnc(zstr2b, &zcbky, x64) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - if (ZStrEnc(zstr2b, &zcbky, bkey) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - if (newlet == oldlet) { - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, tran) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - else { - while (newlet != oldlet) { - oldlet = newlet; - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - x64 = ZStrDec(zstr2a, &zcbky); - if (ZStrEnc(zstr2b, &zcbky, x64) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - newlet = ZStrCxDec(zstr2a, &zcdelt, &ctx2a); - } - if (ZStrCxEnc(zstr2b, &zcdelt, &ctx2b, newlet) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - ZStrNormalize(zstr2b); - res2 = ZStrTuberUpdate(ix->_index1, kkey1[j2 + 1], zstr2b); - if (res2 != 0) { - res = TranslateZStrErrorCode(res2); - goto oom; - } - } - else { - kkey1[j2] = ZStrTuberK(ix->_index1, kkey1[j2 + 1], tran, bkey); - } - } - } - } - } - - ix->_numDocuments++; - - // insert the handle - ix->_firstFree = ix->_handles[handle]; - ix->_handles[handle] = docid; - ix->_handlesFree[handle] = 0; - -oom: - ZStrSTDest(stex); - - if (zstrwl != NULL) { - ZStrDest(zstrwl); - } - if (zstr2a != NULL) { - ZStrDest(zstr2a); - } - if (zstr2b != NULL) { - ZStrDest(zstr2b); - } - if (x3zstr != NULL) { - ZStrDest(x3zstr); - } - if (x3zstrb != NULL) { - ZStrDest(x3zstrb); - } - - return res; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief delete a document from the index -//////////////////////////////////////////////////////////////////////////////// - -static int RealDeleteDocument (FTS_index_t* ftx, FTS_document_id_t docid) { - FTS_real_index* ix; - FTS_document_id_t i; - - ix = (FTS_real_index*) ftx; - for (i = 1; i <= ix->_lastSlot; i++) { - if (ix->_handlesFree[i] == 1) { - continue; - } - - if (ix->_handles[i] == docid) { - break; - } - } - - if (i > ix->_lastSlot) { - LOG_ERROR("fail on %d", __LINE__); - return TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE; - } - - ix->_handlesFree[i] = 1; - if (ix->_numDocuments > 0) { - // should never underflow - ix->_numDocuments--; - } - - ix->_numDeletions++; - - return TRI_ERROR_NO_ERROR; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief find a key - prefix or substring matching -//////////////////////////////////////////////////////////////////////////////// - -static uint64_t FindKKey1 (FTS_real_index* ix, uint64_t* word) { - ZSTR* zstr; - CTX ctx; - uint64_t* wd; - uint64_t bkey, kk1; - - zstr = ZStrCons(10); - if (zstr == NULL) { - // actually an out-of-memory error would be more appropriate here - return NOTFOUND; - } - - wd = word; - while (*wd != 0) { - wd++; - } - - kk1 = ZStrTuberK(ix->_index2, 0, 0, 0); - - while (1) { - uint64_t tran; - uint64_t newlet; - - if (wd == word) { - break; - } - - tran = *(--wd); - // get the Z-string for the index-1 entry of this key - if (ZStrTuberRead(ix->_index1, kk1, zstr) == 1) { - kk1 = NOTFOUND; - break; - } - - ZStrCxClear(&zcdelt, &ctx); - newlet = 0; - while (1) { - uint64_t oldlet; - - oldlet = newlet; - newlet = ZStrCxDec(zstr, &zcdelt, &ctx); - if (newlet == oldlet) { - kk1 = NOTFOUND; - break; - } - - bkey = ZStrDec(zstr, &zcbky); - if (newlet > tran) { - kk1 = NOTFOUND; - break; - } - if (newlet == tran) { - break; - } - } - - if (kk1 == NOTFOUND) { - break; - } - - kk1 = ZStrTuberK(ix->_index1, kk1, tran, bkey); - } - - ZStrDest(zstr); - return kk1; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief find a key - complete matching -//////////////////////////////////////////////////////////////////////////////// - -static uint64_t FindKKey2 (FTS_real_index* ix, uint64_t* word) { - ZSTR* zstr; - CTX ctx; - uint64_t kk2; - - zstr = ZStrCons(10); - if (zstr == NULL) { - // actually an out-of-memory error would be more appropriate here - return NOTFOUND; - } - - kk2 = ZStrTuberK(ix->_index2, 0, 0, 0); - - while (1) { - uint64_t tran; - uint64_t newlet; - uint64_t bkey; - - tran = *(word++); - if (tran == 0) { - break; - } - // get the Z-string for the index-2 entry of this key - if (ZStrTuberRead(ix->_index2, kk2, zstr) == 1) { - kk2 = NOTFOUND; - break; - } - - if (ZStrBitsOut(zstr, 1) == 1) { - uint64_t docb; - - // skip over the B-key into index 3 - docb = ZStrDec(zstr, &zcbky); - // silly use of docb to get rid of compiler warning - if (docb == 0xffffff) { - // actually some "internal error" code would be more appropriate here - ZStrDest(zstr); - return NOTFOUND; - } - } - ZStrCxClear(&zcdelt, &ctx); - - newlet = 0; - while (1) { - uint64_t oldlet; - - oldlet = newlet; - newlet = ZStrCxDec(zstr, &zcdelt, &ctx); - if (newlet == oldlet) { - kk2 = NOTFOUND; - break; - } - - bkey = ZStrDec(zstr, &zcbky); - if (newlet > tran) { - kk2 = NOTFOUND; - break; - } - if (newlet == tran) { - break; - } - } - - if (kk2 == NOTFOUND) { - break; - } - - kk2 = ZStrTuberK(ix->_index2, kk2, tran, bkey); - } - - ZStrDest(zstr); - return kk2; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief index recursion, complete matching -/// for each query term, update zstra2 to only contain handles matching that -/// also recursive index 2 handles kk2 to dochan STEX using zcdh -//////////////////////////////////////////////////////////////////////////////// - -static int Ix2Recurs (STEX* dochan, FTS_real_index* ix, uint64_t kk2) { - ZSTR* zstr2; - ZSTR* zstr3; - ZSTR* zstr; - CTX ctx2, ctx3; - uint64_t newlet; - int res; - - // index 2 entry for this prefix - zstr2 = ZStrCons(10); - if (zstr2 == NULL) { - return TRI_ERROR_OUT_OF_MEMORY; - } - - // index 3 entry for this prefix (if any) - zstr3 = ZStrCons(10); - if (zstr3 == NULL) { - ZStrDest(zstr2); - return TRI_ERROR_OUT_OF_MEMORY; - } - - // single doc handle work area - zstr = ZStrCons(2); - if (zstr == NULL) { - ZStrDest(zstr3); - ZStrDest(zstr2); - return TRI_ERROR_OUT_OF_MEMORY; - } - - if (ZStrTuberRead(ix->_index2, kk2, zstr2) == 1) { - ZStrDest(zstr); - ZStrDest(zstr3); - ZStrDest(zstr2); - return TRI_ERROR_INTERNAL; - } - - res = TRI_ERROR_NO_ERROR; - - if (ZStrBitsOut(zstr2, 1) == 1) { - // process the documents into the STEX - // uses zcdh not LastEnc because it must sort into - // numerical order - uint64_t docb; - uint64_t dock; - uint64_t newhan; - int i; - - docb = ZStrDec(zstr2, &zcbky); - dock = ZStrTuberK(ix->_index3, kk2, 0, docb); - i = ZStrTuberRead(ix->_index3, dock, zstr3); - if (i == 1) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - ZStrCxClear(&zcdoc, &ctx3); - - newhan = 0; - while (1) { - uint64_t oldhan; - - oldhan = newhan; - newhan = ZStrCxDec(zstr3, &zcdoc, &ctx3); - if (newhan == oldhan) { - break; - } - - if (ix->_handlesFree[newhan] == 0) { - ZStrClear(zstr); - if (ZStrEnc(zstr, &zcdh, newhan) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - if (ZStrSTAppend(dochan, zstr) != 0) { - res = TRI_ERROR_OUT_OF_MEMORY; - goto oom; - } - } - } - } - ZStrCxClear(&zcdelt, &ctx2); - - newlet = 0; - while (1) { - uint64_t oldlet; - uint64_t newkk2; - uint64_t bkey; - - oldlet = newlet; - newlet = ZStrCxDec(zstr2, &zcdelt, &ctx2); - if (newlet == oldlet) { - break; - } - - bkey = ZStrDec(zstr2, &zcbky); - newkk2 = ZStrTuberK(ix->_index2, kk2, newlet, bkey); - res = Ix2Recurs(dochan, ix, newkk2); - if (res != TRI_ERROR_NO_ERROR) { - break; - } - } - -oom: - ZStrDest(zstr2); - ZStrDest(zstr3); - ZStrDest(zstr); - - return res; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief index recursion, prefix matching -//////////////////////////////////////////////////////////////////////////////// - -static int Ix1Recurs (STEX* dochan, - FTS_real_index* ix, - uint64_t kk1, - uint64_t* wd) { - ZSTR* zstr; - CTX ctx; - uint64_t newlet; - uint64_t kk2; - int res; - - res = TRI_ERROR_NO_ERROR; - - kk2 = FindKKey2(ix,wd); - - if (kk2 != NOTFOUND) { - res = Ix2Recurs(dochan, ix, kk2); - if (res != TRI_ERROR_NO_ERROR) { - return res; - } - } - - // index 1 entry for this prefix - zstr = ZStrCons(10); - if (zstr == NULL) { - return TRI_ERROR_OUT_OF_MEMORY; - } - - if (ZStrTuberRead(ix->_index1, kk1, zstr) == 1) { - return TRI_ERROR_INTERNAL; - } - - ZStrCxClear(&zcdelt, &ctx); - newlet = 0; - - while (1) { - uint64_t oldlet; - uint64_t bkey; - uint64_t newkk1; - - oldlet = newlet; - newlet = ZStrCxDec(zstr, &zcdelt, &ctx); - if (newlet == oldlet) { - break; - } - bkey = ZStrDec(zstr, &zcbky); - newkk1 = ZStrTuberK(ix->_index1, kk1, newlet, bkey); - *(wd - 1) = newlet; - - res = Ix1Recurs(dochan, ix, newkk1, wd - 1); - if (res != TRI_ERROR_NO_ERROR) { - return res; - } - } - - ZStrDest(zstr); - - return res; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief read a unicode word into a buffer of uint64_ts -//////////////////////////////////////////////////////////////////////////////// - -static void FillWordBuffer (uint64_t* target, const uint8_t* source) { - uint8_t* current; - int i; - - current = (uint8_t*) source; - i = 0; - while (1) { - uint64_t unicode = GetUnicode(¤t); - - target[i++] = ZStrXlate(&zcutf, unicode); - if (unicode == 0 || i > MAX_WORD_LENGTH) { - break; - } - } - target[i] = 0; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief add the found documents to the result -//////////////////////////////////////////////////////////////////////////////// - -static void AddResultDocuments (FTS_document_ids_t* result, - FTS_real_index* ftx, - ZSTR* zstr, - CTX* ctx) { - uint64_t newHandle; - uint64_t numDocs; - - newHandle = 0; - numDocs = 0; - - while (1) { - uint64_t oldHandle; - - oldHandle = newHandle; - newHandle = ZStrCxDec(zstr, &zcdoc, ctx); - if (newHandle == oldHandle) { - break; - } - if (ftx->_handlesFree[newHandle] == 0) { - result->_docs[numDocs++] = ftx->_handles[newHandle]; - } - } - result->_len = numDocs; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @} -//////////////////////////////////////////////////////////////////////////////// - -// ----------------------------------------------------------------------------- -// --SECTION-- public functions -// ----------------------------------------------------------------------------- - -//////////////////////////////////////////////////////////////////////////////// -/// @addtogroup Fulltext -/// @{ -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -/// @brief determine the health of the index -/// the health will be returned as an integer with range 0..100 -/// 0 means the index is 0% full and 100 means the index is 100% full -/// values above 60 should trigger an index resize elsewhere -/// the stats array will be populated with appropriate index sizes when the -/// index is going to be resized -//////////////////////////////////////////////////////////////////////////////// - -int FTS_HealthIndex (FTS_index_t* ftx, uint64_t* stats) { - FTS_real_index* ix; - uint64_t st[2]; - uint64_t health; - - ix = (FTS_real_index*) ftx; - - health = (ix->_numDocuments * 100) / ix->_maxDocuments; - - if (ix->_options == FTS_INDEX_SUBSTRINGS) { - ZStrTuberStats(ix->_index1, st); - stats[1] = st[1]; - if (health < st[0]) { - health = st[0]; - } - } - else { - stats[1] = 0; - } - - ZStrTuberStats(ix->_index2, st); - stats[2] = st[1]; - if (health < st[0]) { - health = st[0]; - } - - ZStrTuberStats(ix->_index3, st); - stats[3] = st[1]; - if (health < st[0]) { - health = st[0]; - } - - stats[0] = (health * (ix->_numDocuments + 5)) / 50; - if (stats[0] < (ix->_numDocuments + 5)) { - stats[0] = (ix->_numDocuments + 5); - } - - if (EXTRA_GROWTH_FACTOR > 1.0) { - size_t i; - - for (i = 0; i < 4; ++i) { - stats[i] = (uint64_t) ((double) stats[i] * (double) EXTRA_GROWTH_FACTOR); - } - } - - return (int) health; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief clone an existing index -/// this will copy the properties of the old index, but will take different -/// sizes. This function is called when the index is resized -/// It will also copy the documents from the old index into the new one -//////////////////////////////////////////////////////////////////////////////// - -FTS_index_t* FTS_CloneIndex (FTS_index_t* ftx, - FTS_document_id_t excludeDocument, - uint64_t sizes[4]) { - FTS_real_index* old; - FTS_index_t* clone; - - old = (FTS_real_index*) ftx; - - // create new index - clone = FTS_CreateIndex(old->_context, old->getTexts, old->freeWordlist, old->_options, sizes); - if (clone != NULL) { - // copy documents - FTS_document_id_t i; - uint64_t count = 0; - - for (i = 1; i <= old->_lastSlot; i++) { - FTS_document_id_t found; - int res; - - if (old->_handlesFree[i] == 1) { - // document is marked as deleted - continue; - } - - found = old->_handles[i]; - if (found == excludeDocument) { - // do not insert this document, because the caller will insert it later - continue; - } - - res = FTS_AddDocument(clone, found); - if (res != TRI_ERROR_NO_ERROR && res != TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) { - // if resize fails, everything's ruined - LOG_ERROR("resizing the fulltext index failed with %d, sizes were: %llu %llu %llu %llu", - res, - (unsigned long long) sizes[0], - (unsigned long long) sizes[1], - (unsigned long long) sizes[2], - (unsigned long long) sizes[3]); - - FTS_FreeIndex(clone); - return NULL; - } - - ++count; - } - - LOG_DEBUG("cloned %llu documents", (unsigned long long) count); - } - - return clone; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief create a new fulltext index -/// -/// sizes[0] = size of handles table to start with -/// sizes[1] = number of bytes for index 1 -/// sizes[2] = number of bytes for index 2 -/// sizes[3] = number of bytes for index 3 -//////////////////////////////////////////////////////////////////////////////// - -FTS_index_t* FTS_CreateIndex (void* context, - FTS_texts_t* (*getTexts)(FTS_document_id_t, void*), - void (*freeWordlist)(FTS_texts_t*), - int options, - uint64_t sizes[4]) { - FTS_real_index* ix; - uint64_t i; - - LOG_TRACE("creating fulltext index with sizes %llu %llu %llu %llu", - (unsigned long long) sizes[0], - (unsigned long long) sizes[1], - (unsigned long long) sizes[2], - (unsigned long long) sizes[3]); - - ix = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(FTS_real_index), false); - if (ix == NULL) { - return NULL; - } - - ix->_handles = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, (sizes[0] + 2) * sizeof(FTS_document_id_t), false); - if (ix->_handles == NULL) { - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix); - return NULL; - } - - ix->_handlesFree = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, (sizes[0] + 2) * sizeof(uint8_t), false); - if (ix->_handlesFree == NULL) { - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handles); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix); - return NULL; - } - - ix->_maxDocuments = sizes[0]; - ix->_numDocuments = 0; - ix->_numDeletions = 0; - ix->_context = context; - ix->_options = options; - ix->_ix3KKey = 0; - - // wordlists retrieval function - ix->getTexts = getTexts; - // free function for wordlists - ix->freeWordlist = freeWordlist; - - // set up free chain of document handles - for (i = 1; i < sizes[0]; i++) { - ix->_handles[i] = i + 1; - ix->_handlesFree[i] = 1; - } - - // end of free chain - ix->_handles[sizes[0]] = 0; - ix->_handlesFree[sizes[0]] = 1; - ix->_firstFree = 1; - ix->_lastSlot = sizes[0]; - - // create index 2 - // --------------------------------------------------- - - ix->_index2 = ZStrTuberCons(sizes[2], TUBER_BITS_8); - if (ix->_index2 == NULL) { - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handlesFree); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handles); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix); - - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - return NULL; - } - - if (ZStrTuberIns(ix->_index2, 0, 0) != 0) { - ZStrTuberDest(ix->_index2); - - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handlesFree); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handles); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix); - - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - return NULL; - } - - // create index 3 - // --------------------------------------------------- - - ix->_index3 = ZStrTuberCons(sizes[3], TUBER_BITS_64); - if (ix->_index3 == NULL) { - ZStrTuberDest(ix->_index2); - - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handlesFree); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handles); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix); - - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - return NULL; - } - - // create index 1 - // --------------------------------------------------- - - if (ix->_options == FTS_INDEX_SUBSTRINGS) { - ix->_index1 = ZStrTuberCons(sizes[1], TUBER_BITS_8); - if (ix->_index1 == NULL) { - ZStrTuberDest(ix->_index3); - ZStrTuberDest(ix->_index2); - - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handlesFree); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handles); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix); - - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - return NULL; - } - - if (ZStrTuberIns(ix->_index1, 0, 0) != 0) { - ZStrTuberDest(ix->_index1); - ZStrTuberDest(ix->_index3); - ZStrTuberDest(ix->_index2); - - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handlesFree); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handles); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix); - - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - return NULL; - } - } - - return (FTS_index_t*) ix; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief free an existing fulltext index -//////////////////////////////////////////////////////////////////////////////// - -void FTS_FreeIndex (FTS_index_t* ftx) { - FTS_real_index* ix; - - ix = (FTS_real_index*) ftx; - - if (ix->_options == FTS_INDEX_SUBSTRINGS) { - ZStrTuberDest(ix->_index1); - } - - ZStrTuberDest(ix->_index2); - ZStrTuberDest(ix->_index3); - - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handlesFree); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix->_handles); - TRI_Free(TRI_UNKNOWN_MEM_ZONE, ix); -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief add a document to the index -/// the caller must have write-locked the index -//////////////////////////////////////////////////////////////////////////////// - -int FTS_AddDocument (FTS_index_t* ftx, FTS_document_id_t docid) { - FTS_real_index* ix; - FTS_texts_t* rawwords; - uint64_t sizes[4]; - int health; - int res; - - ix = (FTS_real_index*) ftx; - - // get the actual words from the caller - rawwords = ix->getTexts(docid, ix->_context); - if (rawwords == NULL || rawwords->_len == 0) { - // document does not contain words - return TRI_ERROR_NO_ERROR; - } - - res = RealAddDocument(ftx, docid, rawwords); - - health = FTS_HealthIndex(ftx, sizes); - if (health > HEALTH_THRESHOLD || res == TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE) { - LOG_TRACE("fulltext index health threshold exceeded. new suggested sizes are: %llu %llu %llu %llu", - (unsigned long long) sizes[0], - (unsigned long long) sizes[1], - (unsigned long long) sizes[2], - (unsigned long long) sizes[3]); - res = TRI_ERROR_ARANGO_INDEX_NEEDS_RESIZE; - } - - ix->freeWordlist(rawwords); - - return res; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief delete a document from the index -/// the caller must have write-locked the index -//////////////////////////////////////////////////////////////////////////////// - -int FTS_DeleteDocument (FTS_index_t* ftx, FTS_document_id_t docid) { - int res; - - res = RealDeleteDocument(ftx, docid); - - return res; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief update an existing document in the index -/// the caller must have write-locked the index -//////////////////////////////////////////////////////////////////////////////// - -int FTS_UpdateDocument (FTS_index_t* ftx, FTS_document_id_t docid) { - FTS_real_index* ix; - FTS_texts_t* rawwords; - int res; - - ix = (FTS_real_index*) ftx; - - // get the actual words from the caller - rawwords = ix->getTexts(docid, ix->_context); - if (rawwords == NULL || rawwords->_len == 0) { - // document does not contain words - return TRI_ERROR_NO_ERROR; - } - - RealDeleteDocument(ftx, docid); - res = RealAddDocument(ftx, docid, rawwords); - - ix->freeWordlist(rawwords); - - return res; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief whether or not the index should be cleaned up -//////////////////////////////////////////////////////////////////////////////// - -bool FTS_ShouldCleanupIndex (FTS_index_t* ftx) { - FTS_real_index* ix; - - ix = (FTS_real_index*) ftx; - - return (ix->_numDeletions > FTS_CLEANUP_THRESHOLD); -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief Incremental scan and cleanup routine, called from a background task -/// This reads index3 and removes handles of unused documents. Will stop after -/// stop after scanning document/word pair scans. -/// The caller must have write-locked the index -/// -/// The function may return the following values: -/// 0 = cleanup done, but not finished -/// 1 = out of memory -/// 2 = index needs a resize -/// 3 = cleanup finished -//////////////////////////////////////////////////////////////////////////////// - -int FTS_BackgroundTask (FTS_index_t* ftx, int docs) { - FTS_real_index* ix; - int dleft, i; - CTX cold; - CTX cnew; - uint64_t newterm; - uint64_t oldhan; - uint64_t han; - ZSTR* zold; - ZSTR* znew; - int result; - - znew = ZStrCons(100); - if (znew == NULL) { - return 1; - } - - zold = ZStrCons(100); - if (zold == NULL) { - ZStrDest(znew); - return 1; - } - - dleft = docs; - result = 0; - ix = (FTS_real_index*) ftx; - - while (dleft > 0) { - uint64_t numDeletions; - - assert(ix->_ix3KKey < (ix->_index3)->kmax); - - numDeletions = 0; - i = ZStrTuberRead(ix->_index3, ix->_ix3KKey, zold); - if (i == 2) { - result = 1; - break; - } - - if (i == 0) { - ZStrCxClear(&zcdoc, &cold); - ZStrCxClear(&zcdoc, &cnew); - ZStrClear(znew); - oldhan = 0; - newterm =0; - while (1) { - han = ZStrCxDec(zold, &zcdoc, &cold); - if (han == oldhan) { - break; - } - - oldhan = han; - dleft--; - - if (ix->_handlesFree[han] == 0) { - i = ZStrCxEnc(znew, &zcdoc, &cnew, han); - if (i != 0) { - ix->_ix3KKey = 0; - ZStrDest(znew); - ZStrDest(zold); - return 1; - } - newterm = han; - } - else { - // something was deleted - ++numDeletions; - } - } - - if (numDeletions > 0) { - // update existing entry in tuber - // but only if there's something to update - - i = ZStrCxEnc(znew, &zcdoc, &cnew, newterm); - if (i != 0) { - ix->_ix3KKey = 0; - ZStrDest(znew); - ZStrDest(zold); - return 1; - } - - if (ix->_numDeletions >= numDeletions) { - ix->_numDeletions -= numDeletions; - } - - ZStrNormalize(znew); - i = ZStrTuberUpdate(ix->_index3, ix->_ix3KKey, znew); - } - - if (i != 0) { - ix->_ix3KKey = 0; - ZStrDest(znew); - ZStrDest(zold); - return i; - } - } - - // next - ix->_ix3KKey++; - - if (ix->_ix3KKey >= (ix->_index3)->kmax) { - ix->_ix3KKey = 0; - result = 3; // finished iterating over all document handles - break; - } - } - - ZStrDest(znew); - ZStrDest(zold); - - return result; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief perform a search in the index -/// The caller must have read-locked the index -//////////////////////////////////////////////////////////////////////////////// - -FTS_document_ids_t* FTS_FindDocuments (FTS_index_t* ftx, - FTS_query_t* query) { - FTS_document_ids_t* dc; - FTS_real_index* ix; - ZSTR* zstr2; - ZSTR* zstr3; - ZSTR* zstra1; - ZSTR* zstra2; - ZSTR* ztemp; - ZSTR* zstr; - CTX ctxa1; - CTX ctxa2; - CTX ctx3; - size_t queryterm; - uint64_t word[2 * (MAX_WORD_LENGTH + SPACING)]; - uint64_t ndocs = 0; - - // initialise - dc = NULL; - TRI_set_errno(TRI_ERROR_NO_ERROR); - - zstr2 = ZStrCons(10); /* from index-2 tuber */ - if (zstr2 == NULL) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - return NULL; - } - - zstr3 = ZStrCons(10); /* from index-3 tuber */ - if (zstr3 == NULL) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrDest(zstr2); - return NULL; - } - - zstra1 = ZStrCons(10); /* current list of documents */ - if (zstra1 == NULL) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrDest(zstr3); - ZStrDest(zstr2); - return NULL; - } - - zstra2 = ZStrCons(10); /* new list of documents */ - if (zstra2 == NULL) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrDest(zstra1); - ZStrDest(zstr3); - ZStrDest(zstr2); - return NULL; - } - - zstr = ZStrCons(4); /* work zstr from stex */ - if (zstr == NULL) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrDest(zstra2); - ZStrDest(zstra1); - ZStrDest(zstr3); - ZStrDest(zstr2); - return NULL; - } - - ix = (FTS_real_index*) ftx; - - // for each term in the query - for (queryterm = 0; queryterm < query->_len; queryterm++) { - if (query->_localOptions[queryterm] == FTS_MATCH_SUBSTRING && - ix->_options != FTS_INDEX_SUBSTRINGS) { - // substring search but index does not contain substrings - ZStrDest(zstra1); - ZStrDest(zstra2); - ZStrDest(zstr); - ZStrDest(zstr2); - ZStrDest(zstr3); - return NULL; - } - -/* Depending on the query type, the objective is do */ -/* populate or "and" zstra1 with the sorted list */ -/* of document handles that match that term */ -/* TBD - what to do if it is not a legal option? */ -/* TBD combine this with other options - no need to use zstring */ - ndocs = 0; - - if (query->_localOptions[queryterm] == FTS_MATCH_COMPLETE) { - uint64_t docb; - uint64_t dock; - uint64_t kkey; - uint64_t lasthan; - - FillWordBuffer(&word[0], query->_texts[queryterm]); - - kkey = FindKKey2(ix, word); - if (kkey == NOTFOUND) { - break; - } - - ZStrTuberRead(ix->_index2, kkey, zstr2); - if (ZStrBitsOut(zstr2, 1) != 1) { - break; - } - - docb = ZStrDec(zstr2, &zcbky); - dock = ZStrTuberK(ix->_index3, kkey, 0, docb); - if (ZStrTuberRead(ix->_index3, dock, zstr3) == 1) { - printf("Kkey not in ix3 - we're terrified\n"); - } - - ZStrCxClear(&zcdoc, &ctx3); - ZStrCxClear(&zcdoc, &ctxa2); - ZStrClear(zstra2); - lasthan = 0; - - if (queryterm == 0) { - uint64_t newhan = 0; - - while (1) { - uint64_t oldhan; - - oldhan = newhan; - newhan = ZStrCxDec(zstr3, &zcdoc, &ctx3); - if (newhan == oldhan) { - break; - } - if (ix->_handlesFree[newhan] == 0) { - if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - goto oom; - } - lasthan = newhan; - ndocs++; - } - } - } - else { - uint64_t nhand1; - uint64_t ohand1; - uint64_t oldhan; - uint64_t newhan; - - ZStrCxClear(&zcdoc, &ctxa1); - ohand1 = 0; - nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1); - oldhan = 0; - newhan = ZStrCxDec(zstr3, &zcdoc, &ctx3); - // zstra1 = zstra1 & zstra2 - while (1) { - if (nhand1 == ohand1) { - break; - } - if (oldhan == newhan) { - break; - } - if (newhan == nhand1) { - if (ix->_handlesFree[newhan] == 0) { - if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - goto oom; - } - lasthan = newhan; - ndocs++; - } - oldhan = newhan; - newhan = ZStrCxDec(zstr3, &zcdoc, &ctx3); - ohand1 = nhand1; - nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1); - } - else if (newhan > nhand1) { - ohand1 = nhand1; - nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1); - } - else { - oldhan = newhan; - newhan = ZStrCxDec(zstr3, &zcdoc, &ctx3); - } - } - } - - if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, lasthan) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - goto oom; - } - - ZStrNormalize(zstra2); - ztemp = zstra1; - zstra1 = zstra2; - zstra2 = ztemp; - } /* end of match-complete code */ - else if ((query->_localOptions[queryterm] == FTS_MATCH_PREFIX) || - (query->_localOptions[queryterm] == FTS_MATCH_SUBSTRING)) { - uint16_t* docpt; - STEX* dochan; - uint64_t odocs; - uint64_t lasthan; - - // make STEX to contain new list of handles - dochan = ZStrSTCons(2); - if (dochan == NULL) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - goto oom; - } - - FillWordBuffer(&word[MAX_WORD_LENGTH + SPACING], query->_texts[queryterm]); - - if (query->_localOptions[queryterm] == FTS_MATCH_PREFIX) { - // prefix matching - uint64_t kkey; - - kkey = FindKKey2(ix, word + MAX_WORD_LENGTH + SPACING); - if (kkey == NOTFOUND) { - ZStrSTDest(dochan); - break; - } - - // call routine to recursively put handles to STEX - if (Ix2Recurs(dochan, ix, kkey) != TRI_ERROR_NO_ERROR) { - ZStrSTDest(dochan); - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - goto oom; - } - } - else if (query->_localOptions[queryterm] == FTS_MATCH_SUBSTRING) { - // substring matching - uint64_t kkey; - - kkey = FindKKey1(ix, word + MAX_WORD_LENGTH + SPACING); - if (kkey == NOTFOUND) { - ZStrSTDest(dochan); - break; - } - // call routine to recursively put handles to STEX - if (Ix1Recurs(dochan, ix, kkey, word + MAX_WORD_LENGTH + SPACING) != TRI_ERROR_NO_ERROR) { - ZStrSTDest(dochan); - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - goto oom; - } - } - - ZStrSTSort(dochan); - - odocs = dochan->cnt; - docpt = dochan->list; - ZStrCxClear(&zcdoc, &ctxa2); - ZStrClear(zstra2); - lasthan = 0; - - if (queryterm == 0) { - uint64_t i; - - for (i = 0; i < odocs; i++) { - uint64_t newhan; - - if (ZStrInsert(zstr, docpt, 2) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrSTDest(dochan); - goto oom; - } - newhan = ZStrDec(zstr, &zcdh); - docpt += ZStrExtLen(docpt, 2); - if (ix->_handlesFree[newhan] == 0) { - if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrSTDest(dochan); - goto oom; - } - lasthan = newhan; - ndocs++; - } - } - } - else { - // merge prefix stex with zstra1 - uint64_t newhan; - uint64_t nhand1; - uint64_t ohand1; - - ZStrCxClear(&zcdoc, &ctxa1); - if (odocs == 0) { - ZStrSTDest(dochan); - continue; - } - - nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1); - if (ZStrInsert(zstr, docpt, 2) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrSTDest(dochan); - goto oom; - } - newhan = ZStrDec(zstr, &zcdh); - docpt += ZStrExtLen(docpt, 2); - odocs--; - ohand1 = 0; - - // zstra1 = zstra1 & zstra2 - while (1) { - if (nhand1 == ohand1) { - break; - } - if (newhan == nhand1) { - if (ix->_handlesFree[newhan] == 0) { - if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, newhan) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrSTDest(dochan); - goto oom; - } - - lasthan = newhan; - ndocs++; - } - if (odocs == 0) { - break; - } - if (ZStrInsert(zstr, docpt, 2) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrSTDest(dochan); - goto oom; - } - - newhan = ZStrDec(zstr, &zcdh); - docpt += ZStrExtLen(docpt, 2); - odocs--; - ohand1 = nhand1; - nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1); - } - else if (newhan > nhand1) { - ohand1 = nhand1; - nhand1 = ZStrCxDec(zstra1, &zcdoc, &ctxa1); - } - else { - if (odocs == 0) { - break; - } - if (ZStrInsert(zstr, docpt, 2) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - ZStrSTDest(dochan); - goto oom; - } - newhan = ZStrDec(zstr, &zcdh); - docpt += ZStrExtLen(docpt, 2); - odocs--; - } - } - } - if (ZStrCxEnc(zstra2, &zcdoc, &ctxa2, lasthan) != 0) { - TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); - goto oom; - } - ZStrNormalize(zstra2); - ztemp = zstra1; - zstra1 = zstra2; - zstra2 = ztemp; - ZStrSTDest(dochan); - } /* end of match-prefix code */ - } - - - // prepare the result set - dc = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(FTS_document_ids_t), false); - if (dc == NULL) { - // out of memory - } - else { - // init result set - dc->_len = 0; - dc->_docs = NULL; - - if (ndocs > 0) { - // we found some results - dc->_docs = TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, ndocs * sizeof(FTS_document_id_t), false); - if (dc->_docs != NULL) { - ZStrCxClear(&zcdoc, &ctxa1); - AddResultDocuments(dc, ix, zstra1, &ctxa1); - } - else { - // this will trigger an out of memory error at the call size - TRI_Free(TRI_UNKNOWN_MEM_ZONE, dc); - dc = NULL; - } - } - } - -oom: - - ZStrDest(zstra1); - ZStrDest(zstra2); - ZStrDest(zstr); - ZStrDest(zstr2); - ZStrDest(zstr3); - - return dc; -} - -//////////////////////////////////////////////////////////////////////////////// -/// @brief free results of a search -//////////////////////////////////////////////////////////////////////////////// - -void FTS_Free_Documents (FTS_document_ids_t* doclist) { - if (doclist->_docs != NULL) { - TRI_Free(TRI_UNKNOWN_MEM_ZONE, doclist->_docs); - } - - TRI_Free(TRI_UNKNOWN_MEM_ZONE, doclist); -} - -//////////////////////////////////////////////////////////////////////////////// -/// @} -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -int xxlet[100]; -void index2dump(FTS_real_index * ix, uint64_t kkey, int lev) -{ - CTX ctx, dctx,x3ctx; - ZSTR *zstr, *x3zstr; - int i,temp,md; - uint64_t x64,oldlet,newlet,bkey,newkkey; - uint64_t docb,dock,han,oldhan; - zstr=ZStrCons(30); - x3zstr=ZStrCons(35); - ZStrCxClear(&zcutf,&ctx); - ZStrCxClear(&zcdelt,&dctx); - ZStrCxClear(&zcdoc,&x3ctx); - for(i=1;i_index2,kkey,zstr); - temp=kkey; - if(i!=0) - { - printf("cannot read kkey = %d from TUBER\n",temp); - return; - } - md=ZStrBitsOut(zstr,1); - temp=kkey; - printf("...kkey %d ",temp); - temp=md; - printf("Md=%d ",temp); - temp=zstr->dat[0]; - printf(" zstr %x",temp); - if(md==1) - { - docb=ZStrCxDec(zstr,&zcbky,&ctx); - temp=docb; - printf(" doc-b = %d",temp); - dock=ZStrTuberK(ix->_index3,kkey,0,docb); - temp=dock; - printf(" doc-k = %d",temp); - } - oldlet=0; - - while(1) - { - newlet=ZStrCxDec(zstr,&zcdelt,&dctx); - if(newlet==oldlet) break; - bkey=ZStrCxDec(zstr,&zcbky,&ctx); - x64=ZStrUnXl(&zcutf,newlet); - temp=x64; - if(temp<128) - printf(" %c",temp); - else - printf(" %x",temp); - temp=bkey; - printf(" %d",temp); - oldlet=newlet; - } - if(md==1) - { - printf("\n --- Docs ---"); - i=ZStrTuberRead(ix->_index3,dock,x3zstr); - oldhan=0; - while(1) - { - han=ZStrCxDec(x3zstr,&zcdoc,&x3ctx); - if(han==oldhan) break; - temp=han; - printf("h= %d ",temp); - temp=ix->_handles[han]; - printf("id= %d; ",temp); - oldhan=han; - } - } - printf("\n"); - i=ZStrTuberRead(ix->_index2,kkey,zstr); - x64=ZStrBitsOut(zstr,1); - if(x64==1) - bkey=ZStrCxDec(zstr,&zcbky,&ctx); - oldlet=0; - ZStrCxClear(&zcdelt,&dctx); - while(1) - { - newlet=ZStrCxDec(zstr,&zcdelt,&dctx); - if(newlet==oldlet) return; - bkey=ZStrCxDec(zstr,&zcbky,&ctx); - newkkey=ZStrTuberK(ix->_index2,kkey,newlet,bkey); - xxlet[lev]=ZStrUnXl(&zcutf,newlet); - index2dump(ix,newkkey,lev+1); - oldlet=newlet; - } -} - -void indexd(FTS_index_t * ftx) -{ - FTS_real_index * ix; - int i; - uint64_t kroot; -int temp; - ix = (FTS_real_index *)ftx; - printf("\n\nDump of Index\n"); -temp=ix->_firstFree; - printf("Free-chain starts at handle %d\n",temp); - printf("======= First ten handles======\n"); - for(i=1;i<11;i++) - { -temp=ix->_handles[i]; - printf("Handle %d is docid %d\n", i,temp); - } - printf("======= Index 2 ===============\n"); - kroot=ZStrTuberK(ix->_index2,0,0,0); - index2dump(ix,kroot,1); -} -#endif - -// Local Variables: -// mode: outline-minor -// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)" -// End: diff --git a/arangod/FulltextIndex2/fulltext-index.h b/arangod/FulltextIndex2/fulltext-index.h deleted file mode 100644 index a2259b83b8..0000000000 --- a/arangod/FulltextIndex2/fulltext-index.h +++ /dev/null @@ -1,214 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -/// @brief full text search -/// -/// @file -/// -/// DISCLAIMER -/// -/// Copyright 2010-2011 triagens GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. -/// -/// Copyright holder is triAGENS GmbH, Cologne, Germany -/// -/// @author R. A. Parker -/// @author Copyright 2012, triagens GmbH, Cologne, Germany -//////////////////////////////////////////////////////////////////////////////// - -#ifndef TRIAGENS_FULLTEXT_FTS_INDEX_H -#define TRIAGENS_FULLTEXT_FTS_INDEX_H 1 - -#include "BasicsC/common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// ----------------------------------------------------------------------------- -// --SECTION-- public types -// ----------------------------------------------------------------------------- - -//////////////////////////////////////////////////////////////////////////////// -/// @addtogroup Fulltext -/// @{ -//////////////////////////////////////////////////////////////////////////////// - -typedef struct FTS_index_s { -} -FTS_index_t; - -//////////////////////////////////////////////////////////////////////////////// -/// @brief number of document word pairs to scan in cleanup procedure -//////////////////////////////////////////////////////////////////////////////// - -#define FTS_CLEANUP_SCAN_AMOUNT 100000 - -//////////////////////////////////////////////////////////////////////////////// -/// @brief only perform cleanup if at least this many documents have been -/// deleted from the index -//////////////////////////////////////////////////////////////////////////////// - -#define FTS_CLEANUP_THRESHOLD 100 - -//////////////////////////////////////////////////////////////////////////////// -/// @brief index options -//////////////////////////////////////////////////////////////////////////////// - -#define FTS_INDEX_SUBSTRINGS 1 - -//////////////////////////////////////////////////////////////////////////////// -/// @brief type of documents indexed -//////////////////////////////////////////////////////////////////////////////// - -typedef uint64_t FTS_document_id_t; - -//////////////////////////////////////////////////////////////////////////////// -/// @brief fulltext query specification -//////////////////////////////////////////////////////////////////////////////// - -typedef struct { - size_t _len; // number of words in the query - uint8_t** _texts; // the actual words we are looking for - int* _localOptions; // options for the words we are looking for -} -FTS_query_t; - -//////////////////////////////////////////////////////////////////////////////// -/// @brief query options -//////////////////////////////////////////////////////////////////////////////// - -#define FTS_MATCH_COMPLETE 1 -#define FTS_MATCH_PREFIX 2 -#define FTS_MATCH_SUBSTRING 4 - -//////////////////////////////////////////////////////////////////////////////// -/// @brief query result list -//////////////////////////////////////////////////////////////////////////////// - -typedef struct { - size_t _len; - FTS_document_id_t* _docs; -} -FTS_document_ids_t; - -//////////////////////////////////////////////////////////////////////////////// -/// @brief type used to hold words for a document (passed by ArangoDB to the -/// index) -//////////////////////////////////////////////////////////////////////////////// - -typedef struct { - size_t _len; - uint8_t** _texts; -} -FTS_texts_t; - -//////////////////////////////////////////////////////////////////////////////// -/// @} -//////////////////////////////////////////////////////////////////////////////// - -// ----------------------------------------------------------------------------- -// --SECTION-- public functions -// ----------------------------------------------------------------------------- - -//////////////////////////////////////////////////////////////////////////////// -/// @addtogroup Fulltext -/// @{ -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -/// @brief determine the health of the index -//////////////////////////////////////////////////////////////////////////////// - -int FTS_HealthIndex (FTS_index_t*, uint64_t[4]); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief whether or not the index should be cleaned up -//////////////////////////////////////////////////////////////////////////////// - -bool FTS_ShouldCleanupIndex (FTS_index_t*); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief clone an existing index -//////////////////////////////////////////////////////////////////////////////// - -FTS_index_t* FTS_CloneIndex (FTS_index_t*, - FTS_document_id_t, - uint64_t[4]); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief create a new fulltext index -//////////////////////////////////////////////////////////////////////////////// - -FTS_index_t* FTS_CreateIndex (void*, - FTS_texts_t* (*)(FTS_document_id_t, void*), - void (*)(FTS_texts_t*), - int, - uint64_t[4]); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief free an existing fulltext index -//////////////////////////////////////////////////////////////////////////////// - -void FTS_FreeIndex (FTS_index_t*); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief add a document to the index -//////////////////////////////////////////////////////////////////////////////// - -int FTS_AddDocument (FTS_index_t*, FTS_document_id_t); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief delete a document from the index -//////////////////////////////////////////////////////////////////////////////// - -int FTS_DeleteDocument (FTS_index_t*, FTS_document_id_t); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief update an existing document in the index -//////////////////////////////////////////////////////////////////////////////// - -int FTS_UpdateDocument (FTS_index_t*, FTS_document_id_t); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief read index3 and remove handles of unused documents. stop after docs -/// deletions. the bc struct can be used to do an incremental scan & cleanup -//////////////////////////////////////////////////////////////////////////////// - -int FTS_BackgroundTask (FTS_index_t* ftx, int docs); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief perform a search in the index -//////////////////////////////////////////////////////////////////////////////// - -FTS_document_ids_t* FTS_FindDocuments (FTS_index_t*, FTS_query_t*); - -//////////////////////////////////////////////////////////////////////////////// -/// @brief free results of a search -//////////////////////////////////////////////////////////////////////////////// - -void FTS_Free_Documents (FTS_document_ids_t*); - -//////////////////////////////////////////////////////////////////////////////// -/// @} -//////////////////////////////////////////////////////////////////////////////// - -#ifdef __cplusplus -} -#endif - -#endif - -// Local Variables: -// mode: outline-minor -// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)" -// End: diff --git a/arangod/FulltextIndex2/zcode.c b/arangod/FulltextIndex2/zcode.c deleted file mode 100644 index 34ca6ffb98..0000000000 --- a/arangod/FulltextIndex2/zcode.c +++ /dev/null @@ -1,99 +0,0 @@ -/* zcode.c - the Z-string code and hash module */ -/* R. A. Parker 13.11.2012 */ - -#include -#include -#include - -#include "FulltextIndex/zstr.h" - -/* zcutf code for storing letters in words */ -uint64_t zcutfX[]={0,1,2,3,4,5,6,7,8,9,10,12,16,24,88,65624}; -uint64_t zcutfC[]={0x0,0x8,0x4,0xC,0x2,0x6,0xA,0xE, - 0x1,0x3,0xA,0x1C,0x48,0x2C0,0xD0000,0xF00000000}; -uint8_t zcutfL[]={4,4,4,4,4,4,4,4,4,4,5,6,7,10,20,36}; -uint8_t zcutfS[]={0,8,4,9,2,10,5,11,1,12,6,13,3,14,7,15}; -uint8_t zcutfTX[]={0x00,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B, - 0x3C,0x3D,0x3E,0x3F,0x40,0x41,0x42,0x43, - 0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B, - 0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53, - 0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B, - 0x5C,0x5D,0x5E,0x5F,0x60,0x61,0x62,0x63, - 0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B, - 0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0x73, - 0x74,0x1D,0x2E,0x27,0x24,0x1B,0x29,0x2F, - 0x22,0x1F,0x32,0x30,0x25,0x28,0x20,0x1E, - 0x2C,0x31,0x23,0x21,0x1C,0x26,0x2D,0x2A, - 0x33,0x2B,0x34,0x75,0x76,0x77,0x78,0x79, - 0x7A,0x03,0x14,0x0D,0x0A,0x01,0x0F,0x15, - 0x08,0x05,0x18,0x16,0x0B,0x0E,0x06,0x04, - 0x12,0x17,0x09,0x07,0x02,0x0C,0x13,0x10, - 0x19,0x11,0x1A,0x7B,0x7C,0x7D,0x7E,0x7F}; -uint8_t zcutfUX[]={0x00,0x65,0x74,0x61,0x6F,0x69,0x6E,0x73, - 0x68,0x72,0x64,0x6C,0x75,0x63,0x6D,0x66, - 0x77,0x79,0x70,0x76,0x62,0x67,0x6B,0x71, - 0x6A,0x78,0x7A,0x45,0x54,0x41,0x4F,0x49, - 0x4E,0x53,0x48,0x52,0x44,0x4C,0x55,0x43, - 0x4D,0x46,0x57,0x59,0x50,0x56,0x42,0x47, - 0x4B,0x51,0x4A,0x58,0x5A,0x01,0x02,0x03, - 0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B, - 0x0C,0x0D,0x0E,0x0F,0x10,0x11,0x12,0x13, - 0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B, - 0x1C,0x1D,0x1E,0x1F,0x20,0x21,0x22,0x23, - 0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B, - 0x2C,0x2D,0x2E,0x2F,0x30,0x31,0x32,0x33, - 0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B, - 0x3C,0x3D,0x3E,0x3F,0x40,0x5B,0x5C,0x5D, - 0x5E,0x5F,0x60,0x7B,0x7C,0x7D,0x7E,0x7F}; -ZCOD zcutf = {2,16,127,4,zcutfX,zcutfC,zcutfL,zcutfS,zcutfTX,zcutfUX}; - -/* zcbky code for storing a B_KEY */ -uint64_t zcbkyX[]={0,1,2,3,4,20,276}; -uint64_t zcbkyC[]={0,4,6,10,0xB0,0xE00,0xF0000}; -uint8_t zcbkyL[]={1,3,3,4,8,12,20}; -uint8_t zcbkyS[]={0,0,0,0, 0,0,0,0, 1,1,3,4, 2,2,5,6}; -ZCOD zcbky = {1,7,0,4,zcbkyX,zcbkyC,zcbkyL,zcbkyS,NULL,NULL}; - -/* zcdelt code for storing UTF-8 deltas */ -uint64_t zcdeltX[]={0,1,2,3,4,5,6,7,8,10,14,22,86,65622}; -uint64_t zcdeltC[]={0,0x4,0x4,0xC,0x2,0x6,0xA,0xE,0x6,0x14, - 0x38,0x2C0, 0xD0000,0xF00000000}; -uint8_t zcdeltL[]={3,3,4,4,4,4,4,4,5,6,7,10,20,36}; -uint8_t zcdeltS[]={0,0,4,8,2,9,5,10,1,1,6,11,3,12,7,13}; -ZCOD zcdelt = {3,14,0,4,zcdeltX,zcdeltC,zcdeltL,zcdeltS,NULL,NULL}; - -/* zcdoc code for storing document handle deltas */ -uint64_t zcdocX[]={0,1,3,11,43,171,1195,1049771}; -uint64_t zcdocC[]={0,0x8,0x10,0xC0,0x80,0xC00,0x500000,0x38000000000}; -uint8_t zcdocL[]={3,4,6,8,10,13,23,42}; -uint8_t zcdocS[]={0,4,2,5,1,6,3,7}; -ZCOD zcdoc = {3,8,0,3,zcdocX,zcdocC,zcdocL,zcdocS,NULL,NULL}; - -/* zckk code for storing direct K-KEY values */ -uint64_t zckkX[]={0,65536,1114112,17891328}; -uint64_t zckkC[]={0,0x200000,0x1000000,0x18000000000}; -uint8_t zckkL[]={18,22,26,41}; -uint8_t zckkS[]={0,2,1,3}; -ZCOD zckk = {1,4,0,2,zckkX,zckkC,zckkL,zckkS,NULL,NULL}; - -/* zcdh code for putting doc handles into a stex */ -uint64_t zcdhX[]={0,8192,134225920}; -uint64_t zcdhC[]={0,0x10000000,0xC0000000000}; -uint8_t zcdhL[]={14,29,44}; -uint8_t zcdhS[]={0,0,1,2}; -ZCOD zcdh = {1,4,0,2,zcdhX,zcdhC,zcdhL,zcdhS,NULL,NULL}; - -uint64_t ZStrTuberK(TUBER * t, uint64_t d1, - uint64_t d2, uint64_t keyb) -{ - uint64_t keya; - if(d2<3) keya= (d1+5*d2) % t->kmax; - else keya = ( d1*(d1+d2) + 2*d2*d2 ) % t->kmax; - if(keyb==0) return keya; - if(keyb==1) return (keya+19)%t->kmax; - if(keyb==2) return (keya+43)%t->kmax; - if(keyb<47) return ((keya+3)*keyb)%t->kmax; - return ZStrTuberK(t,ZStrTuberK(t,d1,d2,keyb%47),0,keyb/47); -} - -/* end of zcode.c */ diff --git a/arangod/FulltextIndex2/zstr-include.h b/arangod/FulltextIndex2/zstr-include.h deleted file mode 100644 index 8650651476..0000000000 --- a/arangod/FulltextIndex2/zstr-include.h +++ /dev/null @@ -1,48 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -/// @brief include wrapper for original zstr.h file -/// -/// @file -/// -/// DISCLAIMER -/// -/// Copyright 2010-2011 triagens GmbH, Cologne, Germany -/// -/// Licensed under the Apache License, Version 2.0 (the "License"); -/// you may not use this file except in compliance with the License. -/// You may obtain a copy of the License at -/// -/// http://www.apache.org/licenses/LICENSE-2.0 -/// -/// Unless required by applicable law or agreed to in writing, software -/// distributed under the License is distributed on an "AS IS" BASIS, -/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -/// See the License for the specific language governing permissions and -/// limitations under the License. -/// -/// Copyright holder is triAGENS GmbH, Cologne, Germany -/// -/// @author Jan Steemann -/// @author Copyright 2012, triagens GmbH, Cologne, Germany -//////////////////////////////////////////////////////////////////////////////// - -#ifndef TRIAGENS_FULLTEXT_ZSTR_H -#define TRIAGENS_FULLTEXT_ZSTR_H 1 - -#include "BasicsC/common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "zstr.h" - -#ifdef __cplusplus -} -#endif - -#endif - -// Local Variables: -// mode: outline-minor -// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)" -// End: diff --git a/arangod/FulltextIndex2/zstr.c b/arangod/FulltextIndex2/zstr.c deleted file mode 100644 index 63aa356753..0000000000 --- a/arangod/FulltextIndex2/zstr.c +++ /dev/null @@ -1,1281 +0,0 @@ -/* zstr.c - the Z-string module */ -/* R. A. Parker 3.12.2012 */ -/* bugfixed in merge - adjtop call added */ -/* bugfixed in tuber - wraparound */ -/* bugfix shift of 64 not happening */ -/* bugfix peeking after last word */ -/* bugfix use firstix not z->firstix */ -/* bugfix page turn */ - -#include -#include -#include -#include "zstr.h" - -ZSTR * ZStrCons(int elts) -{ - ZSTR * z; - z=malloc(sizeof(ZSTR)); - if(z==NULL) return NULL; - z->dat=malloc(elts*sizeof(uint64_t)); - if(z->dat==NULL) - { - free(z); - return NULL; - } - z->dlen=malloc(elts*sizeof(long)); - if(z->dlen==NULL) - { - free(z->dat); - free(z); - return NULL; - } - z->alloc=elts; - z->firstix=0; - z->lastix=0; - z->dat[0]=0; - z->dlen[0]=0; - return z; -} - -void ZStrDest(ZSTR * z) -{ - free(z->dat); - free(z->dlen); - free(z); -} - -void ZStrClear(ZSTR * z) -{ - z->firstix=0; - z->lastix=0; - z->dat[0]=0; - z->dlen[0]=0; -} - -int ZStrBitsIn(uint64_t a, long bits, ZSTR * z) -{ - long clen; - void * ptr; - clen=z->dlen[z->lastix]; - if(clen+bits <= 64) - { - z->dat[z->lastix]=(z->dat[z->lastix]<dlen[z->lastix]=clen+bits; - } - else - { - if(z->lastix+1 >= z->alloc) - { - z->alloc=(z->alloc + z->alloc/4 + 2); - ptr=realloc(z->dat,z->alloc*sizeof(uint64_t)); - if(ptr==NULL) return 1; - z->dat=ptr; - ptr=realloc(z->dlen,z->alloc*sizeof(long)); - if(ptr==NULL) return 1; - z->dlen=ptr; - } - z->lastix++; - z->dat[z->lastix]=a; - z->dlen[z->lastix]=bits; - } - return 0; -} - -uint64_t ZStrBitsOut(ZSTR * z, long bits) -{ - uint64_t s,t; - long slen,wlen; - s=0; - slen=0; - while( slen+z->dlen[z->firstix] <= bits) - { - s<<=z->dlen[z->firstix]; - s+=z->dat[z->firstix]; - slen+=z->dlen[z->firstix]; - if(z->firstix==z->lastix) - { - z->dlen[z->firstix]=0; - z->dat[z->firstix]=0; - return s<<(bits-slen); - } - z->firstix++; - } - wlen=bits-slen; - if(wlen==0) return s; - s<<=wlen; - t=z->dat[z->firstix]>>(z->dlen[z->firstix]-wlen); - s+=t; - z->dat[z->firstix]^=(t<<(z->dlen[z->firstix]-wlen)); - z->dlen[z->firstix]-=wlen; - return s; -} - -uint64_t ZStrBitsPeek(ZSTR * z, long bits) -{ - uint64_t s; - int firstix; - long slen,wlen; - s=0; - slen=0; - firstix=z->firstix; - while( slen+z->dlen[firstix] <= bits) - { - s<<=z->dlen[firstix]; - s+=z->dat[firstix]; - slen+=z->dlen[firstix]; -/* bugfix peeking after last word */ - if(firstix==z->lastix) - return s<<(bits-slen); - firstix++; - } - wlen=bits-slen; - if(wlen==0) return s; - s<<=wlen; -/* bugfix use firstix not z->firstix */ - s+=z->dat[firstix]>>(z->dlen[firstix]-wlen); - return s; -} - -long ZStrLen(ZSTR * z) -{ - long tot; - int i; - tot=0; - for(i=z->firstix;i<=z->lastix;i++) tot+=z->dlen[i]; - return tot; -} - -void ZStrNormalize(ZSTR * z) -{ - while(z->lastix>z->firstix) - { - if(z->dat[z->lastix]!=0) break; - z->lastix--; - } - if(z->dat[z->lastix]==0) - { - z->dlen[z->lastix]=0; - return; - } - while( (z->dat[z->lastix]&1)==0 ) - { - z->dat[z->lastix]>>=1; - z->dlen[z->lastix]--; - } -} - -int ZStrEnc(ZSTR * z, ZCOD * zc, uint64_t a) -{ - int seg; - switch (zc->t) - { - case 1: - for(seg=1;seg<=zc->s;seg++) - if(aX[seg]) break; - seg--; - return ZStrBitsIn(a-zc->X[seg]+zc->C[seg],zc->L[seg],z); - case 2: - if(a<=zc->tmax) a=zc->TX[a]; - for(seg=1;seg<=zc->s;seg++) - if(aX[seg]) break; - seg--; - return ZStrBitsIn(a-zc->X[seg]+zc->C[seg],zc->L[seg],z); - default: - printf("invalid ZCOD type %d\n",zc->t); - exit(16); - } -} - -uint64_t ZStrDec(ZSTR * z, ZCOD * zc) -{ - int seg; - uint64_t topbit,s; - switch (zc->t) - { - case 1: - topbit=ZStrBitsPeek(z,zc->bits); - seg=zc->SG[topbit]; - s=ZStrBitsOut(z,zc->L[seg]); - return (s-zc->C[seg])+zc->X[seg]; - case 2: - topbit=ZStrBitsPeek(z,zc->bits); - seg=zc->SG[topbit]; - s=ZStrBitsOut(z,zc->L[seg]); - s = (s-zc->C[seg])+zc->X[seg]; - if(s<=zc->tmax) s=zc->UX[s]; - return s; - default: - printf("invalid ZCOD type %d\n",zc->t); - exit(18); - } -} - -uint64_t ZStrXlate(ZCOD * zc, uint64_t a) -{ - if(a<=zc->tmax) return zc->TX[a]; - return a; -} - -uint64_t ZStrUnXl(ZCOD * zc, uint64_t a) -{ - if(a<=zc->tmax) return zc->UX[a]; - return a; -} - -int ZStrLastEnc(ZSTR * z, uint64_t a) -{ - uint64_t b; - long len; - if(a==0) return 0; - b=a; - len=1; - while(b>1) - { - len++; - b>>=1; - } - a-=b<<(len-1); - return ZStrBitsIn(1+(a<<1),len,z); -} - -uint64_t ZStrLastDec(ZSTR * z) -{ - long len; - uint64_t num,x; - len=ZStrLen(z); - - if(len==0) num=0; - else - { - num=ZStrBitsOut(z,len); - x=1; - x<<=len; - num+=x; - } - return (num>>1); -} - -void ZStrCxClear(ZCOD * zc, CTX * ctx) -{ - ctx->x1=0; -} - -int ZStrCxEnc(ZSTR * z, ZCOD * zc, CTX * ctx, uint64_t a) -{ - int seg; - uint64_t b; - switch (zc->t) - { - case 1: - case 2: - return ZStrEnc(z,zc,a); - case 3: - b=a-ctx->x1; - ctx->x1=a; - for(seg=1;seg<=zc->s;seg++) - if(bX[seg]) break; - seg--; - return ZStrBitsIn(b-zc->X[seg]+zc->C[seg],zc->L[seg],z); - default: - printf("invalid ZCOD type %d\n",zc->t); - exit(17); - } -} - -uint64_t ZStrCxDec(ZSTR * z, ZCOD * zc, CTX * ctx) -{ - int seg; - uint64_t topbit,s; - switch (zc->t) - { - case 1: - case 2: - return ZStrDec(z,zc); - case 3: - topbit=ZStrBitsPeek(z,zc->bits); - seg=zc->SG[topbit]; - s=ZStrBitsOut(z,zc->L[seg]); - s = (s-zc->C[seg])+zc->X[seg]; - ctx->x1+=s; - return ctx->x1; - default: - printf("invalid ZCOD type %d\n",zc->t); - exit(17); - } -} - - -int ZStrMaxLen(ZSTR * z, int fmt) -{ - uint64_t x; - if(fmt==2) x=15; - else - { - printf("unknown format %d in ZStrMaxLen\n",fmt); - exit(33); - } - return 1+(ZStrLen(z)/x); -} - -int ZStrExtract(ZSTR * z, void * x, int fmt) -{ - uint16_t * x2; - uint64_t s; - int len; - int words; - words=1; - if(fmt==2) - { - x2=(uint16_t *)x; - ZStrNormalize(z); - len=ZStrLen(z); - while(len>14) - { - words++; - s=ZStrBitsPeek(z,15); - if( (s&1)==1 ) - { - s=ZStrBitsOut(z,15); - *(x2++)=1+(s<<1); - len-=15; - } - else - { - s=ZStrBitsOut(z,16); - *(x2++)=1+s; -/* next line looks unsafe, but all non-zero z-strings have */ -/* their last bit 1, so if length is 15, previous case applies */ - len-=16; - } - } - s=ZStrBitsOut(z,14); - *x2 = s<<2; - return words; - } - printf("Format %d not known in ZStrExtract\n",fmt); - return 0; -} - -int ZStrInsert(ZSTR * z, void * x, int fmt) -{ - uint16_t * x2; - uint64_t s; - int r; - if(fmt==2) - { - x2=(uint16_t *)x; - ZStrClear(z); - while(1) - { - s=*(x2++); - if( (s&3)==0 ) - { - r=ZStrBitsIn(s>>2,14,z); - if(r!=0) return r; - ZStrNormalize(z); - return 0; - } - if( (s&3)==3 ) - r=ZStrBitsIn(s>>1,15,z); - else - r=ZStrBitsIn(s-1,16,z); - if(r!=0) return r; - } - } - return 1; -} - -int ZStrExtLen(void * x, int fmt) -{ - uint16_t * w; - int len; - w = (uint16_t *) x; - len=1; - while(((*(w++))&3)!=0) len++; - return len; -} - -STEX * ZStrSTCons(int fmt) -{ - STEX * st; - int i; - st=malloc(sizeof(STEX)); - if(st==NULL) return NULL; - st->pst=malloc(1281*sizeof(uint16_t *)); - if(st->pst==NULL) - { - free(st); - return NULL; - } - st->ptp=malloc(1281*sizeof(uint16_t *)); - if(st->ptp==NULL) - { - free(st->pst); - free(st); - return NULL; - } - st->mal=malloc(1281*sizeof(uint64_t)); - if(st->mal==NULL) - { - free(st->ptp); - free(st->pst); - free(st); - return NULL; - } - st->stcnt=malloc(1281*sizeof(uint64_t)); - if(st->stcnt==NULL) - { - free(st->mal); - free(st->ptp); - free(st->pst); - free(st); - return NULL; - } - for(i=0;i<1281;i++) - st->mal[i]=0; - for(i=0;i<6;i++) st->inuse[i]=0; - st->listm=0; - return st; -} - -void ZStrSTDest(STEX * st) -{ - int i; - for(i=0;i<1281;i++) - if(st->mal[i]!=0) free(st->pst[i]); - if(st->listm!=0) free(st->list); - free(st->pst); - free(st->ptp); - free(st->mal); - free(st->stcnt); - free(st); -} - -int ZStrExtCompare(void * a, void * b, int fmt) -{ - uint16_t *a1, *b1; - a1=(uint16_t *) a; - b1=(uint16_t *) b; - while(1) - { - if((*a1) < (*b1)) return -1; - if((*a1) > (*b1)) return 1; - if(((*a1)&3)==0) - { - if(((*b1)&3)==0) - return 0; - return -1; - } - if(((*b1)&3)==0) - return 1; - a1++; - b1++; - } -} - -typedef struct -{ - STEX * st; - uint16_t pq[256]; - uint16_t ch[128]; -} SICH; -#define DEBUG - -#ifdef DEBUG - -void dumpheap(SICH * si) -{ - STEX * st; - int i,dat,ch; - st=si->st; - for(i=1;i<=50;i++) - { - dat=0xABCD; - if(i<128) ch=si->ch[i]; - else ch=-1; - if(si->pq[i]<1280) dat=*(st->pst[si->pq[i]]); - printf("nd %3d pq %3d ch %3d dt %x\n", - i,si->pq[i],ch,dat); - } -} - -#endif - -/* the first letter of variables are used . . . */ - -/* h int 1-255 index si (pq,ch) heap numbers */ -/* si look them up in SICH pq and you get */ - -/* s uint16_t 0-1278 index st. slot numbers */ -/* st look them up in STEX st (pst,etc) */ - -#define EXPIRED 10000 - -static void pqadvance(SICH * si, int htop) -{ - uint16_t snode; - STEX * st; - st=si->st; - snode=si->pq[htop]; - st->stcnt[snode]--; - if(st->stcnt[snode]==0) - { - si->pq[htop]=EXPIRED; - return; - } - while((*(st->ptp[snode])&3)!=0) st->ptp[snode]++; - st->ptp[snode]++; - return; -} - -static int heapcomp(SICH * si, int ha, int hb) -{ - STEX * st; - int r; - uint16_t *wa,*wb; - st=si->st; - if(si->pq[hb]==EXPIRED) return -1; - if(si->pq[ha]==EXPIRED) return 1; - wa=st->ptp[si->pq[ha]]; - wb=st->ptp[si->pq[hb]]; - r= ZStrExtCompare((void*)wa,(void*)wb,2); - return r; -} - -/* v int 0-7 index spath. level of operation */ -/* spath look them up in spath to get an h */ - -static void adjtop(SICH * si, int htop) -{ - int spath[8]; /* h = spath(v) */ - int vlev; - int hcur,hpar,hsib; /* 1-255 heap points */ - int r; - uint16_t temp; - vlev=0; - hcur=htop; - while(1) /* loop over all strings to insert */ - { -/* populate the special path */ - while(1) - { - spath[vlev]=hcur; - if(hcur>=128) break; - if( (si->pq[hcur]==EXPIRED) && (hcur!=htop) ) break; - hcur=2*hcur+si->ch[hcur]; - vlev++; - } - while(1) /* find the correct place to put hcur */ - { - if(vlev==0) return; - r = heapcomp(si,htop,hcur); - if(r!=-1) break; - vlev--; - hcur=spath[vlev]; - } - if(r==1) while(1) /* bump up */ - { - if(vlev==0) return; - hpar=spath[vlev-1]; - hsib=hcur^1; - r = heapcomp(si,htop,hsib); - if(r==0) break; - if(r==1) si->ch[hpar]^=1; - temp=si->pq[hcur]; - si->pq[hcur]=si->pq[htop]; - si->pq[htop]=temp; - vlev--; - hcur=spath[vlev]; - } - pqadvance(si, htop); - } -} - -/* Return pointer to last string <= x */ - -void * ZStrSTFind(STEX * st, void * x) -{ - uint16_t *wx, *w3, *w1, *w2; - int i; - if(st->listw==0) return NULL; /* list is empty */ - wx = (uint16_t *) x; - w1=st->list; /* very first word */ - w3=w1+st->listw-2; /* just before last word */ - i=ZStrExtCompare( (void*)w1, (void*)wx,2); - if(i>0) return NULL; /* first word bigger */ - while(w3>=w1) - { - if(((*w3)&3)==0) break; - w3--; - } - w3++; /* first word of last string */ -/* x1 and x3 point to first and last string */ - while(w1!=w3) - { - w2=w1+(w3-w1)/2; - while(w2>=w1) - { - if(((*w2)&3)==0) break; - w2--; - } - w2++; - if(w2==w1) /* no earlier start - try later */ - { - w2=w1+(w3-w1)/2; - while(w2=w3) return w1; - } - i=ZStrExtCompare( (void*)w2, (void*)wx,2); - if(i>0) w3=w2; - else w1=w2; - } - return w1; -} - -static int merge(STEX * st, int layer) -{ - uint16_t sfst,slst,snpl,ssc,i; - uint16_t *wout, *w1; - SICH si; - size_t mem; - int hcur,r; - if(st->inuse[layer]==0) return 0; - si.st=st; - sfst=256*layer; - slst=sfst+st->inuse[layer]; /* one more than last */ - snpl=256*(layer+1)+st->inuse[layer+1]; /* new place */ - hcur=1; - mem=0; - for(i=sfst;iptp[i]-st->pst[i])*sizeof(uint16_t); - st->ptp[i]=st->pst[i]; - si.pq[hcur++]=i; - } - while(hcur<256) si.pq[hcur++]=EXPIRED; - - if(mem>st->mal[snpl]) - { - if(st->mal[snpl]!=0) free(st->pst[snpl]); - st->pst[snpl]=malloc(mem); - if(st->pst[snpl]==NULL) return 1; - st->mal[snpl]=mem; - } - st->stcnt[snpl]=0; - hcur=127; - while(hcur>=1) - { - r=0; - while(r==0) - { - r=heapcomp(&si,2*hcur,2*hcur+1); - if(r!=0) break; - pqadvance(&si,2*hcur); - adjtop(&si,2*hcur); /* bugfix added */ - } - if(r==-1) si.ch[hcur]=0; - else si.ch[hcur]=1; - adjtop(&si,hcur); - hcur--; - } - - wout=st->pst[snpl]; - while(si.pq[1]!=EXPIRED) - { - ssc=si.pq[1]; - w1=st->ptp[ssc]; - while(((*w1)&3)!=0) *(wout++) =*(w1++); - *(wout++) =*(w1++); - st->ptp[ssc]=w1; - st->stcnt[ssc]--; - if(st->stcnt[ssc]==0) - si.pq[1]=EXPIRED; - adjtop(&si,1); - st->stcnt[snpl]++; - } - st->ptp[snpl]=wout; - st->inuse[layer]=0; - st->inuse[layer+1]++; - if(st->inuse[layer+1] == 255) return merge(st,layer+1); - return 0; -} - -int ZStrSTAppend(STEX * st, ZSTR * z) -{ - size_t len; - int sno; - len=ZStrMaxLen(z,2)*sizeof(uint16_t); - sno=st->inuse[0]; - if(len>st->mal[sno]) - { - if(st->mal[sno]!=0) free(st->pst[sno]); - st->pst[sno]=malloc(len); - if(st->pst[sno]==NULL) return 1; - st->mal[sno]=len; - } - len=ZStrExtract(z,st->pst[sno],2); - st->ptp[sno]=st->pst[sno]+len; - st->stcnt[sno]=1; - st->inuse[0]++; - if(st->inuse[0]>=255) return merge(st,0); - return 0; -} - -int ZStrSTSort(STEX * st) -{ - int lev,lev2,mxlev,r; - uint16_t sans; - lev=0; - mxlev=0; - while(lev<6) - { -/* check to find maximum level */ - for(lev2=0;lev2<6;lev2++) - if(st->inuse[lev2]!=0) mxlev=lev2; - if( (lev==mxlev) && (st->inuse[lev]==1) ) break; - r=merge(st,lev); - if(r!=0) return r; - lev++; - continue; - } - if(st->listm!=0) free (st->list); - if(st->inuse[lev]==0) /* nothing there at all! */ - { - st->listw=0; - st->listm=0; - return 0; - } - sans=256*lev; - st->list=st->pst[sans]; - st->listw=st->ptp[sans]-st->pst[sans]; - st->listm=st->mal[sans]; - st->cnt=st->stcnt[sans]; - st->mal[sans]=0; - return 0; -} - -TUBER * ZStrTuberCons(size_t size, int options) -{ - TUBER * t; - int i; - t=malloc(sizeof(TUBER)); - if(t==NULL) return NULL; -/* compute number of K-keys per word from options */ - i=options&7; - t->kperw=0; - if(i==1) t->kperw=8; - if(i==2) t->kperw=4; - if(i==3) t->kperw=2; - if(i==4) t->kperw=1; - if(t->kperw == 0) - { - printf("Invalid options field in ZStrTuberCons\n"); - exit(35); - } -/* compute maximum K-key from suggested size */ - t->kmax=(size*t->kperw)/8; - t->kmax++; - if( (t->kmax%2) == 0) t->kmax++; - while(1) - { - t->kmax+=2; - for(i=3;i<47;i++) - if( (t->kmax%i)==0) break; - if(i==47) break; - } - t->wct = (t->kmax+t->kperw-1)/t->kperw; - t->tiptop=t->wct*t->kperw; - t->tub = malloc(8*t->wct); - if(t->tub == NULL) - { - free(t); - return NULL; - } - for(i=0;iwct;i++) t->tub[i]=0x8000000000000000ll; - t->lenlen=3; - t->mult=8; - if(t->kperw==2) - { - t->lenlen=4; - t->mult=16; - } - if(t->kperw==1) - { - t->lenlen=5; - t->mult=32; - } - t->freekey=t->kmax; - t->freebit=(t->wct*63)-(t->kmax*(t->lenlen+1)); - t->fuses=0; - return t; -} - -void ZStrTuberDest(TUBER * t) -{ - free(t->tub); - free(t); -} - -typedef struct -{ - TUBER * tub; - uint64_t curw; /* up on tub->tub */ - long curb; /* 0-62 */ - long hdrlen; -} CuR; - -static void copycur(CuR * c1, CuR * c2) -{ - c2->tub=c1->tub; - c2->curw=c1->curw; - c2->curb=c1->curb; -} - -static uint64_t getbits(CuR * cur, long bits) -{ - uint64_t got,got1; - uint64_t one; - long newbits; - uint64_t x; - TUBER * tub; - - one=1; - tub=cur->tub; - if(bits+cur->curb < 63) - { - got=tub->tub[cur->curw]; - cur->curb+=bits; - got>>=(63-cur->curb); - } - else - { - got=tub->tub[cur->curw]; - newbits=bits+cur->curb-63; - cur->curb=newbits; - cur->curw++; - if(cur->curw>=tub->wct) cur->curw=0; - got1=tub->tub[cur->curw]; - got1<<=1; -/* bugfix shift of 64 not happening */ - if(newbits!=0) - got=(got<>(64-newbits)); - } - x = got&((one<tub; - cur->curw+=(bits/63); - cur->curb+=(bits%63); - if(cur->curb>62) - { - cur->curw++; - cur->curb-=63; - } - while(cur->curw>=t->wct) cur->curw-=t->wct; -} - -static void putbits(CuR * cur, uint64_t data, long bits) -{ - TUBER * tub; - uint64_t x1,x2; - uint64_t one; - long newbits; - tub=cur->tub; - one=1; - x2=(one<<(63-cur->curb))-one; /* mask for ~old bits */ - if(bits+cur->curb < 63) - { - x1=(one<<(63-cur->curb-bits))-one; - x2=x1^x2; /* new bits mask */ - x1=~x2; /* old bits mask */ - x1=x1&tub->tub[cur->curw]; /*old bits (inc. top one) */ - tub->tub[cur->curw]=x1+((data<<(63-cur->curb-bits))&x2); - cur->curb+=bits; - return; - } - x1=~x2; - x1=x1&tub->tub[cur->curw]; /* old bits */ - newbits=cur->curb+bits-63; - tub->tub[cur->curw]=x1+(data>>(newbits)); - cur->curw++; - if(cur->curw>=tub->wct) cur->curw=0; - cur->curb=newbits; - x1=((one<<(63-newbits))-one)|0x8000000000000000; /* keep these */ - x2=tub->tub[cur->curw]&x1; - tub->tub[cur->curw]=x2+((data<<(63-newbits))&(~x1)); - return; -} - -static long gethdr(CuR * cur) -{ - TUBER * t; - long dlen; - uint64_t h; - t=cur->tub; - h=getbits(cur,t->lenlen+1); - cur->hdrlen=t->lenlen+1; - dlen=h; - dlen-=2; - if(h<3) return dlen; - dlen=0; - while( (h>>t->lenlen)!=0 ) - { - h-=t->mult; - h=(h<<1)+getbits(cur,1); - cur->hdrlen++; - dlen+=t->mult; - } - dlen+=h; - dlen-=2; - return dlen; -} - -void ZStrTuberStats(TUBER * t, uint64_t * stats) -{ - uint64_t d1,d2; - d1=(t->fuses*100)/t->wct; - d2=(t->freebit*100)/( (t->wct*63)-(t->kmax*(t->lenlen+1)) ); - d2=100-d2; - if(d2>d1) d1=d2; - d2=(100*t->freekey)/t->kmax; - d2=100-d2; - if(d2>d1) d1=d2; - d2=((t->wct*8)*(d1+1))/50; - /* -printf("fuse %d freebit %d freekey %d kmax %d wct %d lenlen %d\n", - (int)t->fuses, (int)t->freebit, (int)t->freekey, (int)t->kmax, - (int)t->wct, (int)t->lenlen); -*/ - if(d2<72*t->fuses)d2=72*t->fuses; - stats[0]=d1; - stats[1]=d2; -} - -typedef struct -{ - TUBER * tub; - uint64_t first; - uint64_t last; - uint64_t words; -} BlK; - -/* Set cur to point to the wanted string */ -static void locate(TUBER * t, uint64_t kkey, BlK * blk, CuR * cur) -{ - uint64_t curkkey; - long dlen; -/* fill in the BlK structure with first, last and number of words */ - blk->last = blk->first = kkey/t->kperw; - blk->words=1; - blk->tub=t; - if(blk->first>0) blk->first--; - else blk->first=t->wct-1; - while( (t->tub[blk->first]>>63)==0) - { - if(blk->first>0) blk->first--; - else blk->first=t->wct-1; - blk->words++; - } - blk->first++; - if(blk->first >= t->wct) blk->first=0; - while( (t->tub[blk->last]>>63)==0) - { - blk->last++; - if(blk->last >= t->wct) blk->last=0; - blk->words++; - } -/* set the CuR structure to point to the required string */ - cur->tub=t; - cur->curw=blk->first; - cur->curb=0; - curkkey=blk->first*t->kperw; - while(curkkey!=kkey) - { - dlen=gethdr(cur); - if(dlen>0)skipbits(cur,dlen); - curkkey++; -/* bugfixed in tuber - wraparound */ - if(curkkey==t->kperw*t->wct) curkkey=0; - } -} - -/* grabs specified number of kkeys from cur*/ -/* returns number of free bits, or -1 if memory allocation occurs */ -long grabrest(CuR * cur, BlK * blk, uint64_t kkeys, ZSTR * z) -{ - uint64_t i,b; - long j,k,freeb; - int r; - TUBER * t; - CuR cur1; - - t = blk->tub; - for(i=0;i63) - { - b=getbits(cur,63); - r=ZStrBitsIn(b,63,z); - if(r!=0) return -1; - k-=63; - } - b=getbits(cur,k); - r=ZStrBitsIn(b,k,z); - if(r!=0) return -1; - while(j>63) - { - b=getbits(cur,63); - r=ZStrBitsIn(b,63,z); - if(r!=0) return -1; - j-=63; - } - if(j>0) - { - b=getbits(cur,j); - r=ZStrBitsIn(b,j,z); - if(r!=0) return -1; - }; - } -/* bugfix page turn */ - if(cur->curb==0) - { - cur->curb=63; - if(cur->curw!=0) cur->curw--; - else cur->curw = t->wct-1; - } -/* end of bugfix page turn */ - freeb=63-cur->curb; - while(cur->curw!=blk->last) - { - freeb+=63; - cur->curw++; - if(cur->curw>=t->wct) cur->curw=0; - } - return freeb; -} - -static long blkfuse(BlK * blk, CuR * cur, ZSTR * z) -{ - TUBER * t; - uint64_t kkeys; - t=blk->tub; - blk->last++; - if(blk->last >= t->wct) blk->last=0; - cur->curw=blk->last; - cur->curb=0; - blk->words++; - kkeys=t->kperw; - while( (t->tub[blk->last]>>63)==0) - { - blk->last++; - if(blk->last >= t->wct) blk->last=0; - blk->words++; - kkeys+=t->kperw; - } - return grabrest(cur,blk,kkeys,z); -} - -void movebits(ZSTR * z, long bits, CuR * cur) -{ - uint64_t j; - long bt; - bt=bits; - while(bt>60) - { - j=ZStrBitsOut(z,60); - bt-=60; - putbits(cur,j,60); - } - j=ZStrBitsOut(z,bt); - putbits(cur,j,bt); -} - - -int ZStrTuberRead(TUBER * t, uint64_t kkey, ZSTR * z) -{ - long i; - int r; - uint64_t j; - BlK blk; - CuR cur; - locate(t,kkey,&blk,&cur); - i=gethdr(&cur); - if(i==-2) return 1; - ZStrClear(z); - if(i==-1) return 0; - while(i>60) - { - j=getbits(&cur,60); - r=ZStrBitsIn(j,60,z); - if(r!=0) return 2; - i-=60; - } - if(i>0) - { - j=getbits(&cur,i); - r=ZStrBitsIn(j,i,z); - if(r!=0) return 2; - } - r=ZStrBitsIn(1,1,z); - if(r!=0) return 2; - return 0; -} - -uint64_t ZStrTuberIns(TUBER * t, uint64_t d1, uint64_t d2) -{ - BlK blk; - CuR cur,cur1; - uint64_t kkey,keyb; - int i; -/* first find a keyb that works */ - for(keyb=0;keyb<65536;keyb++) - { - kkey=ZStrTuberK(t,d1,d2,keyb); - locate(t,kkey,&blk, &cur); - copycur(&cur,&cur1); - i=gethdr(&cur); - if(i==-2) break; - } - if(keyb==65536) return INSFAIL; -/* equal size so change from key-not-found to zero */ - putbits(&cur1,1,(cur.tub)->lenlen+1); - t->freekey--; - return keyb; -} - -int ZStrTuberUpdate(TUBER * t, uint64_t kkey, ZSTR * z) -{ - BlK blk; - CuR cur; - CuR cur1; - ZSTR * z1; - long i1,i2,i3,j,k,b1,sparebits,bitlen,spb1; - int i; - uint64_t kkeys; - uint64_t w,m1,m2; - int fuseflag; - locate(t,kkey,&blk, &cur); - copycur(&cur,&cur1); - i1=gethdr(&cur1); - if(i1<0) - i1=0; - i3=i1; - i1+=cur1.hdrlen; /* current total length in tuber */ - j=ZStrLen(z); - k=j+1; - b1=0; - while(k>=(t->mult)) - { - b1++; - k-=t->mult; - } -/* so b1 is the number of 1-bits in the header */ -/* and k is the value of the remainder of the header bits */ -/* and j is the length of the z-string part (inc. last 1) */ - i2=b1+j+t->lenlen; - if(j==0) i2++; -/* so now i2 is the new length */ - if(i2==i1) /* same length case */ - { - for(i=0;ilenlen); - if(j>1)movebits(z,j-1,&cur); - return 0; - } - t->freebit-=i2; - t->freebit+=i1; - skipbits(&cur1,i3); - kkeys=((blk.last+1)*t->kperw)-1; - if(kkeys>=kkey) kkeys=kkeys-kkey; - else kkeys=t->tiptop+kkeys-kkey; - z1=ZStrCons(kkeys/t->wct+7); /* first shot */ - if(z1==NULL) return 1; - sparebits=grabrest(&cur1,&blk,kkeys,z1); - if(sparebits==-1) return 1; - fuseflag=0; - while(sparebits+i1fuses++; - spb1=blkfuse(&blk,&cur1,z1); - if(spb1==-1) return 2; - sparebits += spb1; - fuseflag=1; - if(blk.words > (t->wct/3)) return 2; - } - sparebits=sparebits+i1-i2; - - if(fuseflag==1) - { - m1=0x7fffffffffffffffull; - m2=0x8000000000000000ull; - w=blk.first; - while(w!=blk.last) - { - t->tub[w]&=m1; - w++; - if(w>=t->wct) w=0; - } - t->tub[w]|=m2; - } - bitlen=ZStrLen(z); - for(i=0;ilenlen); - if(j>1) movebits(z,j-1,&cur); - bitlen=ZStrLen(z1); - movebits(z1,bitlen,&cur); - ZStrClear(z1); - movebits(z1,sparebits,&cur); - ZStrDest(z1); - return 0; -} - -int ZStrTuberDelete(TUBER * t, uint64_t kkey) -{ - BlK blk; - CuR cur; - CuR cur1; - ZSTR * z; - int r; - long i1,bitlen; - uint64_t kkeys; - - locate(t,kkey,&blk, &cur); - copycur(&cur,&cur1); - i1=gethdr(&cur1); - t->freebit+=cur1.hdrlen; - - skipbits(&cur1,i1); - kkeys=((blk.last+1)*t->kperw)-1; - if(kkeys>=kkey) kkeys=kkeys-kkey; - else kkeys=t->tiptop+kkeys-kkey; - z=ZStrCons(kkeys/t->wct+7); /* about right */ - if(z==NULL) return 1; - r=grabrest(&cur1,&blk,kkeys,z); - if(r!=0) return 1; - bitlen=ZStrLen(z); /* probably should compute in grabrest */ - putbits(&cur,0,t->lenlen+1); /* put in key-not-present */ - movebits(z,bitlen,&cur); - t->freekey++; - t->freebit+=i1; - - t->freebit-=t->lenlen; - return 0; -} - -/* end of zstr.c */ diff --git a/arangod/FulltextIndex2/zstr.h b/arangod/FulltextIndex2/zstr.h deleted file mode 100644 index eefee1f491..0000000000 --- a/arangod/FulltextIndex2/zstr.h +++ /dev/null @@ -1,110 +0,0 @@ -/* zstr.h - header file for the z-string module */ -/* R. A. Parker 3.5.2012 */ - -typedef struct -{ - uint64_t * dat; - long * dlen; - int alloc; - int firstix; - int lastix; -} ZSTR; - -ZSTR * ZStrCons(int elts); -void ZStrDest(ZSTR * z); -void ZStrClear(ZSTR * z); -int ZStrBitsIn(uint64_t a, long bits, ZSTR * z); -uint64_t ZStrBitsOut(ZSTR * z, long bits); -uint64_t ZStrBitsPeek(ZSTR * z, long bits); -long ZStrLen(ZSTR * z); -void ZStrNormalize(ZSTR * z); - -typedef struct -{ - int t; /* code type */ - int s; /* segments */ - int tmax; /* Top to translate */ - int bits; /* that determine len */ - uint64_t * X; /* first of segment */ - uint64_t * C; /* code added */ - uint8_t * L; /* length in bits */ - uint8_t * SG; /* segment for top bits */ - uint8_t * TX; /* translate table */ - uint8_t * UX; /* untranslate table */ -} ZCOD; - -int ZStrEnc(ZSTR * z, ZCOD * zc, uint64_t a); -uint64_t ZStrDec(ZSTR * z, ZCOD * zc); -uint64_t ZStrXlate(ZCOD * zc, uint64_t a); -uint64_t ZStrUnXl(ZCOD * zc, uint64_t a); -int ZStrLastEnc(ZSTR * z, uint64_t a); -uint64_t ZStrLastDec(ZSTR * z); - -typedef struct -{ - uint64_t x1; -} CTX; - -void ZStrCxClear(ZCOD * zc, CTX * ctx); -int ZStrCxEnc(ZSTR * z, ZCOD * zc, CTX * ctx, uint64_t a); -uint64_t ZStrCxDec(ZSTR * z, ZCOD * zc, CTX * ctx); - - -int ZStrMaxLen(ZSTR * z, int fmt); -int ZStrExtract(ZSTR * z, void * x, int fmt); -int ZStrInsert(ZSTR * z, void * x, int fmt); -int ZStrExtCompare(void * x, void * y, int fmt); -int ZStrExtLen(void * x, int fmt); - -typedef struct -{ - uint16_t ** pst; /* 1281 pointers to start */ - uint16_t ** ptp; /* 1281 pointers to top */ - uint64_t * mal; /* 1281 number of bytes allocated */ - uint64_t * stcnt; /* 1281 number of strings in clump */ - uint16_t inuse[6]; - uint16_t * list; /* final list */ - uint64_t listw; /* number of uint16s in final list */ - uint64_t listm; /* number of uint16's malloc'd */ - uint64_t cnt; /* number if strings in list */ -} STEX; - -STEX * ZStrSTCons(int fmt); -void ZStrSTDest(STEX * st); -int ZStrSTAppend(STEX * st, ZSTR * z); -int ZStrSTSort(STEX * st); -void * ZStrSTFind(STEX * st, void * x); - -typedef struct -{ - uint64_t kperw; /* K keys per word */ - uint64_t kmax; /* (prime) number of keys */ - uint64_t tiptop; /* number of spaces in tuber */ - uint64_t wct; /* number of 64-bit words */ - long lenlen; /* length of length string */ - uint64_t mult; /* length bits per initial 1-bit */ - uint64_t * tub; /* tuber data pointer */ - uint64_t freekey; /* free keys */ - uint64_t freebit; /* free bits */ - uint64_t fuses; /* number of block fuses */ -} TUBER; - -#define TUBER_BITS_8 1 -#define TUBER_BITS_16 2 -#define TUBER_BITS_32 3 -#define TUBER_BITS_64 4 - -TUBER * ZStrTuberCons(size_t size, int options); -void ZStrTuberDest(TUBER * t); -void ZStrTuberStats(TUBER * t, uint64_t * stats); -int ZStrTuberRead(TUBER * t, uint64_t kkey, ZSTR * z); -int ZStrTuberUpdate(TUBER * t, uint64_t kkey, ZSTR * z); -int ZStrTuberDelete(TUBER * t, uint64_t kkey); -#define INSFAIL 128000 -uint64_t ZStrTuberIns(TUBER * t, uint64_t d1, uint64_t d2); -uint64_t ZStrTuberK(TUBER * t, uint64_t d1, - uint64_t d2, uint64_t keyb); - -/* end of zstr.h */ - - diff --git a/arangod/FulltextIndex2/zstrreg.c b/arangod/FulltextIndex2/zstrreg.c deleted file mode 100644 index ca31f8d59c..0000000000 --- a/arangod/FulltextIndex2/zstrreg.c +++ /dev/null @@ -1,488 +0,0 @@ -/* zstring regression program */ -/* R. A. Parker 15.11.2012 */ - -#include -#include -#include "zstr.h" - -int err; - -void ckint(int x, int was, int shdbe) -{ - if(was==shdbe) return; - err++; - printf("Error %d, was %x (%d), should be %x\n",x,was,was,shdbe); -} - -void ZDUMP(ZSTR * z) -{ - int i; - printf("alloc %d firstix %d lastix %d\n", - z->alloc,z->firstix,z->lastix); - for(i=z->firstix;i<=z->lastix;i++) - printf("ix %d, val %16llx length %d\n", - i,(unsigned long long)z->dat[i],(int)z->dlen[i]); -} - -void TUBDUMP(TUBER * t) -{ - long i1,i2,i3,i4,i5,i6; - int i; - long long ff; - i1=t->kperw; - i2=t->kmax; - i3=t->wct; - i4=t->tiptop; - i5=t->lenlen; - i6=t->mult; - printf("kperw %ld, kmax %ld, wct %ld, ", - i1,i2,i3); - printf("tiptop %ld, lenlen %ld, mult %ld\n", - i4,i5,i6); - for(i=0;itub[i]; - printf("%16llx ",ff); - if(i%5==4) printf("\n"); - } - if((i%5)!=0) printf("\n"); -} - -int main(int argc, char ** argv) -{ - uint16_t y[10]; -/* first test code 0xx 10xxx 11xxxx */ -/* 0-3 4-11 12-27 */ - - uint64_t tx1[]={0,4,12,28}; - uint64_t tc1[]={0,0x10,0x30}; - uint8_t tl1[]={3,5,6}; - uint8_t tsg1[]={0,0,1,2}; - ZCOD zc1 = {1,3,0,2,tx1,tc1,tl1,tsg1,tl1,tl1}; - -/* second test code 0xx 10xxx 11xxxx */ -/* 0-3 4-11 12-27 */ -/* after translation 0 1 2 3 4 5 6 */ -/* goes to 4 5 0 2 1 6 3 */ - - uint64_t tx2[]={0,4,12,28}; - uint64_t tc2[]={0,0x10,0x30}; - uint8_t tl2[]={3,5,6}; - uint8_t tsg2[]={0,0,1,2}; - uint8_t ttx2[]={4,5,0,2,1,6,3}; - uint8_t tux2[]={2,4,3,6,0,1,5}; - ZCOD zc2 = {2,3,6,2,tx2,tc2,tl2,tsg2,ttx2,tux2}; - -/* third test code 0xx 10xxx 11xxxx */ -/* with delta 0-3 4-11 12-27 */ - - uint64_t tx3[]={0,4,12,28}; - uint64_t tc3[]={0,0x10,0x30}; - uint8_t tl3[]={3,5,6}; - uint8_t tsg3[]={0,0,1,2}; - ZCOD zc3 = {3,3,0,2,tx3,tc3,tl3,tsg3,tl3,tl3}; - - STEX * st; - uint16_t sw0[]={0x0000}; - uint16_t sw2[]={0xFFFC}; - - TUBER * t1; - uint64_t stats[20]; - - ZSTR * z1; - CTX ctx; - uint64_t i,j,k; - uint64_t nokeys; - long len; - uint64_t d1,d2; - uint64_t b0,b1,b2,b3,b4,b5,b6,b7; - uint64_t k0,k1,k2,k3,k4,k5,k6,k7; - uint16_t * fw1; - int q; - err=0; - -/* */ -/* 001 - 020 First batch to just exercise the simple */ -/* bit handling routines a little */ - - z1=ZStrCons(3); - ckint(1,z1->alloc,3); /* did it allocate OK */ - len=ZStrLen(z1); - ckint(2,len,0); /* len=0 at start */ - ZStrBitsIn(0x05A792,24,z1); - len=ZStrLen(z1); - ckint(3,len,24); /* len=24 now */ - ZStrBitsIn(0xF,4,z1); - len=ZStrLen(z1); - ckint(4,len,28); /* len=28 now */ - j=ZStrBitsPeek(z1,16); - ckint(5,j,0x05A7); /* first 16 bits */ - len=ZStrLen(z1); - ckint(6,len,28); /* length the same */ - j=ZStrBitsOut(z1,8); - ckint(7,j,0x05); /* first 8 bits */ - len=ZStrLen(z1); - ckint(8,len,20); /* length 20 now */ - j=ZStrBitsPeek(z1,28); - ckint(9,j,0xA792F00); /* first 28 bits! */ - j=ZStrBitsOut(z1,12); - ckint(10,j,0xA79); /* last 12 bits */ - len=ZStrLen(z1); - ckint(11,len,8); /* length 8 now */ - ZStrBitsIn(0xC0,8,z1); - len=ZStrLen(z1); - ckint(12,len,16); /* length 16 */ - j=ZStrBitsPeek(z1,16); - ckint(13,j,0x2FC0); /* 0x2FC0 -> Normalize */ - ZStrNormalize(z1); - j=ZStrBitsPeek(z1,16); - ckint(14,j,0x2FC0); /* still 0x2FC0 */ - len=ZStrLen(z1); - ckint(15,len,10); /* length 11 now */ - ZStrClear(z1); - len=ZStrLen(z1); - ckint(16,len,0); /* length 0 now */ - j=ZStrBitsPeek(z1,28); - ckint(17,j,0); /* last 28 bits all 0 */ - j=ZStrBitsOut(z1,12); - ckint(18,j,0); /* last 12 bits all 0 */ - ZStrDest(z1); - -/* */ -/* 0021 - 039 Next batch to test basic Enc/Decode */ -/* */ -/* test code 0xx 10xxx 11xxxx */ -/* 0-3 4-11 12-27 (28+ illegal) */ - - z1=ZStrCons(3); - ZStrCxEnc(z1,&zc1,&ctx,3); - len=ZStrLen(z1); - ckint(21,len,3); /* length 3 now */ - j=ZStrBitsPeek(z1,5); - ckint(22,j,0xC); /* 011 00 */ - j=ZStrCxDec(z1,&zc1,&ctx); - ckint(23,j,3); - len=ZStrLen(z1); - ckint(24,len,0); /* length 0 now */ - ZStrClear(z1); - ZStrCxEnc(z1,&zc1,&ctx,27); /* put in limit values */ - ZStrCxEnc(z1,&zc1,&ctx,4); - ZStrCxEnc(z1,&zc1,&ctx,3); - ZStrCxEnc(z1,&zc1,&ctx,12); - ZStrCxEnc(z1,&zc1,&ctx,11); - ZStrCxEnc(z1,&zc1,&ctx,0); - len=ZStrLen(z1); - ckint(25,len,28); /* length should be 28 */ - ZStrNormalize(z1); - len=ZStrLen(z1); - ckint(26,len,25); /* length should be 25 */ - j=ZStrCxDec(z1,&zc1,&ctx); - ckint(27,j,27); - j=ZStrCxDec(z1,&zc1,&ctx); - ckint(28,j,4); - j=ZStrCxDec(z1,&zc1,&ctx); - ckint(29,j,3); - j=ZStrCxDec(z1,&zc1,&ctx); - ckint(30,j,12); - j=ZStrCxDec(z1,&zc1,&ctx); - ckint(31,j,11); - j=ZStrCxDec(z1,&zc1,&ctx); - ckint(32,j,0); - ZStrClear(z1); - j=0; - for(i=0;i<1000;i++) - { - j+=11; - if(j>27) j-=28; - ZStrCxEnc(z1,&zc1,&ctx,j); - } - ZStrNormalize(z1); - j=0; - for(i=0;i<1000;i++) - { - j+=11; - if(j>27) j-=28; - k=ZStrCxDec(z1,&zc1,&ctx); - ckint(33,k,j); - } - len=ZStrLen(z1); - ckint(34,len,0); - ZStrNormalize(z1); - ZStrDest(z1); - -/* */ -/* 0041 - 059 Next batch to test type 2 Enc/Decode */ - -/* second test code 0xx 10xxx 11xxxx */ -/* 0-3 4-11 12-27 */ -/* after translation 0 1 2 3 4 5 6 */ -/* goes to 4 5 0 2 1 6 3 */ - - z1=ZStrCons(3); - ZStrCxEnc(z1,&zc2,&ctx,6); - len=ZStrLen(z1); - ckint(41,len,3); /* length 3 now */ - j=ZStrBitsPeek(z1,5); - ckint(42,j,0xC); /* 011 00 */ - j=ZStrCxDec(z1,&zc2,&ctx); - ckint(43,j,6); - len=ZStrLen(z1); - ckint(44,len,0); /* length 0 now */ - ZStrClear(z1); - ZStrCxEnc(z1,&zc2,&ctx,27); /* put in limit values */ - ZStrCxEnc(z1,&zc2,&ctx,0); /* 4 */ - ZStrCxEnc(z1,&zc2,&ctx,6); /* 3 */ - ZStrCxEnc(z1,&zc2,&ctx,12); - ZStrCxEnc(z1,&zc2,&ctx,11); - ZStrCxEnc(z1,&zc2,&ctx,2); /* 0 */ - len=ZStrLen(z1); - ckint(45,len,28); /* length should be 28 */ - ZStrNormalize(z1); - len=ZStrLen(z1); - ckint(46,len,25); /* length should be 25 */ - j=ZStrCxDec(z1,&zc2,&ctx); - ckint(47,j,27); - j=ZStrCxDec(z1,&zc2,&ctx); - ckint(48,j,0); - j=ZStrCxDec(z1,&zc2,&ctx); - ckint(49,j,6); - j=ZStrCxDec(z1,&zc2,&ctx); - ckint(50,j,12); - j=ZStrCxDec(z1,&zc2,&ctx); - ckint(51,j,11); - j=ZStrCxDec(z1,&zc2,&ctx); - ckint(52,j,2); - ZStrClear(z1); - j=0; - for(i=0;i<1000;i++) - { - j+=11; - if(j>27) j-=28; - ZStrCxEnc(z1,&zc2,&ctx,j); - } - ZStrNormalize(z1); - j=0; - for(i=0;i<1000;i++) - { - j+=11; - if(j>27) j-=28; - k=ZStrCxDec(z1,&zc2,&ctx); - ckint(53,k,j); - } - len=ZStrLen(z1); - ckint(54,len,0); - ZStrNormalize(z1); - ZStrDest(z1); - -/* */ -/* 0060 - 079 Test Xlate and UnXl */ -/* after translation 0 1 2 3 4 5 6 */ -/* goes to 4 5 0 2 1 6 3 */ - - k=ZStrXlate(&zc2,0); - ckint(60,k,4); - k=ZStrXlate(&zc2,1); - ckint(61,k,5); - k=ZStrXlate(&zc2,2); - ckint(62,k,0); - k=ZStrXlate(&zc2,3); - ckint(63,k,2); - k=ZStrXlate(&zc2,4); - ckint(64,k,1); - k=ZStrXlate(&zc2,5); - ckint(65,k,6); - k=ZStrXlate(&zc2,6); - ckint(66,k,3); - k=ZStrXlate(&zc2,7); - ckint(67,k,7); - k=ZStrXlate(&zc2,17); - ckint(68,k,17); - k=ZStrXlate(&zc2,77777); - ckint(69,k,77777); - - k=ZStrUnXl(&zc2,0); - ckint(70,k,2); - k=ZStrUnXl(&zc2,1); - ckint(71,k,4); - k=ZStrUnXl(&zc2,2); - ckint(72,k,3); - k=ZStrUnXl(&zc2,3); - ckint(73,k,6); - k=ZStrUnXl(&zc2,4); - ckint(74,k,0); - k=ZStrUnXl(&zc2,5); - ckint(75,k,1); - k=ZStrUnXl(&zc2,6); - ckint(76,k,5); - k=ZStrUnXl(&zc2,7); - ckint(77,k,7); - k=ZStrUnXl(&zc2,17); - ckint(78,k,17); - k=ZStrUnXl(&zc2,7777); - ckint(79,k,7777); - -/* */ -/* 0080 - 099 Test Enc/Decode of type 3 (delta) code */ -/* */ -/* test code 0xx 10xxx 11xxxx DELTA */ -/* 0-3 4-11 12-27 (28+ illegal) */ - - z1=ZStrCons(3); - ZStrCxClear(&zc3,&ctx); - ZStrCxEnc(z1,&zc3,&ctx,3); - ZStrCxEnc(z1,&zc3,&ctx,5); - ZStrCxEnc(z1,&zc3,&ctx,9); /* 011 010 10000 */ - len=ZStrLen(z1); - ckint(80,len,11); /* length 11 now */ - j=ZStrBitsPeek(z1,10); - ckint(81,j,0x1A8); /* 011 010 1000 */ - ZStrNormalize(z1); - j=ZStrBitsPeek(z1,10); - ckint(82,j,0x1A8); /* 011 010 1000 */ - ZStrCxClear(&zc3,&ctx); - j=ZStrCxDec(z1,&zc3,&ctx); - ckint(83,j,3); - j=ZStrCxDec(z1,&zc3,&ctx); - ckint(84,j,5); - j=ZStrCxDec(z1,&zc3,&ctx); - ckint(85,j,9); - len=ZStrLen(z1); - ckint(86,len,0); /* length 0 now */ - j=ZStrCxDec(z1,&zc3,&ctx); - ckint(87,j,9); - j=ZStrCxDec(z1,&zc3,&ctx); - ckint(88,j,9); - - ZStrClear(z1); - ZStrCxClear(&zc3,&ctx); - j=0; - for(i=0;i<1000;i++) - { - j+=4; - ZStrCxEnc(z1,&zc3,&ctx,j); - } - ZStrNormalize(z1); - ZStrCxClear(&zc3,&ctx); - j=0; - for(i=0;i<1000;i++) - { - j+=4; - k=ZStrCxDec(z1,&zc3,&ctx); - ckint(89,k,j); - } - len=ZStrLen(z1); - ckint(90,len,0); - ZStrDest(z1); -/* */ -/* 100 - 119 Test Extract, Insert and ExtLen */ - z1=ZStrCons(3); - ZStrBitsIn(0xDEADBEEF,32,z1); - len=ZStrLen(z1); - ckint(100,len,32); /* len=32 now */ - len=ZStrMaxLen(z1,2); - ckint(101,len,3); - len=ZStrExtract(z1,(void *)y,2); - ckint(102,len,3); - ckint(103,y[0],0xDEAE); - ckint(104,y[1],0xBEEF); - ckint(105,y[2],0x8000); - ZStrDest(z1); - z1=ZStrCons(5); - ZStrInsert(z1,(void *)y,2); - len=ZStrLen(z1); - ckint(106,len,32); - j=ZStrBitsOut(z1,32); - ckint(107,j,0xDEADBEEF); - ZStrDest(z1); - -/* 200 - 299 - test the codes and hashes */ -/* not yet written */ - -/* 300 - 399 test the tuber things */ - - z1=ZStrCons(3); - for(q=0;q<400;q+=100) - { - if(q==0) t1 = ZStrTuberCons(152,TUBER_BITS_8); - if(q==100) t1 = ZStrTuberCons(152,TUBER_BITS_16); - if(q==200) t1 = ZStrTuberCons(152,TUBER_BITS_32); - if(q==300) t1 = ZStrTuberCons(152,TUBER_BITS_64); - - nokeys=t1->kmax; - d1=nokeys/2; - d2=0; -/* try inserting three items with same keya */ -/* should get keybe as 0, 1 and 2 respectively. */ -/* this relies on three inserts working with keyb*/ -/* coming out as 0, 1 and 2. Change construction*/ -/* size if this doesn't work. */ - b0=ZStrTuberIns(t1,d1,d2); - ckint(300+q,b0,0); - b1=ZStrTuberIns(t1,d1,d2); - ckint(301+q,b1,1); - b2=ZStrTuberIns(t1,d1,d2); - ckint(302+q,b2,2); - k0=ZStrTuberK(t1,d1,d2,b0); - k1=ZStrTuberK(t1,d1,d2,b1); - k2=ZStrTuberK(t1,d1,d2,b2); - ZStrTuberDelete(t1,k0); - - ZStrBitsIn(0xDEAD,16,z1); - ZStrNormalize(z1); - ZStrTuberUpdate(t1,k1,z1); - ZStrClear(z1); - j=ZStrTuberRead(t1,k1,z1); - ckint(303+q,j,0); - len=ZStrLen(z1); - ckint(304+q,len,16); - j=ZStrBitsOut(z1,16); - ckint(305+q,j,0xDEAD); /* get our data back */ - ZStrTuberDest(t1); - } - ZStrDest(z1); -/* */ -/* 700 - 799 STEX testing - sorting the words */ -/* */ - - z1=ZStrCons(3); - st=ZStrSTCons(2); - ZStrBitsIn(0xDB,8,z1); - ZStrSTAppend(st,z1); - ZStrSTSort(st); - fw1=ZStrSTFind(st,(void*) sw0); - if(fw1!=NULL) ckint(704,*fw1,7777); - fw1=ZStrSTFind(st,(void*) sw2); - ckint(705,*fw1,0xDB00); - ZStrSTDest(st); - st=ZStrSTCons(2); - for(i=1;i<100;i++) - { - j=(17*i)%97; - ZStrClear(z1); - ZStrBitsIn(j,8,z1); - ZStrSTAppend(st,z1); - } - ZStrSTSort(st); - ZStrSTDest(st); - ZStrDest(z1); - -/* */ -/* 800 - 810 LastEnc and LastDec testing */ -/* */ - - z1=ZStrCons(5); - for(i=0;i<10000;i++) - { - ZStrClear(z1); - ZStrLastEnc(z1,i); - j=ZStrLastDec(z1); - ckint(800,j,i); - } - ZStrDest(z1); - - - printf("End of z-string regression - %d errors\n",err); - return 0; -} - -/* end of zstring regression module */ diff --git a/arangod/VocBase/index.c b/arangod/VocBase/index.c index 4e3ff54671..823629aa1d 100644 --- a/arangod/VocBase/index.c +++ b/arangod/VocBase/index.c @@ -4365,8 +4365,6 @@ void TRI_DestroyFulltextIndex (TRI_index_t* idx) { LOG_TRACE("destroying fulltext index"); - fulltextIndex = (TRI_fulltext_index_t*) idx; - TRI_FreeFtsIndex(fulltextIndex->_fulltextIndex); } diff --git a/arangod/VocBase/vocbase.c b/arangod/VocBase/vocbase.c index ffe4b1ef28..2899914c59 100644 --- a/arangod/VocBase/vocbase.c +++ b/arangod/VocBase/vocbase.c @@ -280,7 +280,6 @@ static bool DropCollectionCallback (TRI_collection_t* col, void* data) { TRI_vocbase_t* vocbase; regmatch_t matches[3]; regex_t re; - char* newFilename; int res; size_t i; @@ -366,6 +365,8 @@ static bool DropCollectionCallback (TRI_collection_t* col, void* data) { char* tmp1; char* tmp2; char* tmp3; + + char* newFilename; tmp1 = TRI_DuplicateString2(first, firstLen); tmp2 = TRI_DuplicateString2(second, secondLen);