diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index b57fe923c2..956a10a322 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -886,6 +886,8 @@ IndexRangeBlock::IndexRangeBlock (ExecutionEngine* engine, _posInDocs(0), _anyBoundVariable(false), _skiplistIterator(nullptr), + _hashIndexSearchValue({ 0, nullptr }), + _hashNextElement(nullptr), _condition(new IndexOrCondition()), _posInRanges(0), _sortCoords(), @@ -1223,58 +1225,63 @@ bool IndexRangeBlock::initRanges () { removeOverlapsIndexOr(*_condition); } - if (en->_index->type == TRI_IDX_TYPE_PRIMARY_INDEX) { + if (en->_index->type == TRI_IDX_TYPE_PRIMARY_INDEX || + en->_index->type == TRI_IDX_TYPE_EDGE_INDEX) { return true; //no initialization here! } - else if (en->_index->type == TRI_IDX_TYPE_HASH_INDEX) { - return true; //no initialization here! + + if (en->_index->type == TRI_IDX_TYPE_HASH_INDEX) { + if (_condition->empty()) { + return false; + } + + _posInRanges = 0; + getHashIndexIterator(_condition->at(_posInRanges)); + return (_hashIndexSearchValue._values != nullptr); } if (en->_index->type == TRI_IDX_TYPE_SKIPLIST_INDEX) { - if (! _condition->empty()) { - // sort the conditions! + if (_condition->empty()) { + return false; + } - // TODO this should also be done for hash indexes when - // they are lazy too. + // sort the conditions! - // first sort by the prefix of the index - std::vector> prefix; - if (! 
_sortCoords.empty()) { - _sortCoords.clear(); - _sortCoords.reserve(_condition->size()); - } - for (size_t s = 0; s < _condition->size(); s++) { - _sortCoords.push_back(s); - std::vector next; - next.reserve(en->_index->fields.size()); - prefix.emplace_back(next); - // prefix[s][t] = position in _condition[s] corresponding to the th index - // field - for (size_t t = 0; t < en->_index->fields.size(); t++) { - for (size_t u = 0; u < _condition->at(s).size(); u++) { - auto ri = _condition->at(s)[u]; - if (en->_index->fields[t].compare(ri._attr) == 0) { - prefix.at(s).insert(prefix.at(s).begin() + t, u); - break; - } + // TODO this should also be done for hash indexes when + // they are lazy too. + + // first sort by the prefix of the index + std::vector> prefix; + if (! _sortCoords.empty()) { + _sortCoords.clear(); + _sortCoords.reserve(_condition->size()); + } + for (size_t s = 0; s < _condition->size(); s++) { + _sortCoords.push_back(s); + std::vector next; + next.reserve(en->_index->fields.size()); + prefix.emplace_back(next); + // prefix[s][t] = position in _condition[s] corresponding to the th index + // field + for (size_t t = 0; t < en->_index->fields.size(); t++) { + for (size_t u = 0; u < _condition->at(s).size(); u++) { + auto ri = _condition->at(s)[u]; + if (en->_index->fields[t].compare(ri._attr) == 0) { + prefix.at(s).insert(prefix.at(s).begin() + t, u); + break; } } } - - SortFunc sortFunc(prefix, _condition, en->_reverse); - - // then sort by the values of the bounds - std::sort(_sortCoords.begin(), _sortCoords.end(), sortFunc); - _posInRanges = 0; - getSkiplistIterator(_condition->at(_sortCoords[_posInRanges])); - return (_skiplistIterator != nullptr); - } - else { - return false; } - } - else if (en->_index->type == TRI_IDX_TYPE_EDGE_INDEX) { - return true; //no initialization here! 
+ + SortFunc sortFunc(prefix, _condition, en->_reverse); + + // then sort by the values of the bounds + std::sort(_sortCoords.begin(), _sortCoords.end(), sortFunc); + _posInRanges = 0; + + getSkiplistIterator(_condition->at(_sortCoords[_posInRanges])); + return (_skiplistIterator != nullptr); } THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "unexpected index type"); @@ -1431,9 +1438,7 @@ bool IndexRangeBlock::readIndex (size_t atMost) { } } else if (en->_index->type == TRI_IDX_TYPE_HASH_INDEX) { - if (_flag) { - readHashIndex(*_condition); - } + readHashIndex(atMost); } else if (en->_index->type == TRI_IDX_TYPE_SKIPLIST_INDEX) { readSkiplistIndex(atMost); @@ -1715,87 +1720,6 @@ void IndexRangeBlock::readPrimaryIndex (IndexOrCondition const& ranges) { LEAVE_BLOCK; } -//////////////////////////////////////////////////////////////////////////////// -/// @brief read documents using a hash index -//////////////////////////////////////////////////////////////////////////////// - -void IndexRangeBlock::readHashIndex (IndexOrCondition const& ranges) { - ENTER_BLOCK; - auto en = static_cast(getPlanNode()); - TRI_index_t* idx = en->_index->data; - TRI_ASSERT(idx != nullptr); - TRI_hash_index_t* hashIndex = (TRI_hash_index_t*) idx; - - TRI_shaper_t* shaper = _collection->documentCollection()->getShaper(); - TRI_ASSERT(shaper != nullptr); - - TRI_index_search_value_t searchValue; - - auto destroySearchValue = [&]() { - if (searchValue._values != nullptr) { - for (size_t i = 0; i < searchValue._length; ++i) { - TRI_DestroyShapedJson(shaper->_memoryZone, &searchValue._values[i]); - } - TRI_Free(TRI_UNKNOWN_MEM_ZONE, searchValue._values); - } - searchValue._values = nullptr; - }; - - auto setupSearchValue = [&](size_t pos) -> bool { - size_t const n = hashIndex->_paths._length; - searchValue._length = 0; - // initialize the whole range of shapes with zeros - searchValue._values = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, - n * sizeof(TRI_shaped_json_t), true)); - - if 
(searchValue._values == nullptr) { - THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); - } - - searchValue._length = n; - - - for (size_t i = 0; i < n; ++i) { - TRI_shape_pid_t pid = *(static_cast(TRI_AtVector(&hashIndex->_paths, i))); - TRI_ASSERT(pid != 0); - - char const* name = TRI_AttributeNameShapePid(shaper, pid); - std::string const lookFor = std::string(name); - - for (auto x : ranges[pos]) { - if (x._attr == lookFor) { //found attribute - auto shaped = TRI_ShapedJsonJson(shaper, x._lowConst.bound().json(), false); - // here x->_low->_bound = x->_high->_bound - if (shaped == nullptr) { - return false; - } - searchValue._values[i] = *shaped; - TRI_Free(shaper->_memoryZone, shaped); - break; - } - } - } - return true; - }; - - for (size_t i = 0; i < ranges.size(); i++) { - if (setupSearchValue(i)) { - try { - size_t const n = _documents.size(); - TRI_LookupHashIndex(idx, &searchValue, _documents); - _engine->_stats.scannedIndex += static_cast(_documents.size() - n); - } - catch (...) 
{ - destroySearchValue(); - throw; - } - } - - destroySearchValue(); - } - LEAVE_BLOCK; -} - //////////////////////////////////////////////////////////////////////////////// /// @brief read documents using the edges index //////////////////////////////////////////////////////////////////////////////// @@ -1853,6 +1777,119 @@ void IndexRangeBlock::readEdgeIndex (IndexOrCondition const& ranges) { LEAVE_BLOCK; } +void IndexRangeBlock::destroyHashIndexSearchValues () { + if (_hashIndexSearchValue._values != nullptr) { + TRI_shaper_t* shaper = _collection->documentCollection()->getShaper(); + + for (size_t i = 0; i < _hashIndexSearchValue._length; ++i) { + TRI_DestroyShapedJson(shaper->_memoryZone, &_hashIndexSearchValue._values[i]); + } + + TRI_Free(TRI_UNKNOWN_MEM_ZONE, _hashIndexSearchValue._values); + _hashIndexSearchValue._values = nullptr; + } +} + +bool IndexRangeBlock::setupHashIndexSearchValue (IndexAndCondition const& range) { + auto en = static_cast(getPlanNode()); + TRI_index_t* idx = en->_index->data; + TRI_ASSERT(idx != nullptr); + TRI_hash_index_t* hashIndex = (TRI_hash_index_t*) idx; + + TRI_shaper_t* shaper = _collection->documentCollection()->getShaper(); + + size_t const n = hashIndex->_paths._length; + + _hashIndexSearchValue._length = 0; + // initialize the whole range of shapes with zeros + _hashIndexSearchValue._values = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, + n * sizeof(TRI_shaped_json_t), true)); + + if (_hashIndexSearchValue._values == nullptr) { + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } + + _hashIndexSearchValue._length = n; + + + for (size_t i = 0; i < n; ++i) { + TRI_shape_pid_t pid = *(static_cast(TRI_AtVector(&hashIndex->_paths, i))); + TRI_ASSERT(pid != 0); + + char const* name = TRI_AttributeNameShapePid(shaper, pid); + std::string const lookFor = std::string(name); + + for (auto x : range) { + if (x._attr == lookFor) { //found attribute + auto shaped = TRI_ShapedJsonJson(shaper, x._lowConst.bound().json(), false); 
+ // here x->_low->_bound = x->_high->_bound + if (shaped == nullptr) { + return false; + } + + _hashIndexSearchValue._values[i] = *shaped; + TRI_Free(shaper->_memoryZone, shaped); + break; + } + } + } + + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief build search values for hash index lookup +//////////////////////////////////////////////////////////////////////////////// + +void IndexRangeBlock::getHashIndexIterator (IndexAndCondition const& ranges) { + ENTER_BLOCK; + + _hashNextElement = nullptr; + + destroyHashIndexSearchValues(); + if (! setupHashIndexSearchValue(ranges)) { + destroyHashIndexSearchValues(); + } + + LEAVE_BLOCK; +} + +void IndexRangeBlock::readHashIndex (size_t atMost) { + ENTER_BLOCK; + + if (_hashIndexSearchValue._values == nullptr) { + return; + } + + auto en = static_cast(getPlanNode()); + TRI_index_t* idx = en->_index->data; + TRI_ASSERT(idx != nullptr); + + size_t nrSent = 0; + while (nrSent < atMost) { + size_t const n = _documents.size(); + + TRI_LookupHashIndex(idx, &_hashIndexSearchValue, _documents, _hashNextElement, atMost); + size_t const numRead = _documents.size() - n; + + _engine->_stats.scannedIndex += static_cast(numRead); + nrSent += numRead; + + if (_hashNextElement == nullptr) { + destroyHashIndexSearchValues(); + + if (++_posInRanges < _condition->size()) { + getHashIndexIterator(_condition->at(_posInRanges)); + } + if (_hashIndexSearchValue._values == nullptr) { + _hashNextElement = nullptr; + break; + } + } + } + LEAVE_BLOCK; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief read documents using a skiplist index //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index e005f7277d..3f61523f9c 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -43,6 +43,8 @@ #include 
"Utils/V8TransactionContext.h" #include "Cluster/ClusterComm.h" +struct TRI_hash_index_element_multi_s; + namespace triagens { namespace aql { @@ -605,10 +607,28 @@ namespace triagens { void readEdgeIndex (IndexOrCondition const&); //////////////////////////////////////////////////////////////////////////////// -/// @brief read using a skiplist index +/// @brief destroy the hash index search value //////////////////////////////////////////////////////////////////////////////// - void readSkiplistIndex (size_t atMost); + void destroyHashIndexSearchValues (); + +//////////////////////////////////////////////////////////////////////////////// +/// @brief set up a hash index search value +//////////////////////////////////////////////////////////////////////////////// + + bool setupHashIndexSearchValue (IndexAndCondition const&); + +//////////////////////////////////////////////////////////////////////////////// +/// @brief produce a reentrant hash index iterator +//////////////////////////////////////////////////////////////////////////////// + + void getHashIndexIterator (IndexAndCondition const&); + +//////////////////////////////////////////////////////////////////////////////// +/// @brief read using a hash index +//////////////////////////////////////////////////////////////////////////////// + + void readHashIndex (size_t); //////////////////////////////////////////////////////////////////////////////// /// @brief this tries to create a skiplistIterator to read from the index. 
@@ -617,10 +637,10 @@ namespace triagens { void getSkiplistIterator (IndexAndCondition const&); //////////////////////////////////////////////////////////////////////////////// -/// @brief read using a hash index +/// @brief read using a skiplist index //////////////////////////////////////////////////////////////////////////////// - void readHashIndex (IndexOrCondition const&); + void readSkiplistIndex (size_t atMost); //////////////////////////////////////////////////////////////////////////////// /// @brief andCombineRangeInfoVecs: combine the arguments into a single vector, @@ -724,6 +744,18 @@ namespace triagens { TRI_skiplist_iterator_t* _skiplistIterator; +//////////////////////////////////////////////////////////////////////////////// +/// @brief current search value for hash index lookup +//////////////////////////////////////////////////////////////////////////////// + + TRI_index_search_value_t _hashIndexSearchValue; + +//////////////////////////////////////////////////////////////////////////////// +/// @brief reentrant hash index iterator state +//////////////////////////////////////////////////////////////////////////////// + + struct TRI_hash_index_element_multi_s* _hashNextElement; + //////////////////////////////////////////////////////////////////////////////// /// @brief _condition: holds the IndexAndCondition for the current incoming block, /// this is just the _ranges[_rangesPos] member of the plan node if _allBoundsConstant diff --git a/arangod/HashIndex/hash-array-multi.cpp b/arangod/HashIndex/hash-array-multi.cpp index 08cd85e614..c546bb8245 100644 --- a/arangod/HashIndex/hash-array-multi.cpp +++ b/arangod/HashIndex/hash-array-multi.cpp @@ -516,6 +516,53 @@ int TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const* array, return TRI_ERROR_NO_ERROR; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief looks up an element given a key and a state
+//////////////////////////////////////////////////////////////////////////////// + +int TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const* array, + TRI_index_search_value_t const* key, + std::vector& result, + TRI_hash_index_element_multi_t*& next, + size_t batchSize) { + size_t const initialSize = result.size(); + TRI_ASSERT_EXPENSIVE(array->_nrUsed < array->_nrAlloc); + TRI_ASSERT(batchSize > 0); + + if (next == nullptr) { + // no previous state. start at the beginning + uint64_t const n = array->_nrAlloc; + uint64_t i, k; + + i = k = HashKey(array, key) % n; + + for (; i < n && array->_table[i]._document != nullptr && ! IsEqualKeyElement(array, key, &array->_table[i]); ++i); + if (i == n) { + for (i = 0; i < k && array->_table[i]._document != nullptr && ! IsEqualKeyElement(array, key, &array->_table[i]); ++i); + } + + TRI_ASSERT_EXPENSIVE(i < n); + + if (array->_table[i]._document != nullptr) { + result.emplace_back(*(array->_table[i]._document)); + } + next = array->_table[i]._next; + } + + if (next != nullptr) { + // we already had a state + size_t total = result.size() - initialSize; + + while (next != nullptr && total < batchSize) { + result.emplace_back(*(next->_document)); + next = next->_next; + ++total; + } + } + + return TRI_ERROR_NO_ERROR; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief adds an element to the array /// diff --git a/arangod/HashIndex/hash-array-multi.h b/arangod/HashIndex/hash-array-multi.h index 40e63c8786..f4f7062731 100644 --- a/arangod/HashIndex/hash-array-multi.h +++ b/arangod/HashIndex/hash-array-multi.h @@ -129,6 +129,16 @@ int TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const*, struct TRI_index_search_value_s const*, std::vector&); +//////////////////////////////////////////////////////////////////////////////// +/// @brief looks up an element given a key and a state +//////////////////////////////////////////////////////////////////////////////// + +int
TRI_LookupByKeyHashArrayMulti (TRI_hash_array_multi_t const*, + struct TRI_index_search_value_s const*, + std::vector&, + struct TRI_hash_index_element_multi_s*&, + size_t); + //////////////////////////////////////////////////////////////////////////////// /// @brief adds an element to the array //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/HashIndex/hash-index.cpp b/arangod/HashIndex/hash-index.cpp index 2f243e6452..c5e556f379 100644 --- a/arangod/HashIndex/hash-index.cpp +++ b/arangod/HashIndex/hash-index.cpp @@ -35,6 +35,8 @@ #include "ShapedJson/shaped-json.h" #include "VocBase/document-collection.h" #include "VocBase/voc-shaper.h" + +struct TRI_hash_index_element_multi_s; // ----------------------------------------------------------------------------- // --SECTION-- private functions @@ -713,6 +715,25 @@ int TRI_LookupHashIndex (TRI_index_t* idx, return TRI_LookupByKeyHashArrayMulti(&hashIndex->_hashArrayMulti, searchValue, documents); } +//////////////////////////////////////////////////////////////////////////////// +/// @brief locates entries in the hash index given shaped json objects +//////////////////////////////////////////////////////////////////////////////// + +int TRI_LookupHashIndex (TRI_index_t* idx, + TRI_index_search_value_t* searchValue, + std::vector& documents, + struct TRI_hash_index_element_multi_s*& next, + size_t batchSize) { + TRI_hash_index_t* hashIndex = (TRI_hash_index_t*) idx; + + if (hashIndex->base._unique) { + next = nullptr; + return HashIndex_find(hashIndex, searchValue, documents); + } + + return TRI_LookupByKeyHashArrayMulti(&hashIndex->_hashArrayMulti, searchValue, documents, next, batchSize); +} + // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // ----------------------------------------------------------------------------- diff --git a/arangod/HashIndex/hash-index.h b/arangod/HashIndex/hash-index.h index 
bef3c1e0ba..0657bc0109 100644 --- a/arangod/HashIndex/hash-index.h +++ b/arangod/HashIndex/hash-index.h @@ -133,13 +133,22 @@ TRI_vector_pointer_t TRI_LookupHashIndex (TRI_index_t*, //////////////////////////////////////////////////////////////////////////////// /// @brief locates entries in the hash index given shaped json objects -/// it is the callers responsibility to destroy the result //////////////////////////////////////////////////////////////////////////////// int TRI_LookupHashIndex (TRI_index_t*, struct TRI_index_search_value_s*, std::vector&); +//////////////////////////////////////////////////////////////////////////////// +/// @brief locates entries in the hash index given shaped json objects +//////////////////////////////////////////////////////////////////////////////// + +int TRI_LookupHashIndex (TRI_index_t*, + struct TRI_index_search_value_s*, + std::vector&, + struct TRI_hash_index_element_multi_s*&, + size_t); + #endif // -----------------------------------------------------------------------------