//////////////////////////////////////////////////////////////////////////////// /// @brief skiplist index /// /// @file /// /// DISCLAIMER /// /// Copyright 2014 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Dr. Frank Celler /// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany /// @author Copyright 2011-2013, triAGENS GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// #include "SkiplistIndex.h" #include "Aql/AstNode.h" #include "Aql/SortCondition.h" #include "Basics/AttributeNameParser.h" #include "Basics/debugging.h" #include "Basics/json-utilities.h" #include "Basics/logging.h" #include "VocBase/document-collection.h" #include "VocBase/transaction.h" #include "VocBase/VocShaper.h" using namespace triagens::arango; using Json = triagens::basics::Json; // ----------------------------------------------------------------------------- // --SECTION-- private functions // ----------------------------------------------------------------------------- static size_t sortWeight (triagens::aql::AstNode const* node) { switch (node->type) { case triagens::aql::NODE_TYPE_OPERATOR_BINARY_EQ: return 1; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN: return 2; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_LT: return 3; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_GT: return 4; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_LE: return 5; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_GE: return 6; default: return 42; } } //////////////////////////////////////////////////////////////////////////////// /// @brief Create an index operator for the given bound. //////////////////////////////////////////////////////////////////////////////// static TRI_index_operator_t* buildBoundOperator (TRI_json_t const* bound, bool includeEqual, bool upper, TRI_json_t const* parameters, VocShaper* shaper) { if (bound == nullptr) { return nullptr; } std::unique_ptr boundOperator; TRI_index_operator_type_e type; if (includeEqual) { if (upper) { type = TRI_LE_INDEX_OPERATOR; } else { type = TRI_GE_INDEX_OPERATOR; } } else { if (upper) { type = TRI_LT_INDEX_OPERATOR; } else { type = TRI_GT_INDEX_OPERATOR; } } std::unique_ptr paramCopy; if (parameters == nullptr) { paramCopy.reset(TRI_CreateArrayJson(TRI_UNKNOWN_MEM_ZONE, 1)); } else { paramCopy.reset(TRI_CopyJson(TRI_UNKNOWN_MEM_ZONE, parameters)); } if (paramCopy == nullptr) { return nullptr; } TRI_PushBack3ArrayJson(TRI_UNKNOWN_MEM_ZONE, paramCopy.get(), TRI_CopyJson(TRI_UNKNOWN_MEM_ZONE, bound)); boundOperator.reset(TRI_CreateIndexOperator(type, nullptr, nullptr, paramCopy.get(), shaper, 1)); paramCopy.release(); return boundOperator.release(); } //////////////////////////////////////////////////////////////////////////////// /// @brief Create an index operator for the range information. /// Will either be a nullptr if no range is used. /// Or a LE, LT, GE, GT operator if only one bound is given /// Or an AND operator if both bounds are given. //////////////////////////////////////////////////////////////////////////////// static TRI_index_operator_t* buildRangeOperator (TRI_json_t const* lowerBound, bool lowerBoundInclusive, TRI_json_t const* upperBound, bool upperBoundInclusive, TRI_json_t const* parameters, VocShaper* shaper) { std::unique_ptr lowerOperator(buildBoundOperator(lowerBound, lowerBoundInclusive, false, parameters, shaper)); std::unique_ptr upperOperator(buildBoundOperator(upperBound, upperBoundInclusive, true, parameters, shaper)); /* std::cout << "LOWER BOUND: " << lowerBound << ", LOWER INCLUSIVE: " << lowerBoundInclusive << "\n"; std::cout << "UPPER BOUND: " << upperBound << ", UPPER INCLUSIVE: " << upperBoundInclusive << "\n"; */ if (lowerOperator == nullptr) { return upperOperator.release(); } if (upperOperator == nullptr) { return lowerOperator.release(); } // And combine both std::unique_ptr rangeOperator(TRI_CreateIndexOperator(TRI_AND_INDEX_OPERATOR, lowerOperator.get(), upperOperator.get(), nullptr, nullptr, 2)); lowerOperator.release(); upperOperator.release(); return rangeOperator.release(); }; //////////////////////////////////////////////////////////////////////////////// /// @brief frees an element in the skiplist //////////////////////////////////////////////////////////////////////////////// static void FreeElm (void* e) { auto element = static_cast(e); TRI_index_element_t::free(element); } // ............................................................................. // recall for all of the following comparison functions: // // left < right return -1 // left > right return 1 // left == right return 0 // // furthermore: // // the following order is currently defined for placing an order on documents // undef < null < boolean < number < strings < lists < hash arrays // note: undefined will be treated as NULL pointer not NULL JSON OBJECT // within each type class we have the following order // boolean: false < true // number: natural order // strings: lexicographical // lists: lexicographically and within each slot according to these rules. // ........................................................................... //////////////////////////////////////////////////////////////////////////////// /// @brief compares a key with an element, version with proper types //////////////////////////////////////////////////////////////////////////////// static int CompareKeyElement (TRI_shaped_json_t const* left, TRI_index_element_t const* right, size_t rightPosition, VocShaper* shaper) { TRI_ASSERT(nullptr != left); TRI_ASSERT(nullptr != right); auto rightSubobjects = right->subObjects(); return TRI_CompareShapeTypes(nullptr, nullptr, left, shaper, right->document()->getShapedJsonPtr(), &rightSubobjects[rightPosition], nullptr, shaper); } //////////////////////////////////////////////////////////////////////////////// /// @brief compares elements, version with proper types //////////////////////////////////////////////////////////////////////////////// static int CompareElementElement (TRI_index_element_t const* left, size_t leftPosition, TRI_index_element_t const* right, size_t rightPosition, VocShaper* shaper) { TRI_ASSERT(nullptr != left); TRI_ASSERT(nullptr != right); auto leftSubobjects = left->subObjects(); auto rightSubobjects = right->subObjects(); return TRI_CompareShapeTypes(left->document()->getShapedJsonPtr(), &leftSubobjects[leftPosition], nullptr, shaper, right->document()->getShapedJsonPtr(), &rightSubobjects[rightPosition], nullptr, shaper); } static int FillLookupOperator (TRI_index_operator_t* slOperator, TRI_document_collection_t* document) { if (slOperator == nullptr) { return TRI_ERROR_INTERNAL; } switch (slOperator->_type) { case TRI_AND_INDEX_OPERATOR: { TRI_logical_index_operator_t* logicalOperator = (TRI_logical_index_operator_t*) slOperator; int res = FillLookupOperator(logicalOperator->_left, document); if (res == TRI_ERROR_NO_ERROR) { res = FillLookupOperator(logicalOperator->_right, document); } if (res != TRI_ERROR_NO_ERROR) { return res; } break; } case TRI_EQ_INDEX_OPERATOR: case TRI_GE_INDEX_OPERATOR: case TRI_GT_INDEX_OPERATOR: case TRI_NE_INDEX_OPERATOR: case TRI_LE_INDEX_OPERATOR: case TRI_LT_INDEX_OPERATOR: { TRI_relation_index_operator_t* relationOperator = (TRI_relation_index_operator_t*) slOperator; relationOperator->_numFields = TRI_LengthVector(&relationOperator->_parameters->_value._objects); relationOperator->_fields = static_cast(TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_shaped_json_t) * relationOperator->_numFields, false)); if (relationOperator->_fields != nullptr) { for (size_t j = 0; j < relationOperator->_numFields; ++j) { TRI_json_t const* jsonObject = static_cast(TRI_AtVector(&(relationOperator->_parameters->_value._objects), j)); // find out if the search value is a list or an array if ((TRI_IsArrayJson(jsonObject) || TRI_IsObjectJson(jsonObject)) && slOperator->_type != TRI_EQ_INDEX_OPERATOR) { // non-equality operator used on list or array data type, this is disallowed // because we need to shape these objects first. however, at this place (index lookup) // we never want to create new shapes so we will have a problem if we cannot find an // existing shape for the search value. in this case we would need to raise an error // but then the query results would depend on the state of the shaper and if it had // seen previous such objects // we still allow looking for list or array values using equality. this is safe. TRI_Free(TRI_UNKNOWN_MEM_ZONE, relationOperator->_fields); relationOperator->_fields = nullptr; return TRI_ERROR_BAD_PARAMETER; } // now shape the search object (but never create any new shapes) TRI_shaped_json_t* shapedObject = TRI_ShapedJsonJson(document->getShaper(), jsonObject, false); // ONLY IN INDEX, PROTECTED by RUNTIME if (shapedObject != nullptr) { // found existing shape relationOperator->_fields[j] = *shapedObject; // shallow copy here is ok TRI_Free(TRI_UNKNOWN_MEM_ZONE, shapedObject); // don't require storage anymore } else { // shape not found TRI_Free(TRI_UNKNOWN_MEM_ZONE, relationOperator->_fields); relationOperator->_fields = nullptr; return TRI_RESULT_ELEMENT_NOT_FOUND; } } } else { relationOperator->_numFields = 0; // out of memory? } break; } } return TRI_ERROR_NO_ERROR; } // ----------------------------------------------------------------------------- // --SECTION-- class SkiplistIterator // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- public methods // ----------------------------------------------------------------------------- size_t SkiplistIterator::size () const { return _intervals.size(); } void SkiplistIterator::initCursor () { size_t const n = size(); if (0 < n) { if (_reverse) { // start at last interval, right endpoint _currentInterval = n - 1; _cursor = _intervals.at(_currentInterval)._rightEndPoint; } else { // start at first interval, left endpoint _currentInterval = 0; _cursor = _intervals.at(_currentInterval)._leftEndPoint; } } else { _cursor = nullptr; } } bool SkiplistIterator::hasNext () const { if (_reverse) { return hasPrevIteration(); } return hasNextIteration(); } TRI_index_element_t* SkiplistIterator::next () { if (_reverse) { return prevIteration(); } return nextIteration(); } //////////////////////////////////////////////////////////////////////////////// /// @brief Locates one or more ranges within the skiplist and returns iterator //////////////////////////////////////////////////////////////////////////////// // ............................................................................. // Tests whether the LeftEndPoint is < than RightEndPoint (-1) // Tests whether the LeftEndPoint is == to RightEndPoint (0) [empty] // Tests whether the LeftEndPoint is > than RightEndPoint (1) [undefined] // ............................................................................. bool SkiplistIterator::findHelperIntervalValid ( SkiplistIteratorInterval const& interval) { Node* lNode = interval._leftEndPoint; if (lNode == nullptr) { return false; } // Note that the right end point can be nullptr to indicate the end of // the index. Node* rNode = interval._rightEndPoint; if (lNode == rNode) { return false; } if (lNode->nextNode() == rNode) { // Interval empty, nothing to do with it. return false; } if (nullptr != rNode && rNode->nextNode() == lNode) { // Interval empty, nothing to do with it. return false; } if (_index->_skiplistIndex->getNrUsed() == 0) { return false; } if ( lNode == _index->_skiplistIndex->startNode() || nullptr == rNode ) { // The index is not empty, the nodes are not neighbours, one of them // is at the boundary, so the interval is valid and not empty. return true; } int compareResult = _index->CmpElmElm(lNode->document(), rNode->document(), triagens::basics::SKIPLIST_CMP_TOTORDER); return (compareResult == -1); // Since we know that the nodes are not neighbours, we can guarantee // at least one document in the interval. } bool SkiplistIterator::findHelperIntervalIntersectionValid ( SkiplistIteratorInterval const& lInterval, SkiplistIteratorInterval const& rInterval, SkiplistIteratorInterval& interval) { Node* lNode = lInterval._leftEndPoint; Node* rNode = rInterval._leftEndPoint; if (nullptr == lNode || nullptr == rNode) { // At least one left boundary is the end, intersection is empty. return false; } int compareResult; // Now find the larger of the two start nodes: if (lNode == _index->_skiplistIndex->startNode()) { // We take rNode, even if it is the start node as well. compareResult = -1; } else if (rNode == _index->_skiplistIndex->startNode()) { // We take lNode compareResult = 1; } else { compareResult = _index->CmpElmElm(lNode->document(), rNode->document(), triagens::basics::SKIPLIST_CMP_TOTORDER); } if (compareResult < 1) { interval._leftEndPoint = rNode; } else { interval._leftEndPoint = lNode; } lNode = lInterval._rightEndPoint; rNode = rInterval._rightEndPoint; // Now find the smaller of the two end nodes: if (nullptr == lNode) { // We take rNode, even is this also the end node. compareResult = 1; } else if (nullptr == rNode) { // We take lNode. compareResult = -1; } else { compareResult = _index->CmpElmElm(lNode->document(), rNode->document(), triagens::basics::SKIPLIST_CMP_TOTORDER); } if (compareResult < 1) { interval._rightEndPoint = lNode; } else { interval._rightEndPoint = rNode; } return findHelperIntervalValid(interval); } void SkiplistIterator::findHelper (TRI_index_operator_t const* indexOperator, std::vector& intervals) { TRI_skiplist_index_key_t values; std::vector leftResult; std::vector rightResult; SkiplistIteratorInterval interval; Node* temp; TRI_relation_index_operator_t* relationOperator = (TRI_relation_index_operator_t*) indexOperator; TRI_logical_index_operator_t* logicalOperator = (TRI_logical_index_operator_t*) indexOperator; switch (indexOperator->_type) { case TRI_EQ_INDEX_OPERATOR: case TRI_LE_INDEX_OPERATOR: case TRI_LT_INDEX_OPERATOR: case TRI_GE_INDEX_OPERATOR: case TRI_GT_INDEX_OPERATOR: values._fields = relationOperator->_fields; values._numFields = relationOperator->_numFields; break; // this is to silence a compiler warning default: { // must not access relationOperator->xxx if the operator is not a // relational one otherwise we'll get invalid reads and the prog // might crash } } switch (indexOperator->_type) { case TRI_AND_INDEX_OPERATOR: { findHelper(logicalOperator->_left, leftResult); findHelper(logicalOperator->_right, rightResult); size_t nl = leftResult.size(); size_t nr = rightResult.size(); for (size_t i = 0; i < nl; ++i) { for (size_t j = 0; j < nr; ++j) { auto tempLeftInterval = leftResult[i]; auto tempRightInterval = rightResult[j]; if (findHelperIntervalIntersectionValid( tempLeftInterval, tempRightInterval, interval)) { intervals.emplace_back(interval); } } } return; } case TRI_EQ_INDEX_OPERATOR: { temp = _index->_skiplistIndex->leftKeyLookup(&values); TRI_ASSERT(nullptr != temp); interval._leftEndPoint = temp; bool const allAttributesCoveredByCondition = (values._numFields == _index->numPaths()); if (_index->unique() && allAttributesCoveredByCondition) { // At most one hit: temp = temp->nextNode(); if (nullptr != temp) { if (0 == _index->CmpKeyElm(&values, temp->document())) { interval._rightEndPoint = temp->nextNode(); if (findHelperIntervalValid(interval)) { intervals.emplace_back(interval); } } } } else { temp = _index->_skiplistIndex->rightKeyLookup(&values); interval._rightEndPoint = temp->nextNode(); if (findHelperIntervalValid(interval)) { intervals.emplace_back(interval); } } return; } case TRI_LE_INDEX_OPERATOR: { interval._leftEndPoint = _index->_skiplistIndex->startNode(); temp = _index->_skiplistIndex->rightKeyLookup(&values); interval._rightEndPoint = temp->nextNode(); if (findHelperIntervalValid(interval)) { intervals.emplace_back(interval); } return; } case TRI_LT_INDEX_OPERATOR: { interval._leftEndPoint = _index->_skiplistIndex->startNode(); temp = _index->_skiplistIndex->leftKeyLookup(&values); interval._rightEndPoint = temp->nextNode(); if (findHelperIntervalValid(interval)) { intervals.emplace_back(interval); } return; } case TRI_GE_INDEX_OPERATOR: { temp = _index->_skiplistIndex->leftKeyLookup(&values); interval._leftEndPoint = temp; interval._rightEndPoint = _index->_skiplistIndex->endNode(); if (findHelperIntervalValid(interval)) { intervals.emplace_back(interval); } return; } case TRI_GT_INDEX_OPERATOR: { temp = _index->_skiplistIndex->rightKeyLookup(&values); interval._leftEndPoint = temp; interval._rightEndPoint = _index->_skiplistIndex->endNode(); if (findHelperIntervalValid(interval)) { intervals.emplace_back(interval); } return; } default: { TRI_ASSERT(false); } } // end of switch statement } // ----------------------------------------------------------------------------- // --SECTION-- private methods // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief Attempts to determine if there is a previous document within an /// interval or before it - without advancing the iterator. //////////////////////////////////////////////////////////////////////////////// bool SkiplistIterator::hasPrevIteration () const { // ........................................................................... // if we have more intervals than the one we are currently working // on then of course we have a previous doc, because intervals are nonempty. // ........................................................................... if (_currentInterval > 0) { return true; } Node const* leftNode = _index->_skiplistIndex->prevNode(_cursor); // Note that leftNode can be nullptr here! // ........................................................................... // If the leftNode == left end point AND there are no more intervals // then we have no next. // ........................................................................... return leftNode != _intervals.at(_currentInterval)._leftEndPoint; } //////////////////////////////////////////////////////////////////////////////// /// @brief Attempts to determine if there is a next document within an /// interval - without advancing the iterator. //////////////////////////////////////////////////////////////////////////////// bool SkiplistIterator::hasNextIteration () const { if (_cursor == nullptr) { return false; } // ........................................................................... // if we have more intervals than the one we are currently working // on then of course we have a next doc, since intervals are nonempty. // ........................................................................... if (_intervals.size() - 1 > _currentInterval) { return true; } Node const* leftNode = _cursor->nextNode(); // Note that leftNode can be nullptr here! // ........................................................................... // If the left == right end point AND there are no more intervals then we have // no next. // ........................................................................... return leftNode != _intervals.at(_currentInterval)._rightEndPoint; } //////////////////////////////////////////////////////////////////////////////// /// @brief Jumps backwards by 1 and returns the document //////////////////////////////////////////////////////////////////////////////// TRI_index_element_t* SkiplistIterator::prevIteration () { if (_currentInterval >= _intervals.size()) { return nullptr; } SkiplistIteratorInterval& interval = _intervals.at(_currentInterval); // ........................................................................... // use the current cursor and move 1 backward // ........................................................................... Node* result = nullptr; result = _index->_skiplistIndex->prevNode(_cursor); if (result == interval._leftEndPoint) { if (_currentInterval == 0) { _cursor = nullptr; // exhausted return nullptr; } --_currentInterval; interval = _intervals.at(_currentInterval); _cursor = interval._rightEndPoint; result = _index->_skiplistIndex->prevNode(_cursor); } _cursor = result; TRI_ASSERT(result != nullptr); return result->document(); } //////////////////////////////////////////////////////////////////////////////// /// @brief Jumps forwards by jumpSize and returns the document //////////////////////////////////////////////////////////////////////////////// TRI_index_element_t* SkiplistIterator::nextIteration () { if (_cursor == nullptr) { // In this case the iterator is exhausted or does not even have intervals. return nullptr; } if (_currentInterval >= _intervals.size()) { return nullptr; } SkiplistIteratorInterval& interval = _intervals.at(_currentInterval); while (true) { // will be left by break _cursor = _cursor->nextNode(); if (_cursor != interval._rightEndPoint) { if (_cursor == nullptr) { return nullptr; } break; // we found a next one } if (_currentInterval == _intervals.size() - 1) { _cursor = nullptr; // exhausted return nullptr; } ++_currentInterval; interval = _intervals.at(_currentInterval); _cursor = interval._leftEndPoint; } return _cursor->document(); } // ----------------------------------------------------------------------------- // --SECTION-- class SkiplistIndex // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- public methods // ----------------------------------------------------------------------------- TRI_doc_mptr_t* SkiplistIndexIterator::next () { while (_iterator == nullptr) { if (_currentOperator == _operators.size()) { // Sorry nothing found at all return nullptr; } // We restart the lookup _iterator = _index->lookup(_operators[_currentOperator], _reverse); if (_iterator == nullptr) { // This iterator was not created. _currentOperator++; } } TRI_ASSERT(_iterator != nullptr); TRI_index_element_t* res = _iterator->next(); while (res == nullptr) { // Try the next iterator _currentOperator++; if (_currentOperator == _operators.size()) { // We are done return nullptr; } // Free the former iterator and get the next one delete _iterator; _iterator = _index->lookup(_operators[_currentOperator], _reverse); res = _iterator->next(); } return res->document(); } void SkiplistIndexIterator::reset () { delete _iterator; _iterator = nullptr; _currentOperator = 0; } // ----------------------------------------------------------------------------- // --SECTION-- class SkiplistIndex // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- constructors and destructors // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief create the skiplist index //////////////////////////////////////////////////////////////////////////////// SkiplistIndex::SkiplistIndex (TRI_idx_iid_t iid, TRI_document_collection_t* collection, std::vector> const& fields, bool unique, bool sparse) : PathBasedIndex(iid, collection, fields, unique, sparse, true), CmpElmElm(this), CmpKeyElm(this), _skiplistIndex(nullptr) { _skiplistIndex = new TRI_Skiplist(CmpElmElm, CmpKeyElm, FreeElm, unique, _useExpansion); } //////////////////////////////////////////////////////////////////////////////// /// @brief create an index stub with a hard-coded selectivity estimate /// this is used in the cluster coordinator case //////////////////////////////////////////////////////////////////////////////// SkiplistIndex::SkiplistIndex (TRI_json_t const* json) : PathBasedIndex(json, true), CmpElmElm(this), CmpKeyElm(this), _skiplistIndex(nullptr) { } //////////////////////////////////////////////////////////////////////////////// /// @brief destroy the skiplist index //////////////////////////////////////////////////////////////////////////////// SkiplistIndex::~SkiplistIndex () { delete _skiplistIndex; } // ----------------------------------------------------------------------------- // --SECTION-- public methods // ----------------------------------------------------------------------------- size_t SkiplistIndex::memory () const { return _skiplistIndex->memoryUsage() + static_cast(_skiplistIndex->getNrUsed()) * elementSize(); } //////////////////////////////////////////////////////////////////////////////// /// @brief return a JSON representation of the index //////////////////////////////////////////////////////////////////////////////// triagens::basics::Json SkiplistIndex::toJson (TRI_memory_zone_t* zone, bool withFigures) const { auto json = Index::toJson(zone, withFigures); json("unique", triagens::basics::Json(zone, _unique)) ("sparse", triagens::basics::Json(zone, _sparse)); return json; } //////////////////////////////////////////////////////////////////////////////// /// @brief return a JSON representation of the index figures //////////////////////////////////////////////////////////////////////////////// triagens::basics::Json SkiplistIndex::toJsonFigures (TRI_memory_zone_t* zone) const { triagens::basics::Json json(triagens::basics::Json::Object); json("memory", triagens::basics::Json(static_cast(memory()))); _skiplistIndex->appendToJson(zone, json); return json; } //////////////////////////////////////////////////////////////////////////////// /// @brief inserts a document into a skiplist index //////////////////////////////////////////////////////////////////////////////// int SkiplistIndex::insert (TRI_doc_mptr_t const* doc, bool) { std::vector elements; int res = fillElement(elements, doc); if (res != TRI_ERROR_NO_ERROR) { for (auto& it : elements) { // free all elements to prevent leak TRI_index_element_t::free(it); } return res; } // insert into the index. the memory for the element will be owned or freed // by the index size_t count = elements.size(); for (size_t i = 0; i < count; ++i) { res = _skiplistIndex->insert(elements[i]); if (res == TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED && ! _unique) { // We ignore unique_constraint violated if we are not unique res = TRI_ERROR_NO_ERROR; } if (res != TRI_ERROR_NO_ERROR) { TRI_index_element_t::free(elements[i]); // Note: this element is freed already for (size_t j = i + 1; j < count; ++j) { TRI_index_element_t::free(elements[j]); } for (size_t j = 0; j < i; ++j) { _skiplistIndex->remove(elements[j]); // No need to free elements[j] skiplist has taken over already } break; } } return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief removes a document from a skiplist index //////////////////////////////////////////////////////////////////////////////// int SkiplistIndex::remove (TRI_doc_mptr_t const* doc, bool) { std::vector elements; int res = fillElement(elements, doc); // attempt the removal for skiplist indexes // ownership for the index element is transferred to the index size_t count = elements.size(); for (size_t i = 0; i < count; ++i) { res = _skiplistIndex->remove(elements[i]); TRI_index_element_t::free(elements[i]); } return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief attempts to locate an entry in the skip list index /// /// Note: this function will not destroy the passed slOperator before it returns /// Warning: who ever calls this function is responsible for destroying /// the TRI_index_operator_t* and the SkiplistIterator* results //////////////////////////////////////////////////////////////////////////////// SkiplistIterator* SkiplistIndex::lookup (TRI_index_operator_t* slOperator, bool reverse) const { TRI_ASSERT(slOperator != nullptr); // ......................................................................... // fill the relation operators which may be embedded in the slOperator with // additional information. Recall the slOperator is what information was // received from a user for query the skiplist. // ......................................................................... int res = FillLookupOperator(slOperator, _collection); if (res != TRI_ERROR_NO_ERROR) { TRI_set_errno(res); return nullptr; } std::unique_ptr results(new SkiplistIterator(this, reverse)); results->findHelper(slOperator, results->_intervals); results->initCursor(); // Finally initialize _cursor if the result is not empty: return results.release(); } // ----------------------------------------------------------------------------- // --SECTION-- private methods // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief compares a key with an element in a skip list, generic callback //////////////////////////////////////////////////////////////////////////////// int SkiplistIndex::KeyElementComparator::operator() (TRI_skiplist_index_key_t const* leftKey, TRI_index_element_t const* rightElement) const { TRI_ASSERT(nullptr != leftKey); TRI_ASSERT(nullptr != rightElement); auto shaper = _idx->collection()->getShaper(); // ONLY IN INDEX, PROTECTED by RUNTIME // Note that the key might contain fewer fields than there are indexed // attributes, therefore we only run the following loop to // leftKey->_numFields. for (size_t j = 0; j < leftKey->_numFields; j++) { int compareResult = CompareKeyElement(&leftKey->_fields[j], rightElement, j, shaper); if (compareResult != 0) { return compareResult; } } return 0; } //////////////////////////////////////////////////////////////////////////////// /// @brief compares two elements in a skip list, this is the generic callback //////////////////////////////////////////////////////////////////////////////// int SkiplistIndex::ElementElementComparator::operator() (TRI_index_element_t const* leftElement, TRI_index_element_t const* rightElement, triagens::basics::SkipListCmpType cmptype) const { TRI_ASSERT(nullptr != leftElement); TRI_ASSERT(nullptr != rightElement); // .......................................................................... // The document could be the same -- so no further comparison is required. // .......................................................................... if (leftElement == rightElement || (! _idx->_skiplistIndex->isArray() && leftElement->document() == rightElement->document())) { return 0; } auto shaper = _idx->_collection->getShaper(); // ONLY IN INDEX, PROTECTED by RUNTIME for (size_t j = 0; j < _idx->numPaths(); j++) { int compareResult = CompareElementElement(leftElement, j, rightElement, j, shaper); if (compareResult != 0) { return compareResult; } } // ........................................................................... // This is where the difference between the preorder and the proper total // order comes into play. Here if the 'keys' are the same, // but the doc ptr is different (which it is since we are here), then // we return 0 if we use the preorder and look at the _key attribute // otherwise. // ........................................................................... if (triagens::basics::SKIPLIST_CMP_PREORDER == cmptype) { return 0; } // We break this tie in the key comparison by looking at the key: int compareResult = strcmp(TRI_EXTRACT_MARKER_KEY(leftElement->document()), // ONLY IN INDEX, PROTECTED by RUNTIME TRI_EXTRACT_MARKER_KEY(rightElement->document())); // ONLY IN INDEX, PROTECTED by RUNTIME if (compareResult < 0) { return -1; } else if (compareResult > 0) { return 1; } return 0; } bool SkiplistIndex::accessFitsIndex (triagens::aql::AstNode const* access, triagens::aql::AstNode const* other, triagens::aql::AstNode const* op, triagens::aql::Variable const* reference, std::unordered_map>& found, bool isExecution) const { if (! this->canUseConditionPart(access, other, op, reference, isExecution)) { return false; } triagens::aql::AstNode const* what = access; std::pair> attributeData; if (op->type != triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN) { if (! what->isAttributeAccessForVariable(attributeData) || attributeData.first != reference) { // this access is not referencing this collection return false; } if (triagens::basics::TRI_AttributeNamesHaveExpansion(attributeData.second)) { // doc.value[*] IN 'value' return false; } } else { // ok, we do have an IN here... check if it's something like 'value' IN doc.value[*] bool canUse = false; if (what->isAttributeAccessForVariable(attributeData) && attributeData.first == reference && ! triagens::basics::TRI_AttributeNamesHaveExpansion(attributeData.second)) { // doc.value[*] IN 'value' canUse = true; } if (! canUse) { // check for doc.value[*] IN 'value' what = other; if (! what->isAttributeAccessForVariable(attributeData) || attributeData.first != reference) { // this access is not referencing this collection return false; } } } std::vector const& fieldNames = attributeData.second; for (size_t i = 0; i < _fields.size(); ++i) { if (_fields[i].size() != fieldNames.size()) { // attribute path length differs continue; } if (this->isAttributeExpanded(i) && op->type != triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN) { // If this attribute is correct or not, it could only serve for IN continue; } bool match = triagens::basics::AttributeName::isIdentical(_fields[i], fieldNames, true); if (match) { // mark ith attribute as being covered auto it = found.find(i); if (it == found.end()) { found.emplace(i, std::vector{ op }); } else { (*it).second.emplace_back(op); } TRI_IF_FAILURE("SkiplistIndex::accessFitsIndex") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } return true; } } return false; } void SkiplistIndex::matchAttributes (triagens::aql::AstNode const* node, triagens::aql::Variable const* reference, std::unordered_map>& found, size_t& values, bool isExecution) const { for (size_t i = 0; i < node->numMembers(); ++i) { auto op = node->getMember(i); switch (op->type) { case triagens::aql::NODE_TYPE_OPERATOR_BINARY_EQ: case triagens::aql::NODE_TYPE_OPERATOR_BINARY_LT: case triagens::aql::NODE_TYPE_OPERATOR_BINARY_LE: case triagens::aql::NODE_TYPE_OPERATOR_BINARY_GT: case triagens::aql::NODE_TYPE_OPERATOR_BINARY_GE: TRI_ASSERT(op->numMembers() == 2); accessFitsIndex(op->getMember(0), op->getMember(1), op, reference, found, isExecution); accessFitsIndex(op->getMember(1), op->getMember(0), op, reference, found, isExecution); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN: if (accessFitsIndex(op->getMember(0), op->getMember(1), op, reference, found, isExecution)) { auto m = op->getMember(1); if (m->isArray() && m->numMembers() > 1) { // attr IN [ a, b, c ] => this will produce multiple items, so count them! values += m->numMembers() - 1; } } break; default: break; } } } bool SkiplistIndex::supportsFilterCondition (triagens::aql::AstNode const* node, triagens::aql::Variable const* reference, size_t itemsInIndex, size_t& estimatedItems, double& estimatedCost) const { std::unordered_map> found; size_t values = 0; matchAttributes(node, reference, found, values, false); bool lastContainsEquality = true; size_t attributesCovered = 0; size_t attributesCoveredByEquality = 0; double equalityReductionFactor = 20.0; estimatedCost = static_cast(itemsInIndex); for (size_t i = 0; i < _fields.size(); ++i) { auto it = found.find(i); if (it == found.end()) { // index attribute not covered by condition break; } // check if the current condition contains an equality condition auto const& nodes = (*it).second; bool containsEquality = false; for (size_t j = 0; j < nodes.size(); ++j) { if (nodes[j]->type == triagens::aql::NODE_TYPE_OPERATOR_BINARY_EQ || nodes[j]->type == triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN ) { containsEquality = true; break; } } if (! lastContainsEquality) { // unsupported condition. must abort break; } ++attributesCovered; if (containsEquality) { ++attributesCoveredByEquality; estimatedCost /= equalityReductionFactor; // decrease the effect of the equality reduction factor equalityReductionFactor *= 0.25; if (equalityReductionFactor < 2.0) { // equalityReductionFactor shouldn't get too low equalityReductionFactor = 2.0; } } else { // quick estimate for the potential reductions caused by the conditions if (nodes.size() >= 2) { // at least two (non-equality) conditions. probably a range with lower // and upper bound defined estimatedCost /= 7.5; } else { // one (non-equality). this is either a lower or a higher bound estimatedCost /= 2.0; } } lastContainsEquality = containsEquality; } if (values == 0) { values = 1; } if (attributesCoveredByEquality == _fields.size() && unique()) { // index is unique and condition covers all attributes by equality if (estimatedItems >= values) { // reduce costs due to uniqueness estimatedItems = values; estimatedCost = static_cast(estimatedItems); } else { // cost is already low... now slightly prioritize the unique index estimatedCost *= 0.995; } return true; } if (attributesCovered > 0 && (! _sparse || (_sparse && attributesCovered == _fields.size()))) { // if the condition contains at least one index attribute and is not sparse, // or the index is sparse and all attributes are covered by the condition, // then it can be used (note: additional checks for condition parts in // sparse indexes are contained in Index::canUseConditionPart) estimatedItems = static_cast((std::max)(static_cast(estimatedCost * values), static_cast(1))); estimatedCost *= static_cast(values); return true; } // no condition estimatedItems = itemsInIndex; estimatedCost = static_cast(estimatedItems); return false; } bool SkiplistIndex::supportsSortCondition (triagens::aql::SortCondition const* sortCondition, triagens::aql::Variable const* reference, size_t itemsInIndex, double& estimatedCost) const { TRI_ASSERT(sortCondition != nullptr); if (! _sparse) { // only non-sparse indexes can be used for sorting if (! _useExpansion && sortCondition->isUnidirectional() && sortCondition->isOnlyAttributeAccess()) { size_t const coveredAttributes = sortCondition->coveredAttributes(reference, _fields); if (coveredAttributes >= sortCondition->numAttributes()) { // sort is fully covered by index. no additional sort costs! estimatedCost = 0.0; return true; } else if (coveredAttributes > 0) { estimatedCost = (itemsInIndex / coveredAttributes) * std::log2(static_cast(itemsInIndex)); return true; } } } // by default no sort conditions are supported if (itemsInIndex > 0) { estimatedCost = itemsInIndex * std::log2(static_cast(itemsInIndex)); } else { estimatedCost = 0.0; } return false; } IndexIterator* SkiplistIndex::iteratorForCondition (IndexIteratorContext* context, triagens::aql::Ast* ast, triagens::aql::AstNode const* node, triagens::aql::Variable const* reference, bool reverse) const { // Create the skiplistOperator for the IndexLookup if (node == nullptr) { // We have no condition, we just use sort Json nullArray(Json::Array); nullArray.add(Json(Json::Null)); std::unique_ptr unboundOperator(TRI_CreateIndexOperator(TRI_GE_INDEX_OPERATOR, nullptr, nullptr, nullArray.steal(), _shaper, 1)); std::vector searchValues({unboundOperator.get()}); unboundOperator.release(); TRI_IF_FAILURE("SkiplistIndex::noSortIterator") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } return new SkiplistIndexIterator(this, searchValues, reverse); } std::unordered_map> found; size_t unused = 0; matchAttributes(node, reference, found, unused, true); // found contains all attributes that are relevant for this node. // It might be less than fields(). // // Handle the first attributes. They can only be == or IN and only // one node per attribute auto getValueAccess = [&] (triagens::aql::AstNode const* comp, triagens::aql::AstNode const*& access, triagens::aql::AstNode const*& value) -> bool { access = comp->getMember(0); value = comp->getMember(1); std::pair> paramPair; if (! (access->isAttributeAccessForVariable(paramPair) && paramPair.first == reference)) { access = comp->getMember(1); value = comp->getMember(0); if (! (access->isAttributeAccessForVariable(paramPair) && paramPair.first == reference)) { // Both side do not have a correct AttributeAccess, this should not happen and indicates // an error in the optimizer TRI_ASSERT(false); } return true; } return false; }; // initialize permutations std::vector permutationStates; permutationStates.reserve(_fields.size()); size_t maxPermutations = 1; size_t usedFields = 0; for (; usedFields < _fields.size(); ++usedFields) { // We are in the equality range, we only allow one == or IN node per attribute auto it = found.find(usedFields); if (it == found.end() || it->second.size() != 1) { // We are either done, // or this is a range. // Continue with more complicated loop break; } auto comp = it->second[0]; TRI_ASSERT(comp->numMembers() == 2); triagens::aql::AstNode const* access = nullptr; triagens::aql::AstNode const* value = nullptr; getValueAccess(comp, access, value); // We found an access for this field if (comp->type == triagens::aql::NODE_TYPE_OPERATOR_BINARY_EQ) { // This is an equalityCheck, we can continue with the next field permutationStates.emplace_back(PermutationState(comp->type, value, usedFields, 1)); TRI_IF_FAILURE("SkiplistIndex::permutationEQ") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } else if (comp->type == triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN) { if (isAttributeExpanded(usedFields)) { permutationStates.emplace_back(PermutationState(aql::NODE_TYPE_OPERATOR_BINARY_EQ, value, usedFields, 1)); TRI_IF_FAILURE("SkiplistIndex::permutationArrayIN") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } else { if (value->numMembers() == 0) { return nullptr; } permutationStates.emplace_back(PermutationState(comp->type, value, usedFields, value->numMembers())); TRI_IF_FAILURE("SkiplistIndex::permutationIN") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } maxPermutations *= value->numMembers(); } } else { // This is a one-sided range break; } } // Now handle the next element, which might be a range bool includeLower = false; bool includeUpper = false; std::unique_ptr lower; std::unique_ptr upper; if (usedFields < _fields.size()) { auto it = found.find(usedFields); if (it != found.end()) { auto rangeConditions = it->second; TRI_ASSERT(rangeConditions.size() <= 2); for (auto& comp : rangeConditions) { TRI_ASSERT(comp->numMembers() == 2); triagens::aql::AstNode const* access = nullptr; triagens::aql::AstNode const* value = nullptr; bool isReverseOrder = getValueAccess(comp, access, value); auto setBorder = [&] (bool isLower, bool includeBound) -> void { if ( isLower == isReverseOrder ) { // We set an upper bound TRI_ASSERT(upper == nullptr); upper.reset(value->toJsonValue(TRI_UNKNOWN_MEM_ZONE)); includeUpper = includeBound; } else { // We set an lower bound TRI_ASSERT(lower == nullptr); lower.reset(value->toJsonValue(TRI_UNKNOWN_MEM_ZONE)); includeLower = includeBound; } }; // This is not an equalityCheck, set lower or upper switch (comp->type) { case triagens::aql::NODE_TYPE_OPERATOR_BINARY_LT: setBorder(false, false); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_LE: setBorder(false, true); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_GT: setBorder(true, false); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_GE: setBorder(true, true); break; default: // unsupported right now. Should have been rejected by supportsFilterCondition TRI_ASSERT(false); return nullptr; } } } } std::vector searchValues; searchValues.reserve(maxPermutations); if (usedFields == 0) { // We have a range query based on the first _field auto op = buildRangeOperator(lower.get(), includeLower, upper.get(), includeUpper, nullptr, _shaper); if (op != nullptr) { searchValues.emplace_back(op); TRI_IF_FAILURE("SkiplistIndex::onlyRangeOperator") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } } else { bool done = false; // create all permutations while (! done) { std::unique_ptr parameter(TRI_CreateArrayJson(TRI_UNKNOWN_MEM_ZONE, usedFields)); bool valid = true; for (size_t i = 0; i < usedFields; ++i) { TRI_ASSERT(i < permutationStates.size()); auto& state = permutationStates[i]; std::unique_ptr json(state.getValue()->toJsonValue(TRI_UNKNOWN_MEM_ZONE)); if (json == nullptr) { valid = false; break; } TRI_PushBack3ArrayJson(TRI_UNKNOWN_MEM_ZONE, parameter.get(), json.release()); } if (valid) { std::unique_ptr tmpOp(TRI_CreateIndexOperator(TRI_EQ_INDEX_OPERATOR, nullptr, nullptr, parameter.get(), _shaper, usedFields)); // Note we create a new RangeOperator always. std::unique_ptr rangeOperator(buildRangeOperator(lower.get(), includeLower, upper.get(), includeUpper, parameter.get(), _shaper)); parameter.release(); if (rangeOperator != nullptr) { std::unique_ptr combinedOp(TRI_CreateIndexOperator(TRI_AND_INDEX_OPERATOR, tmpOp.get(), rangeOperator.get(), nullptr, _shaper, 2)); rangeOperator.release(); tmpOp.release(); searchValues.emplace_back(combinedOp.get()); TRI_IF_FAILURE("SkiplistIndex::rangeOperatorNoTmp") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } combinedOp.release(); } else { if (tmpOp != nullptr) { searchValues.emplace_back(tmpOp.get()); TRI_IF_FAILURE("SkiplistIndex::rangeOperatorTmp") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } tmpOp.release(); } } } size_t const np = permutationStates.size() - 1; size_t current = 0; // now permute while (true) { if (++permutationStates[np - current].current < permutationStates[np - current].n) { current = 0; // abort inner iteration break; } permutationStates[np - current].current = 0; if (++current >= usedFields) { done = true; break; } // next inner iteration } } } if (searchValues.empty()) { return nullptr; } if (reverse) { std::reverse(searchValues.begin(), searchValues.end()); } TRI_IF_FAILURE("SkiplistIndex::noIterator") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } return new SkiplistIndexIterator(this, searchValues, reverse); } //////////////////////////////////////////////////////////////////////////////// /// @brief specializes the condition for use with the index //////////////////////////////////////////////////////////////////////////////// triagens::aql::AstNode* SkiplistIndex::specializeCondition (triagens::aql::AstNode* node, triagens::aql::Variable const* reference) const { std::unordered_map> found; size_t values = 0; matchAttributes(node, reference, found, values, false); std::vector children; bool lastContainsEquality = true; for (size_t i = 0; i < _fields.size(); ++i) { auto it = found.find(i); if (it == found.end()) { // index attribute not covered by condition break; } // check if the current condition contains an equality condition auto& nodes = (*it).second; bool containsEquality = false; for (size_t j = 0; j < nodes.size(); ++j) { if (nodes[j]->type == triagens::aql::NODE_TYPE_OPERATOR_BINARY_EQ || nodes[j]->type == triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN) { containsEquality = true; break; } } if (! lastContainsEquality) { // unsupported condition. must abort break; } std::sort(nodes.begin(), nodes.end(), [] (triagens::aql::AstNode const* lhs, triagens::aql::AstNode const* rhs) -> bool { return sortWeight(lhs) < sortWeight(rhs); }); lastContainsEquality = containsEquality; std::unordered_set operatorsFound; for (auto& it : nodes) { // do not less duplicate or related operators pass if (isDuplicateOperator(it, operatorsFound)) { continue; } operatorsFound.emplace(static_cast(it->type)); children.emplace_back(it); } } while (node->numMembers() > 0) { node->removeMemberUnchecked(0); } for (auto& it : children) { node->addMember(it); } return node; } bool SkiplistIndex::isDuplicateOperator (triagens::aql::AstNode const* node, std::unordered_set const& operatorsFound) const { auto type = node->type; if (operatorsFound.find(static_cast(type)) != operatorsFound.end()) { // duplicate operator return true; } if (operatorsFound.find(static_cast(triagens::aql::NODE_TYPE_OPERATOR_BINARY_EQ)) != operatorsFound.end() || operatorsFound.find(static_cast(triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN)) != operatorsFound.end()) { return true; } bool duplicate = false; switch (type) { case triagens::aql::NODE_TYPE_OPERATOR_BINARY_LT: duplicate = operatorsFound.find(static_cast(triagens::aql::NODE_TYPE_OPERATOR_BINARY_LE)) != operatorsFound.end(); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_LE: duplicate = operatorsFound.find(static_cast(triagens::aql::NODE_TYPE_OPERATOR_BINARY_LT)) != operatorsFound.end(); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_GT: duplicate = operatorsFound.find(static_cast(triagens::aql::NODE_TYPE_OPERATOR_BINARY_GE)) != operatorsFound.end(); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_GE: duplicate = operatorsFound.find(static_cast(triagens::aql::NODE_TYPE_OPERATOR_BINARY_GT)) != operatorsFound.end(); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_EQ: duplicate = operatorsFound.find(static_cast(triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN)) != operatorsFound.end(); break; case triagens::aql::NODE_TYPE_OPERATOR_BINARY_IN: duplicate = operatorsFound.find(static_cast(triagens::aql::NODE_TYPE_OPERATOR_BINARY_EQ)) != operatorsFound.end(); break; default: { // ignore } } return duplicate; } // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // ----------------------------------------------------------------------------- // Local Variables: // mode: outline-minor // outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}" // End: