//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Jan Steemann /// @author Daniel H. Larkin /// @author Simon Grätzer //////////////////////////////////////////////////////////////////////////////// #include "RocksDBVPackIndex.h" #include "Aql/AstNode.h" #include "Aql/SortCondition.h" #include "Basics/AttributeNameParser.h" #include "Basics/FixedSizeAllocator.h" #include "Basics/StaticStrings.h" #include "Basics/VelocyPackHelper.h" #include "Indexes/IndexLookupContext.h" #include "RocksDBEngine/RocksDBCollection.h" #include "RocksDBEngine/RocksDBCommon.h" #include "RocksDBEngine/RocksDBComparator.h" #include "RocksDBEngine/RocksDBPrimaryIndex.h" #include "RocksDBEngine/RocksDBToken.h" #include "RocksDBEngine/RocksDBTransactionState.h" #include "Transaction/Helpers.h" #include "Transaction/Methods.h" #include "VocBase/LogicalCollection.h" #include #include #include #include using namespace arangodb; /// @brief the _key attribute, which, when used in an index, will implictly make /// it unique static std::vector const KeyAttribute{ arangodb::basics::AttributeName("_key", false)}; static size_t sortWeight(arangodb::aql::AstNode const* node) { switch (node->type) { case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ: return 1; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN: return 2; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: return 3; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: return 4; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: return 5; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: return 6; default: return 42; } } // ............................................................................. // recall for all of the following comparison functions: // // left < right return -1 // left > right return 1 // left == right return 0 // // furthermore: // // the following order is currently defined for placing an order on documents // undef < null < boolean < number < strings < lists < hash arrays // note: undefined will be treated as NULL pointer not NULL JSON OBJECT // within each type class we have the following order // boolean: false < true // number: natural order // strings: lexicographical // lists: lexicographically and within each slot according to these rules. // ........................................................................... RocksDBVPackIndexIterator::RocksDBVPackIndexIterator( LogicalCollection* collection, transaction::Methods* trx, ManagedDocumentResult* mmdr, arangodb::RocksDBVPackIndex const* index, arangodb::RocksDBPrimaryIndex* primaryIndex, bool reverse, VPackSlice const& left, VPackSlice const& right) : IndexIterator(collection, trx, mmdr, index), _index(index), _primaryIndex(primaryIndex), _cmp(index->_cmp), _reverse(reverse), _bounds(index->_unique ? RocksDBKeyBounds::UniqueIndexRange( index->objectId(), left, right) : RocksDBKeyBounds::IndexRange(index->objectId(), left, right)) { RocksDBTransactionState* state = rocksutils::toRocksTransactionState(trx); rocksdb::Transaction* rtrx = state->rocksTransaction(); auto options = state->readOptions(); _iterator.reset(rtrx->GetIterator(options)); if (reverse) { _iterator->SeekForPrev(_bounds.end()); } else { _iterator->Seek(_bounds.start()); } } /// @brief Reset the cursor void RocksDBVPackIndexIterator::reset() { if (_reverse) { _iterator->SeekForPrev(_bounds.end()); } else { _iterator->Seek(_bounds.start()); } } bool RocksDBVPackIndexIterator::outOfRange() const { if (_reverse) { return (-1 == _cmp->Compare(_iterator->key(), _bounds.start())); } else { return (1 == _cmp->Compare(_iterator->key(), _bounds.end())); } } bool RocksDBVPackIndexIterator::next(TokenCallback const& cb, size_t limit) { if (limit == 0 || !_iterator->Valid() || outOfRange()) { // No limit no data, or we are actually done. The last call should have // returned false TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken return false; } while (limit > 0) { StringRef primaryKey = _index->_unique ? RocksDBValue::primaryKey(_iterator->value()) : RocksDBKey::primaryKey(_iterator->key()); RocksDBToken token(_primaryIndex->lookupKey(_trx, primaryKey)); cb(token); if (_reverse) { _iterator->Prev(); } else { _iterator->Next(); } if (!_iterator->Valid() || outOfRange()) { return false; } } return true; } /// @brief create the index RocksDBVPackIndex::RocksDBVPackIndex(TRI_idx_iid_t iid, arangodb::LogicalCollection* collection, arangodb::velocypack::Slice const& info) : RocksDBIndex(iid, collection, info), _useExpansion(false), _allowPartialIndex(true) { { TRI_ASSERT(!_fields.empty()); TRI_ASSERT(iid != 0); fillPaths(_paths, _expanding); for (auto const& it : _fields) { if (TRI_AttributeNamesHaveExpansion(it)) { _useExpansion = true; break; } } } } /// @brief destroy the index RocksDBVPackIndex::~RocksDBVPackIndex() {} size_t RocksDBVPackIndex::memory() const { return 0; // TODO } /// @brief return a VelocyPack representation of the index void RocksDBVPackIndex::toVelocyPack(VPackBuilder& builder, bool withFigures) const { Index::toVelocyPack(builder, withFigures); builder.add("unique", VPackValue(_unique)); builder.add("sparse", VPackValue(_sparse)); } /// @brief return a VelocyPack representation of the index figures void RocksDBVPackIndex::toVelocyPackFigures(VPackBuilder& builder) const { TRI_ASSERT(builder.isOpenObject()); builder.add("memory", VPackValue(memory())); } /// @brief whether or not the index is implicitly unique /// this can be the case if the index is not declared as unique, but contains /// a /// unique attribute such as _key bool RocksDBVPackIndex::implicitlyUnique() const { if (_unique) { // a unique index is always unique return true; } if (_useExpansion) { // when an expansion such as a[*] is used, the index may not be unique, // even // if it contains attributes that are guaranteed to be unique return false; } for (auto const& it : _fields) { // if _key is contained in the index fields definition, then the index is // implicitly unique if (it == KeyAttribute) { return true; } } // _key not contained return false; } /// @brief helper function to insert a document into any index type /// Should result in an elements vector filled with the new index entries /// uses the _unique field to determine the kind of key structure int RocksDBVPackIndex::fillElement( transaction::Methods* trx, TRI_voc_rid_t revisionId, VPackSlice const& doc, std::vector>& elements) { if (doc.isNone()) { LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "encountered invalid marker with slice of type None"; return TRI_ERROR_INTERNAL; } TRI_IF_FAILURE("FillElementIllegalSlice") { return TRI_ERROR_INTERNAL; } if (!_useExpansion) { // fast path for inserts... no array elements used transaction::BuilderLeaser indexVals(trx); indexVals->openArray(); size_t const n = _paths.size(); for (size_t i = 0; i < n; ++i) { TRI_ASSERT(!_paths[i].empty()); VPackSlice slice = doc.get(_paths[i]); if (slice.isNone() || slice.isNull()) { // attribute not found if (_sparse) { // if sparse we do not have to index, this is indicated by result // being shorter than n return TRI_ERROR_NO_ERROR; } // null, note that this will be copied later! indexVals->add(VPackSlice::nullSlice()); } else { indexVals->add(slice); } } indexVals->close(); StringRef key(doc.get(StaticStrings::KeyString)); if (_unique) { // Unique VPack index values are stored as follows: // - Key: 7 + 8-byte object ID of index + VPack array with index // value(s) // + separator (NUL) byte // - Value: primary key elements.emplace_back( RocksDBKey::UniqueIndexValue(_objectId, indexVals->slice()), RocksDBValue::UniqueIndexValue(key)); } else { // Non-unique VPack index values are stored as follows: // - Key: 6 + 8-byte object ID of index + VPack array with index // value(s) // + separator (NUL) byte + primary key // - Value: empty elements.emplace_back( RocksDBKey::IndexValue(_objectId, key, indexVals->slice()), RocksDBValue::IndexValue()); } } else { // other path for handling array elements, too std::vector sliceStack; transaction::BuilderLeaser indexVals(trx); buildIndexValues(doc, 0, elements, sliceStack); } return TRI_ERROR_NO_ERROR; } void RocksDBVPackIndex::addIndexValue( VPackSlice const& document, std::vector>& elements, std::vector& sliceStack) { // TODO maybe use leaded Builder from transaction. VPackBuilder b; b.openArray(); for (VPackSlice const& s : sliceStack) { b.add(s); } b.close(); StringRef key(document.get(StaticStrings::KeyString)); if (_unique) { // Unique VPack index values are stored as follows: // - Key: 7 + 8-byte object ID of index + VPack array with index value(s) // - Value: primary key elements.emplace_back(RocksDBKey::UniqueIndexValue(_objectId, b.slice()), RocksDBValue::UniqueIndexValue(key)); } else { // Non-unique VPack index values are stored as follows: // - Key: 6 + 8-byte object ID of index + VPack array with index value(s) // + primary key // - Value: empty elements.emplace_back( RocksDBKey::IndexValue(_objectId, StringRef(key), b.slice()), RocksDBValue::IndexValue()); } } /// @brief helper function to create a set of index combinations to insert void RocksDBVPackIndex::buildIndexValues( VPackSlice const document, size_t level, std::vector>& elements, std::vector& sliceStack) { // Invariant: level == sliceStack.size() // Stop the recursion: if (level == _paths.size()) { addIndexValue(document, elements, sliceStack); return; } if (_expanding[level] == -1) { // the trivial, non-expanding case VPackSlice slice = document.get(_paths[level]); if (slice.isNone() || slice.isNull()) { if (_sparse) { return; } sliceStack.emplace_back(arangodb::basics::VelocyPackHelper::NullValue()); } else { sliceStack.emplace_back(slice); } buildIndexValues(document, level + 1, elements, sliceStack); sliceStack.pop_back(); return; } // Finally, the complex case, where we have to expand one entry. // Note again that at most one step in the attribute path can be // an array step. Furthermore, if _allowPartialIndex is true and // anything goes wrong with this attribute path, we have to bottom out // with None values to be able to use the index for a prefix match. // Trivial case to bottom out with Illegal types. VPackSlice illegalSlice = arangodb::basics::VelocyPackHelper::IllegalValue(); auto finishWithNones = [&]() -> void { if (!_allowPartialIndex || level == 0) { return; } for (size_t i = level; i < _paths.size(); i++) { sliceStack.emplace_back(illegalSlice); } addIndexValue(document.get(StaticStrings::KeyString), elements, sliceStack); for (size_t i = level; i < _paths.size(); i++) { sliceStack.pop_back(); } }; size_t const n = _paths[level].size(); // We have 0 <= _expanding[level] < n. VPackSlice current(document); for (size_t i = 0; i <= static_cast(_expanding[level]); i++) { if (!current.isObject()) { finishWithNones(); return; } current = current.get(_paths[level][i]); if (current.isNone()) { finishWithNones(); return; } } // Now the expansion: if (!current.isArray() || current.length() == 0) { finishWithNones(); return; } std::unordered_set seen(2, arangodb::basics::VelocyPackHelper::VPackHash(), arangodb::basics::VelocyPackHelper::VPackEqual()); auto moveOn = [&](VPackSlice something) -> void { auto it = seen.find(something); if (it == seen.end()) { seen.insert(something); sliceStack.emplace_back(something); buildIndexValues(document, level + 1, elements, sliceStack); sliceStack.pop_back(); } }; for (auto const& member : VPackArrayIterator(current)) { VPackSlice current2(member); bool doneNull = false; for (size_t i = _expanding[level] + 1; i < n; i++) { if (!current2.isObject()) { if (!_sparse) { moveOn(arangodb::basics::VelocyPackHelper::NullValue()); } doneNull = true; break; } current2 = current2.get(_paths[level][i]); if (current2.isNone()) { if (!_sparse) { moveOn(arangodb::basics::VelocyPackHelper::NullValue()); } doneNull = true; break; } } if (!doneNull) { moveOn(current2); } // Finally, if, because of sparsity, we have not inserted anything by now, // we need to play the above trick with None because of the above // mentioned // reasons: if (seen.empty()) { finishWithNones(); } } } /// @brief helper function to transform AttributeNames into strings. void RocksDBVPackIndex::fillPaths(std::vector>& paths, std::vector& expanding) { paths.clear(); expanding.clear(); for (std::vector const& list : _fields) { paths.emplace_back(); std::vector& interior(paths.back()); int expands = -1; int count = 0; for (auto const& att : list) { interior.emplace_back(att.name); if (att.shouldExpand) { expands = count; } ++count; } expanding.emplace_back(expands); } } /// @brief inserts a document into the index int RocksDBVPackIndex::insert(transaction::Methods* trx, TRI_voc_rid_t revisionId, VPackSlice const& doc, bool isRollback) { std::vector> elements; int res; try { res = fillElement(trx, revisionId, doc, elements); } catch (...) { res = TRI_ERROR_OUT_OF_MEMORY; } if (res != TRI_ERROR_NO_ERROR) { return res; } RocksDBTransactionState* state = rocksutils::toRocksTransactionState(trx); rocksdb::Transaction* rtrx = state->rocksTransaction(); auto options = state->readOptions(); size_t const count = elements.size(); for (size_t i = 0; i < count; ++i) { RocksDBKey& key = elements[i].first; RocksDBValue& value = elements[i].second; if (_unique) { RocksDBValue existing = RocksDBValue::Empty(RocksDBEntryType::UniqueIndexValue); auto status = rtrx->Get(options, key.string(), existing.buffer()); if (!status.IsNotFound()) { res = TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED; } } if (res == TRI_ERROR_NO_ERROR) { auto s = rtrx->Put(key.string(), value.string()); auto status = rocksutils::convertStatus(s, rocksutils::StatusHint::index); if (!status.ok()) { res = status.errorNumber(); } } if (res != TRI_ERROR_NO_ERROR) { for (size_t j = 0; j < i; ++j) { rtrx->Delete(elements[j].first.string()); } if (res == TRI_ERROR_ARANGO_UNIQUE_CONSTRAINT_VIOLATED && !_unique) { // We ignore unique_constraint violated if we are not unique res = TRI_ERROR_NO_ERROR; } break; } } return res; } /// @brief removes a document from the index int RocksDBVPackIndex::remove(transaction::Methods* trx, TRI_voc_rid_t revisionId, VPackSlice const& doc, bool isRollback) { std::vector> elements; int res; try { res = fillElement(trx, revisionId, doc, elements); } catch (...) { res = TRI_ERROR_OUT_OF_MEMORY; } if (res != TRI_ERROR_NO_ERROR) { return res; } RocksDBTransactionState* state = rocksutils::toRocksTransactionState(trx); rocksdb::Transaction* rtrx = state->rocksTransaction(); size_t const count = elements.size(); for (size_t i = 0; i < count; ++i) { auto s = rtrx->Delete(elements[i].first.string()); auto status = rocksutils::convertStatus(s, rocksutils::StatusHint::index); if (!status.ok()) { res = status.errorNumber(); } } return res; } int RocksDBVPackIndex::unload() { // nothing to do return TRI_ERROR_NO_ERROR; } /// @brief called when the index is dropped int RocksDBVPackIndex::drop() { if (_unique) { return rocksutils::removeLargeRange( rocksutils::globalRocksDB(), RocksDBKeyBounds::UniqueIndex(_objectId)); } else { return rocksutils::removeLargeRange(rocksutils::globalRocksDB(), RocksDBKeyBounds::Index(_objectId)); } } /// @brief attempts to locate an entry in the index /// Warning: who ever calls this function is responsible for destroying /// the RocksDBVPackIndexIterator* results RocksDBVPackIndexIterator* RocksDBVPackIndex::lookup( transaction::Methods* trx, ManagedDocumentResult* mmdr, VPackSlice const searchValues, bool reverse) const { TRI_ASSERT(searchValues.isArray()); TRI_ASSERT(searchValues.length() <= _fields.size()); VPackBuilder leftSearch; VPackBuilder rightSearch; VPackSlice lastNonEq; leftSearch.openArray(); for (auto const& it : VPackArrayIterator(searchValues)) { TRI_ASSERT(it.isObject()); VPackSlice eq = it.get(StaticStrings::IndexEq); if (eq.isNone()) { lastNonEq = it; break; } leftSearch.add(eq); } VPackSlice leftBorder; VPackSlice rightBorder; if (lastNonEq.isNone()) { // We only have equality! rightSearch = leftSearch; leftSearch.add(VPackSlice::minKeySlice()); leftSearch.close(); rightSearch.add(VPackSlice::maxKeySlice()); rightSearch.close(); leftBorder = leftSearch.slice(); rightBorder = rightSearch.slice(); } else { // Copy rightSearch = leftSearch for right border rightSearch = leftSearch; // Define Lower-Bound VPackSlice lastLeft = lastNonEq.get(StaticStrings::IndexGe); if (!lastLeft.isNone()) { TRI_ASSERT(!lastNonEq.hasKey(StaticStrings::IndexGt)); leftSearch.add(lastLeft); leftSearch.add(VPackSlice::minKeySlice()); leftSearch.close(); VPackSlice search = leftSearch.slice(); leftBorder = search; } else { lastLeft = lastNonEq.get(StaticStrings::IndexGt); if (!lastLeft.isNone()) { leftSearch.add(lastLeft); leftSearch.add(VPackSlice::maxKeySlice()); leftSearch.close(); VPackSlice search = leftSearch.slice(); leftBorder = search; } else { // No lower bound set default to (null <= x) leftSearch.add(VPackSlice::minKeySlice()); leftSearch.close(); VPackSlice search = leftSearch.slice(); leftBorder = search; } } // Define upper-bound VPackSlice lastRight = lastNonEq.get(StaticStrings::IndexLe); if (!lastRight.isNone()) { TRI_ASSERT(!lastNonEq.hasKey(StaticStrings::IndexLt)); rightSearch.add(lastRight); rightSearch.add(VPackSlice::maxKeySlice()); rightSearch.close(); VPackSlice search = rightSearch.slice(); rightBorder = search; } else { lastRight = lastNonEq.get(StaticStrings::IndexLt); if (!lastRight.isNone()) { rightSearch.add(lastRight); rightSearch.add(VPackSlice::minKeySlice()); rightSearch.close(); VPackSlice search = rightSearch.slice(); rightBorder = search; } else { // No upper bound set default to (x <= INFINITY) rightSearch.add(VPackSlice::maxKeySlice()); rightSearch.close(); VPackSlice search = rightSearch.slice(); rightBorder = search; } } } // Secured by trx. The shared_ptr index stays valid in // _collection at least as long as trx is running. // Same for the iterator auto physical = static_cast(_collection->getPhysical()); auto idx = physical->primaryIndex(); return new RocksDBVPackIndexIterator(_collection, trx, mmdr, this, idx, reverse, leftBorder, rightBorder); } bool RocksDBVPackIndex::accessFitsIndex( arangodb::aql::AstNode const* access, arangodb::aql::AstNode const* other, arangodb::aql::AstNode const* op, arangodb::aql::Variable const* reference, std::unordered_map>& found, std::unordered_set& nonNullAttributes, bool isExecution) const { if (!canUseConditionPart(access, other, op, reference, nonNullAttributes, isExecution)) { return false; } arangodb::aql::AstNode const* what = access; std::pair> attributeData; if (op->type != arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { if (!what->isAttributeAccessForVariable(attributeData) || attributeData.first != reference) { // this access is not referencing this collection return false; } if (arangodb::basics::TRI_AttributeNamesHaveExpansion( attributeData.second)) { // doc.value[*] == 'value' return false; } if (isAttributeExpanded(attributeData.second)) { // doc.value == 'value' (with an array index) return false; } } else { // ok, we do have an IN here... check if it's something like 'value' IN // doc.value[*] TRI_ASSERT(op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN); bool canUse = false; if (what->isAttributeAccessForVariable(attributeData) && attributeData.first == reference && !arangodb::basics::TRI_AttributeNamesHaveExpansion( attributeData.second) && attributeMatches(attributeData.second)) { // doc.value IN 'value' // can use this index canUse = true; } else { // check for 'value' IN doc.value AND 'value' IN doc.value[*] what = other; if (what->isAttributeAccessForVariable(attributeData) && attributeData.first == reference && isAttributeExpanded(attributeData.second) && attributeMatches(attributeData.second)) { canUse = true; } } if (!canUse) { return false; } } std::vector const& fieldNames = attributeData.second; for (size_t i = 0; i < _fields.size(); ++i) { if (_fields[i].size() != fieldNames.size()) { // attribute path length differs continue; } if (this->isAttributeExpanded(i) && op->type != arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { // If this attribute is correct or not, it could only serve for IN continue; } bool match = arangodb::basics::AttributeName::isIdentical(_fields[i], fieldNames, true); if (match) { // mark ith attribute as being covered auto it = found.find(i); if (it == found.end()) { found.emplace(i, std::vector{op}); } else { (*it).second.emplace_back(op); } TRI_IF_FAILURE("PersistentIndex::accessFitsIndex") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } return true; } } return false; } void RocksDBVPackIndex::matchAttributes( arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference, std::unordered_map>& found, size_t& values, std::unordered_set& nonNullAttributes, bool isExecution) const { for (size_t i = 0; i < node->numMembers(); ++i) { auto op = node->getMember(i); switch (op->type) { case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ: case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: TRI_ASSERT(op->numMembers() == 2); accessFitsIndex(op->getMember(0), op->getMember(1), op, reference, found, nonNullAttributes, isExecution); accessFitsIndex(op->getMember(1), op->getMember(0), op, reference, found, nonNullAttributes, isExecution); break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN: if (accessFitsIndex(op->getMember(0), op->getMember(1), op, reference, found, nonNullAttributes, isExecution)) { auto m = op->getMember(1); if (m->isArray() && m->numMembers() > 1) { // attr IN [ a, b, c ] => this will produce multiple items, so // count them! values += m->numMembers() - 1; } } break; default: break; } } } bool RocksDBVPackIndex::supportsFilterCondition( arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference, size_t itemsInIndex, size_t& estimatedItems, double& estimatedCost) const { std::unordered_map> found; std::unordered_set nonNullAttributes; size_t values = 0; matchAttributes(node, reference, found, values, nonNullAttributes, false); bool lastContainsEquality = true; size_t attributesCovered = 0; size_t attributesCoveredByEquality = 0; double equalityReductionFactor = 20.0; estimatedCost = static_cast(itemsInIndex); for (size_t i = 0; i < _fields.size(); ++i) { auto it = found.find(i); if (it == found.end()) { // index attribute not covered by condition break; } // check if the current condition contains an equality condition auto const& nodes = (*it).second; bool containsEquality = false; for (size_t j = 0; j < nodes.size(); ++j) { if (nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ || nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { containsEquality = true; break; } } if (!lastContainsEquality) { // unsupported condition. must abort break; } ++attributesCovered; if (containsEquality) { ++attributesCoveredByEquality; estimatedCost /= equalityReductionFactor; // decrease the effect of the equality reduction factor equalityReductionFactor *= 0.25; if (equalityReductionFactor < 2.0) { // equalityReductionFactor shouldn't get too low equalityReductionFactor = 2.0; } } else { // quick estimate for the potential reductions caused by the conditions if (nodes.size() >= 2) { // at least two (non-equality) conditions. probably a range with lower // and upper bound defined estimatedCost /= 7.5; } else { // one (non-equality). this is either a lower or a higher bound estimatedCost /= 2.0; } } lastContainsEquality = containsEquality; } if (values == 0) { values = 1; } if (attributesCoveredByEquality == _fields.size() && unique()) { // index is unique and condition covers all attributes by equality if (estimatedItems >= values) { // reduce costs due to uniqueness estimatedItems = values; estimatedCost = static_cast(estimatedItems); } else { // cost is already low... now slightly prioritize the unique index estimatedCost *= 0.995; } return true; } if (attributesCovered > 0 && (!_sparse || attributesCovered == _fields.size())) { // if the condition contains at least one index attribute and is not // sparse, // or the index is sparse and all attributes are covered by the condition, // then it can be used (note: additional checks for condition parts in // sparse indexes are contained in Index::canUseConditionPart) estimatedItems = static_cast((std::max)( static_cast(estimatedCost * values), static_cast(1))); estimatedCost *= static_cast(values); return true; } // no condition estimatedItems = itemsInIndex; estimatedCost = static_cast(estimatedItems); return false; } bool RocksDBVPackIndex::supportsSortCondition( arangodb::aql::SortCondition const* sortCondition, arangodb::aql::Variable const* reference, size_t itemsInIndex, double& estimatedCost, size_t& coveredAttributes) const { TRI_ASSERT(sortCondition != nullptr); if (!_sparse) { // only non-sparse indexes can be used for sorting if (!_useExpansion && sortCondition->isUnidirectional() && sortCondition->isOnlyAttributeAccess()) { coveredAttributes = sortCondition->coveredAttributes(reference, _fields); if (coveredAttributes >= sortCondition->numAttributes()) { // sort is fully covered by index. no additional sort costs! // forward iteration does not have high costs estimatedCost = itemsInIndex * 0.001; if (sortCondition->isDescending()) { // reverse iteration has higher costs than forward iteration estimatedCost *= 4; } return true; } else if (coveredAttributes > 0) { estimatedCost = (itemsInIndex / coveredAttributes) * std::log2(static_cast(itemsInIndex)); if (sortCondition->isAscending()) { // reverse iteration is more expensive estimatedCost *= 4; } return true; } } } coveredAttributes = 0; // by default no sort conditions are supported if (itemsInIndex > 0) { estimatedCost = itemsInIndex * std::log2(static_cast(itemsInIndex)); // slightly penalize this type of index against other indexes which // are in memory estimatedCost *= 1.05; } else { estimatedCost = 0.0; } return false; } IndexIterator* RocksDBVPackIndex::iteratorForCondition( transaction::Methods* trx, ManagedDocumentResult* mmdr, arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference, bool reverse) { VPackBuilder searchValues; searchValues.openArray(); bool needNormalize = false; if (node == nullptr) { // We only use this index for sort. Empty searchValue VPackArrayBuilder guard(&searchValues); TRI_IF_FAILURE("PersistentIndex::noSortIterator") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } else { // Create the search Values for the lookup VPackArrayBuilder guard(&searchValues); std::unordered_map> found; std::unordered_set nonNullAttributes; size_t unused = 0; matchAttributes(node, reference, found, unused, nonNullAttributes, true); // found contains all attributes that are relevant for this node. // It might be less than fields(). // // Handle the first attributes. They can only be == or IN and only // one node per attribute auto getValueAccess = [&](arangodb::aql::AstNode const* comp, arangodb::aql::AstNode const*& access, arangodb::aql::AstNode const*& value) -> bool { access = comp->getMember(0); value = comp->getMember(1); std::pair> paramPair; if (!(access->isAttributeAccessForVariable(paramPair) && paramPair.first == reference)) { access = comp->getMember(1); value = comp->getMember(0); if (!(access->isAttributeAccessForVariable(paramPair) && paramPair.first == reference)) { // Both side do not have a correct AttributeAccess, this should not // happen and indicates // an error in the optimizer TRI_ASSERT(false); } return true; } return false; }; size_t usedFields = 0; for (; usedFields < _fields.size(); ++usedFields) { auto it = found.find(usedFields); if (it == found.end()) { // We are either done // or this is a range. // Continue with more complicated loop break; } auto comp = it->second[0]; TRI_ASSERT(comp->numMembers() == 2); arangodb::aql::AstNode const* access = nullptr; arangodb::aql::AstNode const* value = nullptr; getValueAccess(comp, access, value); // We found an access for this field if (comp->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ) { searchValues.openObject(); searchValues.add(VPackValue(StaticStrings::IndexEq)); TRI_IF_FAILURE("PersistentIndex::permutationEQ") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } else if (comp->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { if (isAttributeExpanded(usedFields)) { searchValues.openObject(); searchValues.add(VPackValue(StaticStrings::IndexEq)); TRI_IF_FAILURE("PersistentIndex::permutationArrayIN") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } else { needNormalize = true; searchValues.openObject(); searchValues.add(VPackValue(StaticStrings::IndexIn)); } } else { // This is a one-sided range break; } // We have to add the value always, the key was added before value->toVelocyPackValue(searchValues); searchValues.close(); } // Now handle the next element, which might be a range if (usedFields < _fields.size()) { auto it = found.find(usedFields); if (it != found.end()) { auto rangeConditions = it->second; TRI_ASSERT(rangeConditions.size() <= 2); VPackObjectBuilder searchElement(&searchValues); for (auto& comp : rangeConditions) { TRI_ASSERT(comp->numMembers() == 2); arangodb::aql::AstNode const* access = nullptr; arangodb::aql::AstNode const* value = nullptr; bool isReverseOrder = getValueAccess(comp, access, value); // Add the key switch (comp->type) { case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: if (isReverseOrder) { searchValues.add(VPackValue(StaticStrings::IndexGt)); } else { searchValues.add(VPackValue(StaticStrings::IndexLt)); } break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: if (isReverseOrder) { searchValues.add(VPackValue(StaticStrings::IndexGe)); } else { searchValues.add(VPackValue(StaticStrings::IndexLe)); } break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: if (isReverseOrder) { searchValues.add(VPackValue(StaticStrings::IndexLt)); } else { searchValues.add(VPackValue(StaticStrings::IndexGt)); } break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: if (isReverseOrder) { searchValues.add(VPackValue(StaticStrings::IndexLe)); } else { searchValues.add(VPackValue(StaticStrings::IndexGe)); } break; default: // unsupported right now. Should have been rejected by // supportsFilterCondition TRI_ASSERT(false); return nullptr; } value->toVelocyPackValue(searchValues); } } } } searchValues.close(); TRI_IF_FAILURE("PersistentIndex::noIterator") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } if (needNormalize) { VPackBuilder expandedSearchValues; expandInSearchValues(searchValues.slice(), expandedSearchValues); VPackSlice expandedSlice = expandedSearchValues.slice(); std::vector iterators; try { for (auto const& val : VPackArrayIterator(expandedSlice)) { auto iterator = lookup(trx, mmdr, val, reverse); try { iterators.push_back(iterator); } catch (...) { // avoid leak delete iterator; throw; } } if (reverse) { std::reverse(iterators.begin(), iterators.end()); } } catch (...) { for (auto& it : iterators) { delete it; } throw; } return new MultiIndexIterator(_collection, trx, mmdr, this, iterators); } VPackSlice searchSlice = searchValues.slice(); TRI_ASSERT(searchSlice.length() == 1); searchSlice = searchSlice.at(0); return lookup(trx, mmdr, searchSlice, reverse); } /// @brief specializes the condition for use with the index arangodb::aql::AstNode* RocksDBVPackIndex::specializeCondition( arangodb::aql::AstNode* node, arangodb::aql::Variable const* reference) const { std::unordered_map> found; std::unordered_set nonNullAttributes; size_t values = 0; matchAttributes(node, reference, found, values, nonNullAttributes, false); std::vector children; bool lastContainsEquality = true; for (size_t i = 0; i < _fields.size(); ++i) { auto it = found.find(i); if (it == found.end()) { // index attribute not covered by condition break; } // check if the current condition contains an equality condition auto& nodes = (*it).second; bool containsEquality = false; for (size_t j = 0; j < nodes.size(); ++j) { if (nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ || nodes[j]->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { containsEquality = true; break; } } if (!lastContainsEquality) { // unsupported condition. must abort break; } std::sort(nodes.begin(), nodes.end(), [](arangodb::aql::AstNode const* lhs, arangodb::aql::AstNode const* rhs) -> bool { return sortWeight(lhs) < sortWeight(rhs); }); lastContainsEquality = containsEquality; std::unordered_set operatorsFound; for (auto& it : nodes) { // do not let duplicate or related operators pass if (isDuplicateOperator(it, operatorsFound)) { continue; } operatorsFound.emplace(static_cast(it->type)); children.emplace_back(it); } } while (node->numMembers() > 0) { node->removeMemberUnchecked(0); } for (auto& it : children) { node->addMember(it); } return node; } bool RocksDBVPackIndex::isDuplicateOperator( arangodb::aql::AstNode const* node, std::unordered_set const& operatorsFound) const { auto type = node->type; if (operatorsFound.find(static_cast(type)) != operatorsFound.end()) { // duplicate operator return true; } if (operatorsFound.find( static_cast(arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ)) != operatorsFound.end() || operatorsFound.find( static_cast(arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN)) != operatorsFound.end()) { return true; } bool duplicate = false; switch (type) { case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: duplicate = operatorsFound.find(static_cast( arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE)) != operatorsFound.end(); break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: duplicate = operatorsFound.find(static_cast( arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT)) != operatorsFound.end(); break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: duplicate = operatorsFound.find(static_cast( arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE)) != operatorsFound.end(); break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: duplicate = operatorsFound.find(static_cast( arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT)) != operatorsFound.end(); break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ: duplicate = operatorsFound.find(static_cast( arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN)) != operatorsFound.end(); break; case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN: duplicate = operatorsFound.find(static_cast( arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ)) != operatorsFound.end(); break; default: { // ignore } } return duplicate; }