//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Dr. Frank Celler //////////////////////////////////////////////////////////////////////////////// #include "EdgeIndex.h" #include "Aql/AstNode.h" #include "Aql/SortCondition.h" #include "Basics/Exceptions.h" #include "Basics/fasthash.h" #include "Basics/hashes.h" #include "Indexes/SimpleAttributeEqualityMatcher.h" #include "Utils/CollectionNameResolver.h" #include "VocBase/document-collection.h" #include "VocBase/transaction.h" #include #include using namespace arangodb; //////////////////////////////////////////////////////////////////////////////// /// @brief hashes an edge key //////////////////////////////////////////////////////////////////////////////// static uint64_t HashElementKey(void*, VPackSlice const* key) { // TODO: Can we unify all HashElementKey functions for VPack? TRI_ASSERT(key != nullptr); uint64_t hash = 0x87654321; if (!key->isString()) { // Illegal edge entry, key has to be string. TRI_ASSERT(false); return hash; } // we can get away with the fast hash function here, as edge // index values are restricted to strings return key->hash(hash); } //////////////////////////////////////////////////////////////////////////////// /// @brief hashes an edge (_from case) //////////////////////////////////////////////////////////////////////////////// static uint64_t HashElementEdgeFrom(void*, TRI_doc_mptr_t const* mptr, bool byKey) { TRI_ASSERT(mptr != nullptr); uint64_t hash = 0x87654321; if (!byKey) { hash = (uint64_t)mptr; hash = fasthash64(&hash, sizeof(hash), 0x56781234); } else { // Is identical to HashElementKey VPackSlice tmp(mptr->vpack()); tmp = tmp.get(TRI_VOC_ATTRIBUTE_FROM); TRI_ASSERT(tmp.isString()); // we can get away with the fast hash function here, as edge // index values are restricted to strings hash = tmp.hash(hash); } return hash; } //////////////////////////////////////////////////////////////////////////////// /// @brief hashes an edge (_to case) //////////////////////////////////////////////////////////////////////////////// static uint64_t HashElementEdgeTo(void*, TRI_doc_mptr_t const* mptr, bool byKey) { TRI_ASSERT(mptr != nullptr); uint64_t hash = 0x87654321; if (!byKey) { hash = (uint64_t)mptr; hash = fasthash64(&hash, sizeof(hash), 0x56781234); } else { // Is identical to HashElementKey VPackSlice tmp(mptr->vpack()); TRI_ASSERT(tmp.isObject()); tmp = tmp.get(TRI_VOC_ATTRIBUTE_TO); TRI_ASSERT(tmp.isString()); // we can get away with the fast hash function here, as edge // index values are restricted to strings hash = tmp.hash(hash); } return hash; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks if key and element match (_from case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualKeyEdgeFrom(void*, VPackSlice const* left, TRI_doc_mptr_t const* right) { TRI_ASSERT(left != nullptr); TRI_ASSERT(right != nullptr); // left is a key // right is an element, that is a master pointer VPackSlice tmp(right->vpack()); tmp = tmp.get(TRI_VOC_ATTRIBUTE_FROM); TRI_ASSERT(tmp.isString()); return *left == tmp; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks if key and element match (_to case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualKeyEdgeTo(void*, VPackSlice const* left, TRI_doc_mptr_t const* right) { TRI_ASSERT(left != nullptr); TRI_ASSERT(right != nullptr); // left is a key // right is an element, that is a master pointer VPackSlice tmp(right->vpack()); tmp = tmp.get(TRI_VOC_ATTRIBUTE_TO); TRI_ASSERT(tmp.isString()); return *left == tmp; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks for elements are equal (_from and _to case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualElementEdge(void*, TRI_doc_mptr_t const* left, TRI_doc_mptr_t const* right) { return left == right; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks for elements are equal (_from case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualElementEdgeFromByKey(void*, TRI_doc_mptr_t const* left, TRI_doc_mptr_t const* right) { TRI_ASSERT(left != nullptr); TRI_ASSERT(right != nullptr); VPackSlice lSlice(left->vpack()); lSlice = lSlice.get(TRI_VOC_ATTRIBUTE_FROM); TRI_ASSERT(lSlice.isString()); VPackSlice rSlice(right->vpack()); rSlice = rSlice.get(TRI_VOC_ATTRIBUTE_FROM); TRI_ASSERT(rSlice.isString()); return lSlice == rSlice; } //////////////////////////////////////////////////////////////////////////////// /// @brief checks for elements are equal (_to case) //////////////////////////////////////////////////////////////////////////////// static bool IsEqualElementEdgeToByKey(void*, TRI_doc_mptr_t const* left, TRI_doc_mptr_t const* right) { TRI_ASSERT(left != nullptr); TRI_ASSERT(right != nullptr); VPackSlice lSlice(left->vpack()); lSlice = lSlice.get(TRI_VOC_ATTRIBUTE_TO); TRI_ASSERT(lSlice.isString()); VPackSlice rSlice(right->vpack()); rSlice = rSlice.get(TRI_VOC_ATTRIBUTE_TO); TRI_ASSERT(rSlice.isString()); return lSlice == rSlice; } TRI_doc_mptr_t* EdgeIndexIterator::next() { while (true) { if (_position >= static_cast(_keys.length())) { // we're at the end of the lookup values return nullptr; } if (_buffer == nullptr) { // We start a new lookup TRI_ASSERT(_position == 0); _posInBuffer = 0; _last = nullptr; VPackSlice tmp = _keys.at(_position); if (tmp.isObject()) { tmp = tmp.get(TRI_SLICE_KEY_EQUAL); } _buffer = _index->lookupByKey(_trx, &tmp, _batchSize); // fallthrough intentional } else if (_posInBuffer >= _buffer->size()) { // We have to refill the buffer delete _buffer; _buffer = nullptr; _posInBuffer = 0; if (_last != nullptr) { _buffer = _index->lookupByKeyContinue(_trx, _last, _batchSize); } else { VPackSlice tmp = _keys.at(_position); if (tmp.isObject()) { tmp = tmp.get(TRI_SLICE_KEY_EQUAL); } _buffer = _index->lookupByKey(_trx, &tmp, _batchSize); } } if (!_buffer->empty()) { // found something _last = _buffer->back(); return _buffer->at(_posInBuffer++); } // found no result. now go to next lookup value in _keys ++_position; // reset the _last value _last = nullptr; } } void EdgeIndexIterator::reset() { _last = nullptr; _position = 0; _posInBuffer = 0; // Free the vector space, not the content delete _buffer; _buffer = nullptr; } TRI_doc_mptr_t* AnyDirectionEdgeIndexIterator::next() { TRI_doc_mptr_t* res = nullptr; if (_useInbound) { do { res = _inbound->next(); } while (res != nullptr && _seen.find(res) != _seen.end()); return res; } res = _outbound->next(); if (res == nullptr) { _useInbound = true; return next(); } _seen.emplace(res); return res; } void AnyDirectionEdgeIndexIterator::reset() { _useInbound = false; _seen.clear(); _outbound->reset(); _inbound->reset(); } EdgeIndex::EdgeIndex(TRI_idx_iid_t iid, TRI_document_collection_t* collection) : Index(iid, collection, std::vector>( {{{TRI_VOC_ATTRIBUTE_FROM, false}}, {{TRI_VOC_ATTRIBUTE_TO, false}}}), false, false), _edgesFrom(nullptr), _edgesTo(nullptr), _numBuckets(1) { TRI_ASSERT(iid != 0); if (collection != nullptr) { // document is a nullptr in the coordinator case _numBuckets = static_cast(collection->_info.indexBuckets()); } auto context = [this]() -> std::string { return this->context(); }; _edgesFrom = new TRI_EdgeIndexHash_t(HashElementKey, HashElementEdgeFrom, IsEqualKeyEdgeFrom, IsEqualElementEdge, IsEqualElementEdgeFromByKey, _numBuckets, 64, context); _edgesTo = new TRI_EdgeIndexHash_t( HashElementKey, HashElementEdgeTo, IsEqualKeyEdgeTo, IsEqualElementEdge, IsEqualElementEdgeToByKey, _numBuckets, 64, context); } //////////////////////////////////////////////////////////////////////////////// /// @brief create an index stub with a hard-coded selectivity estimate /// this is used in the cluster coordinator case //////////////////////////////////////////////////////////////////////////////// EdgeIndex::EdgeIndex(VPackSlice const& slice) : Index(slice), _edgesFrom(nullptr), _edgesTo(nullptr), _numBuckets(1) {} EdgeIndex::~EdgeIndex() { delete _edgesTo; delete _edgesFrom; } void EdgeIndex::buildSearchValue(TRI_edge_direction_e dir, std::string const& id, VPackBuilder& builder) { builder.openArray(); switch (dir) { case TRI_EDGE_OUT: builder.openArray(); builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, VPackValue(id)); builder.close(); builder.close(); builder.add(VPackValue(VPackValueType::Null)); break; case TRI_EDGE_IN: builder.add(VPackValue(VPackValueType::Null)); builder.openArray(); builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, VPackValue(id)); builder.close(); builder.close(); break; case TRI_EDGE_ANY: builder.openArray(); builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, VPackValue(id)); builder.close(); builder.close(); builder.openArray(); builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, VPackValue(id)); builder.close(); builder.close(); } builder.close(); } void EdgeIndex::buildSearchValueFromArray(TRI_edge_direction_e dir, VPackSlice const ids, VPackBuilder& builder) { TRI_ASSERT(ids.isArray()); builder.openArray(); switch (dir) { case TRI_EDGE_OUT: builder.openArray(); for (auto const& id : VPackArrayIterator(ids)) { if (id.isString()) { builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, id); builder.close(); } } builder.close(); builder.add(VPackValue(VPackValueType::Null)); break; case TRI_EDGE_IN: builder.add(VPackValue(VPackValueType::Null)); builder.openArray(); for (auto const& id : VPackArrayIterator(ids)) { if (id.isString()) { builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, id); builder.close(); } } builder.close(); break; case TRI_EDGE_ANY: builder.openArray(); for (auto const& id : VPackArrayIterator(ids)) { if (id.isString()) { builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, id); builder.close(); } } builder.close(); builder.openArray(); for (auto const& id : VPackArrayIterator(ids)) { if (id.isString()) { builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, id); builder.close(); } } builder.close(); } builder.close(); } //////////////////////////////////////////////////////////////////////////////// /// @brief return a selectivity estimate for the index //////////////////////////////////////////////////////////////////////////////// double EdgeIndex::selectivityEstimate() const { if (_edgesFrom == nullptr || _edgesTo == nullptr) { // use hard-coded selectivity estimate in case of cluster coordinator return _selectivityEstimate; } // return average selectivity of the two index parts double estimate = (_edgesFrom->selectivity() + _edgesTo->selectivity()) * 0.5; TRI_ASSERT(estimate >= 0.0 && estimate <= 1.00001); // floating-point tolerance return estimate; } //////////////////////////////////////////////////////////////////////////////// /// @brief return the memory usage for the index //////////////////////////////////////////////////////////////////////////////// size_t EdgeIndex::memory() const { TRI_ASSERT(_edgesFrom != nullptr); TRI_ASSERT(_edgesTo != nullptr); return _edgesFrom->memoryUsage() + _edgesTo->memoryUsage(); } //////////////////////////////////////////////////////////////////////////////// /// @brief return a VelocyPack representation of the index //////////////////////////////////////////////////////////////////////////////// void EdgeIndex::toVelocyPack(VPackBuilder& builder, bool withFigures) const { Index::toVelocyPack(builder, withFigures); // hard-coded builder.add("unique", VPackValue(false)); builder.add("sparse", VPackValue(false)); } //////////////////////////////////////////////////////////////////////////////// /// @brief return a VelocyPack representation of the index figures //////////////////////////////////////////////////////////////////////////////// void EdgeIndex::toVelocyPackFigures(VPackBuilder& builder) const { Index::toVelocyPackFigures(builder); builder.add("buckets", VPackValue(_numBuckets)); } int EdgeIndex::insert(arangodb::Transaction* trx, TRI_doc_mptr_t const* doc, bool isRollback) { auto element = const_cast(doc); _edgesFrom->insert(trx, element, true, isRollback); try { _edgesTo->insert(trx, element, true, isRollback); } catch (...) { _edgesFrom->remove(trx, element); throw; } return TRI_ERROR_NO_ERROR; } int EdgeIndex::remove(arangodb::Transaction* trx, TRI_doc_mptr_t const* doc, bool) { _edgesFrom->remove(trx, doc); _edgesTo->remove(trx, doc); return TRI_ERROR_NO_ERROR; } int EdgeIndex::batchInsert(arangodb::Transaction* trx, std::vector const* documents, size_t numThreads) { _edgesFrom->batchInsert( trx, reinterpret_cast const*>(documents), numThreads); _edgesTo->batchInsert( trx, reinterpret_cast const*>(documents), numThreads); return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief provides a size hint for the edge index //////////////////////////////////////////////////////////////////////////////// int EdgeIndex::sizeHint(arangodb::Transaction* trx, size_t size) { // we assume this is called when setting up the index and the index // is still empty TRI_ASSERT(_edgesFrom->size() == 0); // set an initial size for the index for some new nodes to be created // without resizing int err = _edgesFrom->resize(trx, static_cast(size + 2049)); if (err != TRI_ERROR_NO_ERROR) { return err; } // we assume this is called when setting up the index and the index // is still empty TRI_ASSERT(_edgesTo->size() == 0); // set an initial size for the index for some new nodes to be created // without resizing return _edgesTo->resize(trx, static_cast(size + 2049)); } //////////////////////////////////////////////////////////////////////////////// /// @brief checks whether the index supports the condition //////////////////////////////////////////////////////////////////////////////// bool EdgeIndex::supportsFilterCondition( arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference, size_t itemsInIndex, size_t& estimatedItems, double& estimatedCost) const { SimpleAttributeEqualityMatcher matcher( {{arangodb::basics::AttributeName(TRI_VOC_ATTRIBUTE_FROM, false)}, {arangodb::basics::AttributeName(TRI_VOC_ATTRIBUTE_TO, false)}}); return matcher.matchOne(this, node, reference, itemsInIndex, estimatedItems, estimatedCost); } //////////////////////////////////////////////////////////////////////////////// /// @brief creates an IndexIterator for the given Condition //////////////////////////////////////////////////////////////////////////////// IndexIterator* EdgeIndex::iteratorForCondition( arangodb::Transaction* trx, IndexIteratorContext* context, arangodb::aql::Ast* ast, arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference, bool reverse) const { TRI_ASSERT(node->type == aql::NODE_TYPE_OPERATOR_NARY_AND); SimpleAttributeEqualityMatcher matcher( {{arangodb::basics::AttributeName(TRI_VOC_ATTRIBUTE_FROM, false)}, {arangodb::basics::AttributeName(TRI_VOC_ATTRIBUTE_TO, false)}}); TRI_ASSERT(node->numMembers() == 1); auto comp = node->getMember(0); // assume a.b == value auto attrNode = comp->getMember(0); auto valNode = comp->getMember(1); if (attrNode->type != aql::NODE_TYPE_ATTRIBUTE_ACCESS) { // got value == a.b -> flip sides attrNode = comp->getMember(1); valNode = comp->getMember(0); } TRI_ASSERT(attrNode->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS); if (comp->type == aql::NODE_TYPE_OPERATOR_BINARY_EQ) { // a.b == value return createIterator( trx, context, attrNode, std::vector({valNode})); } if (comp->type == aql::NODE_TYPE_OPERATOR_BINARY_IN) { // a.b IN values if (!valNode->isArray()) { return nullptr; } std::vector valNodes; size_t const n = valNode->numMembers(); valNodes.reserve(n); for (size_t i = 0; i < n; ++i) { valNodes.emplace_back(valNode->getMemberUnchecked(i)); TRI_IF_FAILURE("EdgeIndex::iteratorValNodes") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } return createIterator(trx, context, attrNode, valNodes); } // operator type unsupported return nullptr; } //////////////////////////////////////////////////////////////////////////////// /// @brief specializes the condition for use with the index //////////////////////////////////////////////////////////////////////////////// arangodb::aql::AstNode* EdgeIndex::specializeCondition( arangodb::aql::AstNode* node, arangodb::aql::Variable const* reference) const { SimpleAttributeEqualityMatcher matcher( {{arangodb::basics::AttributeName(TRI_VOC_ATTRIBUTE_FROM, false)}, {arangodb::basics::AttributeName(TRI_VOC_ATTRIBUTE_TO, false)}}); return matcher.specializeOne(this, node, reference); } ////////////////////////////////////////////////////////////////////////////// /// @brief Transform the list of search slices to search values. /// This will multiply all IN entries and simply return all other /// entries. ////////////////////////////////////////////////////////////////////////////// void EdgeIndex::expandInSearchValues(VPackSlice const slice, VPackBuilder& builder) const { TRI_ASSERT(slice.isArray()); builder.openArray(); for (auto const& side : VPackArrayIterator(slice)) { if (side.isNull()) { builder.add(side); } else { TRI_ASSERT(side.isArray()); builder.openArray(); for (auto const& item : VPackArrayIterator(side)) { TRI_ASSERT(item.isObject()); if (item.hasKey(TRI_SLICE_KEY_EQUAL)) { TRI_ASSERT(!item.hasKey(TRI_SLICE_KEY_IN)); builder.add(item); } else { TRI_ASSERT(item.hasKey(TRI_SLICE_KEY_IN)); VPackSlice list = item.get(TRI_SLICE_KEY_IN); TRI_ASSERT(list.isArray()); for (auto const& it : VPackArrayIterator(list)) { builder.openObject(); builder.add(TRI_SLICE_KEY_EQUAL, it); builder.close(); } } } builder.close(); } } builder.close(); } //////////////////////////////////////////////////////////////////////////////// /// @brief creates an IndexIterator for the given VelocyPackSlices. /// The searchValue is a an Array with exactly two Entries. /// If the first is set it means we are searching for _from (OUTBOUND), /// if the second is set we are searching for _to (INBOUND). /// if both are set we are search for ANY direction. Result is made /// DISTINCT. /// Each defined slice that is set has to be list of keys to search for. /// Each key needs to have the following formats: /// /// 1) {"eq": } // The value in index is exactly this /// /// Reverse is not supported, hence ignored /// NOTE: The iterator is only valid as long as the slice points to /// a valid memory region. //////////////////////////////////////////////////////////////////////////////// IndexIterator* EdgeIndex::iteratorForSlice( arangodb::Transaction* trx, IndexIteratorContext*, arangodb::velocypack::Slice const searchValues, bool) const { if (!searchValues.isArray() || searchValues.length() != 2) { // Invalid searchValue return nullptr; } VPackSlice const from = searchValues.at(0); VPackSlice const to = searchValues.at(1); if (!from.isNull()) { TRI_ASSERT(from.isArray()); if (!to.isNull()) { // ANY search TRI_ASSERT(to.isArray()); auto left = std::make_unique(trx, _edgesFrom, from); auto right = std::make_unique(trx, _edgesTo, to); return new AnyDirectionEdgeIndexIterator(left.release(), right.release()); } // OUTBOUND search TRI_ASSERT(to.isNull()); return new EdgeIndexIterator(trx, _edgesFrom, from); } else { // INBOUND search TRI_ASSERT(to.isArray()); return new EdgeIndexIterator(trx, _edgesTo, to); } } //////////////////////////////////////////////////////////////////////////////// /// @brief create the iterator //////////////////////////////////////////////////////////////////////////////// IndexIterator* EdgeIndex::createIterator( arangodb::Transaction* trx, IndexIteratorContext* context, arangodb::aql::AstNode const* attrNode, std::vector const& valNodes) const { // only leave the valid elements in the vector VPackBuilder keys; keys.openArray(); for (auto const& valNode : valNodes) { if (!valNode->isStringValue()) { continue; } if (valNode->getStringLength() == 0) { continue; } keys.openObject(); keys.add(TRI_SLICE_KEY_EQUAL, VPackValue(valNode->getStringValue())); keys.close(); TRI_IF_FAILURE("EdgeIndex::collectKeys") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } } TRI_IF_FAILURE("EdgeIndex::noIterator") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } keys.close(); // _from or _to? bool const isFrom = (strcmp(attrNode->getStringValue(), TRI_VOC_ATTRIBUTE_FROM) == 0); return new EdgeIndexIterator(trx, isFrom ? _edgesFrom : _edgesTo, std::move(keys)); }