diff --git a/arangod/RocksDBEngine/CMakeLists.txt b/arangod/RocksDBEngine/CMakeLists.txt index 2ffe44cf55..8cdc24ef94 100644 --- a/arangod/RocksDBEngine/CMakeLists.txt +++ b/arangod/RocksDBEngine/CMakeLists.txt @@ -10,6 +10,7 @@ set(ROCKSDB_SOURCES RocksDBEngine/RocksDBIndex.cpp RocksDBEngine/RocksDBIndexFactory.cpp RocksDBEngine/RocksDBKey.cpp + RocksDBEngine/RocksDBPathBasedIndex.cpp RocksDBEngine/RocksDBPrimaryIndex.cpp RocksDBEngine/RocksDBPrimaryMockIndex.cpp RocksDBEngine/RocksDBTransactionCollection.cpp diff --git a/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp b/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp index e4d64d41e4..1edc9131de 100644 --- a/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp @@ -99,7 +99,8 @@ bool RocksDBEdgeIndexIterator::next(TokenCallback const& cb, size_t limit) { RocksDBKey prefix = RocksDBKey::EdgeIndexPrefix(_index->_objectId, fromTo.copyString()); - std::unique_ptr iter(rtrx->GetIterator(state->readOptions())); + std::unique_ptr iter( + rtrx->GetIterator(state->readOptions())); rocksdb::Slice rSlice(prefix.string()); iter->Seek(rSlice); diff --git a/arangod/RocksDBEngine/RocksDBPathBasedIndex.cpp b/arangod/RocksDBEngine/RocksDBPathBasedIndex.cpp new file mode 100644 index 0000000000..e5f78739f1 --- /dev/null +++ b/arangod/RocksDBEngine/RocksDBPathBasedIndex.cpp @@ -0,0 +1,365 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Steemann +//////////////////////////////////////////////////////////////////////////////// + +#include "RocksDBPathBasedIndex.h" +#include "Aql/AstNode.h" +#include "Basics/FixedSizeAllocator.h" +#include "Basics/VelocyPackHelper.h" +#include "Logger/Logger.h" +#include "MMFiles/MMFilesIndexElement.h" + +#include +#include + +using namespace arangodb; + +/// @brief the _key attribute, which, when used in an index, will implictly make it unique +static std::vector const KeyAttribute + {arangodb::basics::AttributeName("_key", false)}; + +arangodb::aql::AstNode const* MMFilesPathBasedIndex::PermutationState::getValue() + const { + if (type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ) { + TRI_ASSERT(current == 0); + return value; + } else if (type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { + TRI_ASSERT(n > 0); + TRI_ASSERT(current < n); + return value->getMember(current); + } + + TRI_ASSERT(false); + return nullptr; +} + +/// @brief create the index +MMFilesPathBasedIndex::MMFilesPathBasedIndex(TRI_idx_iid_t iid, + arangodb::LogicalCollection* collection, + VPackSlice const& info, size_t baseSize, bool allowPartialIndex) + : Index(iid, collection, info), + _useExpansion(false), + _allowPartialIndex(allowPartialIndex) { + TRI_ASSERT(!_fields.empty()); + + TRI_ASSERT(iid != 0); + + fillPaths(_paths, _expanding); + + for (auto const& it : _fields) { + if (TRI_AttributeNamesHaveExpansion(it)) { + _useExpansion = true; + break; + } + } + + 
_allocator.reset(new FixedSizeAllocator(baseSize + sizeof(MMFilesIndexElementValue) * numPaths())); +} + +/// @brief destroy the index +MMFilesPathBasedIndex::~MMFilesPathBasedIndex() { + _allocator->deallocateAll(); +} + +/// @brief whether or not the index is implicitly unique +/// this can be the case if the index is not declared as unique, but contains a +/// unique attribute such as _key +bool MMFilesPathBasedIndex::implicitlyUnique() const { + if (_unique) { + // a unique index is always unique + return true; + } + if (_useExpansion) { + // when an expansion such as a[*] is used, the index may not be unique, even + // if it contains attributes that are guaranteed to be unique + return false; + } + + for (auto const& it : _fields) { + // if _key is contained in the index fields definition, then the index is + // implicitly unique + if (it == KeyAttribute) { + return true; + } + } + + // _key not contained + return false; +} + +/// @brief helper function to insert a document into any index type +template +int MMFilesPathBasedIndex::fillElement(std::vector& elements, + TRI_voc_rid_t revisionId, + VPackSlice const& doc) { + if (doc.isNone()) { + LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "encountered invalid marker with slice of type None"; + return TRI_ERROR_INTERNAL; + } + + TRI_IF_FAILURE("FillElementIllegalSlice") { return TRI_ERROR_INTERNAL; } + + size_t const n = _paths.size(); + + if (!_useExpansion) { + // fast path for inserts... no array elements used + auto slices = buildIndexValue(doc); + + if (slices.size() == n) { + // if slices.size() != n, then the value is not inserted into the index + // because of index sparsity! 
+ T* element = static_cast(_allocator->allocate()); + TRI_ASSERT(element != nullptr); + element = T::initialize(element, revisionId, slices); + + if (element == nullptr) { + return TRI_ERROR_OUT_OF_MEMORY; + } + TRI_IF_FAILURE("FillElementOOM") { + // clean up manually + _allocator->deallocate(element); + return TRI_ERROR_OUT_OF_MEMORY; + } + + try { + TRI_IF_FAILURE("FillElementOOM2") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } + + elements.emplace_back(element); + } catch (...) { + _allocator->deallocate(element); + return TRI_ERROR_OUT_OF_MEMORY; + } + } + } else { + // other path for handling array elements, too + std::vector>> toInsert; + std::vector> sliceStack; + + buildIndexValues(doc, 0, toInsert, sliceStack); + + if (!toInsert.empty()) { + elements.reserve(toInsert.size()); + + for (auto& info : toInsert) { + TRI_ASSERT(info.size() == n); + T* element = static_cast(_allocator->allocate()); + TRI_ASSERT(element != nullptr); + element = T::initialize(element, revisionId, info); + + if (element == nullptr) { + return TRI_ERROR_OUT_OF_MEMORY; + } + TRI_IF_FAILURE("FillElementOOM") { + // clean up manually + _allocator->deallocate(element); + return TRI_ERROR_OUT_OF_MEMORY; + } + + try { + TRI_IF_FAILURE("FillElementOOM2") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY); + } + + elements.emplace_back(element); + } catch (...) 
{ + _allocator->deallocate(element); + return TRI_ERROR_OUT_OF_MEMORY; + } + } + } + } + + return TRI_ERROR_NO_ERROR; +} + +/// @brief helper function to create the sole index value insert +std::vector> MMFilesPathBasedIndex::buildIndexValue( + VPackSlice const documentSlice) { + size_t const n = _paths.size(); + + std::vector> result; + for (size_t i = 0; i < n; ++i) { + TRI_ASSERT(!_paths[i].empty()); + + VPackSlice slice = documentSlice.get(_paths[i]); + if (slice.isNone() || slice.isNull()) { + // attribute not found + if (_sparse) { + // if sparse we do not have to index, this is indicated by result + // being shorter than n + result.clear(); + break; + } + // null, note that this will be copied later! + result.emplace_back(arangodb::basics::VelocyPackHelper::NullValue(), 0); // fake offset 0 + } else { + result.emplace_back(slice, static_cast(slice.start() - documentSlice.start())); + } + } + return result; +} + +/// @brief helper function to create a set of index combinations to insert +void MMFilesPathBasedIndex::buildIndexValues( + VPackSlice const document, size_t level, + std::vector>>& toInsert, + std::vector>& sliceStack) { + // Invariant: level == sliceStack.size() + + // Stop the recursion: + if (level == _paths.size()) { + toInsert.push_back(sliceStack); + return; + } + + if (_expanding[level] == -1) { // the trivial, non-expanding case + VPackSlice slice = document.get(_paths[level]); + if (slice.isNone() || slice.isNull()) { + if (_sparse) { + return; + } + sliceStack.emplace_back(arangodb::basics::VelocyPackHelper::NullValue(), 0); + } else { + sliceStack.emplace_back(slice, static_cast(slice.start() - document.start())); + } + buildIndexValues(document, level + 1, toInsert, sliceStack); + sliceStack.pop_back(); + return; + } + + // Finally, the complex case, where we have to expand one entry. + // Note again that at most one step in the attribute path can be + // an array step. 
Furthermore, if _allowPartialIndex is true and + // anything goes wrong with this attribute path, we have to bottom out + // with None values to be able to use the index for a prefix match. + + // Trivial case to bottom out with Illegal types. + VPackSlice illegalSlice = arangodb::basics::VelocyPackHelper::IllegalValue(); + + auto finishWithNones = [&]() -> void { + if (!_allowPartialIndex || level == 0) { + return; + } + for (size_t i = level; i < _paths.size(); i++) { + sliceStack.emplace_back(illegalSlice, 0); + } + toInsert.push_back(sliceStack); + for (size_t i = level; i < _paths.size(); i++) { + sliceStack.pop_back(); + } + }; + size_t const n = _paths[level].size(); + // We have 0 <= _expanding[level] < n. + VPackSlice current(document); + for (size_t i = 0; i <= static_cast(_expanding[level]); i++) { + if (!current.isObject()) { + finishWithNones(); + return; + } + current = current.get(_paths[level][i]); + if (current.isNone()) { + finishWithNones(); + return; + } + } + // Now the expansion: + if (!current.isArray() || current.length() == 0) { + finishWithNones(); + return; + } + + std::unordered_set + seen(2, arangodb::basics::VelocyPackHelper::VPackHash(), + arangodb::basics::VelocyPackHelper::VPackEqual()); + + auto moveOn = [&](VPackSlice something) -> void { + auto it = seen.find(something); + if (it == seen.end()) { + seen.insert(something); + sliceStack.emplace_back(something, static_cast(something.start() - document.start())); + buildIndexValues(document, level + 1, toInsert, sliceStack); + sliceStack.pop_back(); + } + }; + for (auto const& member : VPackArrayIterator(current)) { + VPackSlice current2(member); + bool doneNull = false; + for (size_t i = _expanding[level] + 1; i < n; i++) { + if (!current2.isObject()) { + if (!_sparse) { + moveOn(arangodb::basics::VelocyPackHelper::NullValue()); + } + doneNull = true; + break; + } + current2 = current2.get(_paths[level][i]); + if (current2.isNone()) { + if (!_sparse) { + 
moveOn(arangodb::basics::VelocyPackHelper::NullValue()); + } + doneNull = true; + break; + } + } + if (!doneNull) { + moveOn(current2); + } + // Finally, if, because of sparsity, we have not inserted anything by now, + // we need to play the above trick with None because of the above mentioned + // reasons: + if (seen.empty()) { + finishWithNones(); + } + } +} + +/// @brief helper function to transform AttributeNames into strings. +void MMFilesPathBasedIndex::fillPaths(std::vector>& paths, + std::vector& expanding) { + paths.clear(); + expanding.clear(); + for (std::vector const& list : _fields) { + paths.emplace_back(); + std::vector& interior(paths.back()); + int expands = -1; + int count = 0; + for (auto const& att : list) { + interior.emplace_back(att.name); + if (att.shouldExpand) { + expands = count; + } + ++count; + } + expanding.emplace_back(expands); + } +} + +// template instanciations for fillElement +template +int MMFilesPathBasedIndex::fillElement(std::vector& elements, TRI_voc_rid_t revisionId, VPackSlice const& doc); + +template +int MMFilesPathBasedIndex::fillElement(std::vector& elements, TRI_voc_rid_t revisionId, VPackSlice const& doc); diff --git a/arangod/RocksDBEngine/RocksDBPathBasedIndex.h b/arangod/RocksDBEngine/RocksDBPathBasedIndex.h new file mode 100644 index 0000000000..02f4256b75 --- /dev/null +++ b/arangod/RocksDBEngine/RocksDBPathBasedIndex.h @@ -0,0 +1,127 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. 
+/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Steemann +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_ROCKSDB_ENGINE_ROCKSDB_PATH_BASED_INDEX_H +#define ARANGOD_ROCKSDB_ENGINE_ROCKSDB_PATH_BASED_INDEX_H 1 + +#include "Basics/Common.h" +#include "Indexes/Index.h" +#include "VocBase/vocbase.h" +#include "VocBase/voc-types.h" +#include "RocksDBEngine/RocksDBIndex.h" + +namespace arangodb { +namespace aql { +enum AstNodeType : uint32_t; +} + +class FixedSizeAllocator; + +/// @brief base class for RocksDB indexes that are defined over a list of attribute paths +class RocksDBPathBasedIndex : public RocksDBIndex { + protected: + struct PermutationState { + PermutationState(arangodb::aql::AstNodeType type, + arangodb::aql::AstNode const* value, + size_t attributePosition, size_t n) + : type(type), + value(value), + attributePosition(attributePosition), + current(0), + n(n) { + TRI_ASSERT(n > 0); + } + + arangodb::aql::AstNode const* getValue() const; + + arangodb::aql::AstNodeType type; + arangodb::aql::AstNode const* value; + size_t const attributePosition; + size_t current; + size_t const n; + }; + + public: + RocksDBPathBasedIndex() = delete; + + RocksDBPathBasedIndex(TRI_idx_iid_t, arangodb::LogicalCollection*, + arangodb::velocypack::Slice const&, size_t baseSize, bool allowPartialIndex); + + ~RocksDBPathBasedIndex(); + + public: + + /// @brief return the attribute paths + std::vector> const& paths() + const { + return _paths; + } + + /// @brief return the expansion positions, one entry per attribute path: a -1 entry means none is expanding, + /// otherwise the non-negative number is the index of the
expanding one. + std::vector const& expanding() const { + return _expanding; + } + + bool implicitlyUnique() const override; + + protected: + /// @brief helper function to insert a document into any index type + template + int fillElement(std::vector& elements, + TRI_voc_rid_t revisionId, arangodb::velocypack::Slice const&); + + /// @brief return the number of paths + inline size_t numPaths() const { return _paths.size(); } + + private: + + /// @brief helper function to transform AttributeNames into string lists + void fillPaths(std::vector>& paths, + std::vector& expanding); + + /// @brief helper function to create the single index value to insert (used when no attribute is expanded) + std::vector> buildIndexValue(VPackSlice const documentSlice); + + /// @brief helper function to create a set of index combinations to insert + void buildIndexValues(VPackSlice const document, size_t level, + std::vector>>& toInsert, + std::vector>& sliceStack); + + protected: + std::unique_ptr _allocator; + + /// @brief the attribute paths + std::vector> _paths; + + /// @brief ... and which of them expands + std::vector _expanding; + + /// @brief whether or not at least one attribute is expanded + bool _useExpansion; + + /// @brief whether or not partial indexing is allowed + bool _allowPartialIndex; +}; +} + +#endif