From b285cd26a32d53c091486cbb99a78cdac8d06dfa Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Wed, 6 Jul 2016 22:42:46 +0200 Subject: [PATCH 1/3] Internal cleanup of TraversalOptions. Now the options know the the expressions not the traverser any more. Preparation to use other indexes --- arangod/Aql/TraversalBlock.cpp | 6 +- arangod/Cluster/ClusterTraverser.cpp | 24 ++++---- arangod/Cluster/ClusterTraverser.h | 6 +- arangod/VocBase/SingleServerTraverser.cpp | 31 +++++------ arangod/VocBase/SingleServerTraverser.h | 8 +-- arangod/VocBase/Traverser.h | 67 ++++++++++++----------- 6 files changed, 68 insertions(+), 74 deletions(-) diff --git a/arangod/Aql/TraversalBlock.cpp b/arangod/Aql/TraversalBlock.cpp index d0877d5042..c44a0c18a8 100644 --- a/arangod/Aql/TraversalBlock.cpp +++ b/arangod/Aql/TraversalBlock.cpp @@ -53,7 +53,7 @@ TraversalBlock::TraversalBlock(ExecutionEngine* engine, TraversalNode const* ep) _pathReg(0), _expressions(ep->expressions()), _hasV8Expression(false) { - arangodb::traverser::TraverserOptions opts(_trx); + arangodb::traverser::TraverserOptions opts(_trx, _expressions); ep->fillTraversalOptions(opts); auto ast = ep->_plan->getAst(); @@ -91,10 +91,10 @@ TraversalBlock::TraversalBlock(ExecutionEngine* engine, TraversalNode const* ep) _traverser.reset(new arangodb::traverser::ClusterTraverser( ep->edgeColls(), opts, std::string(_trx->vocbase()->_name, strlen(_trx->vocbase()->_name)), - _trx, _expressions)); + _trx)); } else { _traverser.reset( - new arangodb::traverser::SingleServerTraverser(opts, _trx, _expressions)); + new arangodb::traverser::SingleServerTraverser(opts, _trx)); } if (!ep->usesInVariable()) { _vertexId = ep->getStartVertex(); diff --git a/arangod/Cluster/ClusterTraverser.cpp b/arangod/Cluster/ClusterTraverser.cpp index 9726fc4a5b..78f326589a 100644 --- a/arangod/Cluster/ClusterTraverser.cpp +++ b/arangod/Cluster/ClusterTraverser.cpp @@ -47,8 +47,8 @@ bool ClusterTraverser::VertexGetter::getVertex(std::string const& edgeId, std::string to = slice.get(StaticStrings::ToString).copyString(); result = std::move(to); } - auto exp = _traverser->_expressions->find(depth); - if (exp != _traverser->_expressions->end()) { + auto exp = _traverser->_opts.expressions->find(depth); + if (exp != _traverser->_opts.expressions->end()) { auto v = _traverser->_vertices.find(result); if (v == _traverser->_vertices.end()) { // If the vertex ist not in list it means it has not passed any @@ -95,8 +95,8 @@ bool ClusterTraverser::UniqueVertexGetter::getVertex( return false; } - auto exp = _traverser->_expressions->find(depth); - if (exp != _traverser->_expressions->end()) { + auto exp = _traverser->_opts.expressions->find(depth); + if (exp != _traverser->_opts.expressions->end()) { auto v = _traverser->_vertices.find(result); if (v == _traverser->_vertices.end()) { // If the vertex ist not in list it means it has not passed any @@ -137,8 +137,8 @@ void ClusterTraverser::ClusterEdgeGetter::getEdge( // We have to request the next level arangodb::GeneralResponse::ResponseCode responseCode; std::vector expEdges; - auto found = _traverser->_expressions->find(depth); - if (found != _traverser->_expressions->end()) { + auto found = _traverser->_opts.expressions->find(depth); + if (found != _traverser->_opts.expressions->end()) { expEdges = found->second; } @@ -254,8 +254,8 @@ void ClusterTraverser::ClusterEdgeGetter::getAllEdges( TRI_edge_direction_e dir; size_t eColIdx = 0; std::vector expEdges; - auto found = _traverser->_expressions->find(depth); - if (found != _traverser->_expressions->end()) { + auto found = _traverser->_opts.expressions->find(depth); + if (found != _traverser->_opts.expressions->end()) { expEdges = found->second; } @@ -348,8 +348,8 @@ void ClusterTraverser::setStartVertex(std::string const& id) { } } - auto exp = _expressions->find(0); - if (exp != _expressions->end() && + auto exp = _opts.expressions->find(0); + if (exp != _opts.expressions->end() && !vertexMatchesCondition(VPackSlice(it->second->data()), exp->second)) { // We can stop here. The start vertex does not match condition _done = true; @@ -378,8 +378,8 @@ void ClusterTraverser::fetchVertices(std::unordered_set& verticesTo _readDocuments += verticesToFetch.size(); std::vector expVertices; - auto found = _expressions->find(depth); - if (found != _expressions->end()) { + auto found = _opts.expressions->find(depth); + if (found != _opts.expressions->end()) { expVertices = found->second; } diff --git a/arangod/Cluster/ClusterTraverser.h b/arangod/Cluster/ClusterTraverser.h index c4dbfcec0a..e2cf495f21 100644 --- a/arangod/Cluster/ClusterTraverser.h +++ b/arangod/Cluster/ClusterTraverser.h @@ -40,10 +40,8 @@ class ClusterTraverser final : public Traverser { public: ClusterTraverser( std::vector edgeCollections, TraverserOptions& opts, - std::string const& dbname, Transaction* trx, - std::unordered_map> const* - expressions) - : Traverser(opts, expressions), + std::string const& dbname, Transaction* trx) + : Traverser(opts), _edgeCols(edgeCollections), _dbname(dbname), _trx(trx) { diff --git a/arangod/VocBase/SingleServerTraverser.cpp b/arangod/VocBase/SingleServerTraverser.cpp index 2cdd2e3bbd..14c6f177cc 100644 --- a/arangod/VocBase/SingleServerTraverser.cpp +++ b/arangod/VocBase/SingleServerTraverser.cpp @@ -56,12 +56,9 @@ static int FetchDocumentById(arangodb::Transaction* trx, return res; } -SingleServerTraverser::SingleServerTraverser( - TraverserOptions& opts, arangodb::Transaction* trx, - std::unordered_map> const* - expressions) - : Traverser(opts, expressions), _trx(trx) { - +SingleServerTraverser::SingleServerTraverser(TraverserOptions& opts, + arangodb::Transaction* trx) + : Traverser(opts), _trx(trx) { _edgeGetter = std::make_unique(this, opts, trx); if (opts.uniqueVertices == TraverserOptions::UniquenessLevel::GLOBAL) { _vertexGetter = std::make_unique(this); @@ -73,11 +70,11 @@ SingleServerTraverser::SingleServerTraverser( SingleServerTraverser::~SingleServerTraverser() {} bool SingleServerTraverser::edgeMatchesConditions(VPackSlice e, size_t depth) { - if (_hasEdgeConditions) { - TRI_ASSERT(_expressions != nullptr); - auto it = _expressions->find(depth); + if (_opts.hasEdgeConditions) { + TRI_ASSERT(_opts.expressions != nullptr); + auto it = _opts.expressions->find(depth); - if (it != _expressions->end()) { + if (it != _opts.expressions->end()) { for (auto const& exp : it->second) { TRI_ASSERT(exp != nullptr); @@ -93,11 +90,11 @@ bool SingleServerTraverser::edgeMatchesConditions(VPackSlice e, size_t depth) { bool SingleServerTraverser::vertexMatchesConditions(std::string const& v, size_t depth) { - if (_hasVertexConditions) { - TRI_ASSERT(_expressions != nullptr); - auto it = _expressions->find(depth); + if (_opts.hasVertexConditions) { + TRI_ASSERT(_opts.expressions != nullptr); + auto it = _opts.expressions->find(depth); - if (it != _expressions->end()) { + if (it != _opts.expressions->end()) { bool fetchVertex = true; aql::AqlValue vertex; for (auto const& exp : it->second) { @@ -229,11 +226,11 @@ void SingleServerTraverser::UniqueVertexGetter::reset(std::string const& startVe void SingleServerTraverser::setStartVertex(std::string const& v) { _pruneNext = false; - TRI_ASSERT(_expressions != nullptr); + TRI_ASSERT(_opts.expressions != nullptr); - auto it = _expressions->find(0); + auto it = _opts.expressions->find(0); - if (it != _expressions->end()) { + if (it != _opts.expressions->end()) { if (!it->second.empty()) { TRI_doc_mptr_t vertex; bool fetchVertex = true; diff --git a/arangod/VocBase/SingleServerTraverser.h b/arangod/VocBase/SingleServerTraverser.h index d378663b57..42dfb93e13 100644 --- a/arangod/VocBase/SingleServerTraverser.h +++ b/arangod/VocBase/SingleServerTraverser.h @@ -171,11 +171,9 @@ class SingleServerTraverser final : public Traverser { ////////////////////////////////////////////////////////////////////////////// public: - SingleServerTraverser( - TraverserOptions&, Transaction*, - std::unordered_map> const*); - - ~SingleServerTraverser(); + SingleServerTraverser(TraverserOptions&, Transaction*); + + ~SingleServerTraverser(); ////////////////////////////////////////////////////////////////////////////// /// @brief Reset the traverser to use another start vertex diff --git a/arangod/VocBase/Traverser.h b/arangod/VocBase/Traverser.h index 5bf94be98c..28bdfcba89 100644 --- a/arangod/VocBase/Traverser.h +++ b/arangod/VocBase/Traverser.h @@ -216,13 +216,43 @@ struct TraverserOptions { UniquenessLevel uniqueEdges; - explicit TraverserOptions(arangodb::Transaction* trx) + ////////////////////////////////////////////////////////////////////////////// + /// @brief a vector containing all information for early pruning + ////////////////////////////////////////////////////////////////////////////// + + std::unordered_map> const* expressions; + + ////////////////////////////////////////////////////////////////////////////// + /// @brief whether or not we have valid expressions + ////////////////////////////////////////////////////////////////////////////// + + bool hasEdgeConditions; + + bool hasVertexConditions; + + explicit TraverserOptions( + arangodb::Transaction* trx, + std::unordered_map> const* expr) : _trx(trx), minDepth(1), maxDepth(1), useBreadthFirst(false), uniqueVertices(UniquenessLevel::NONE), - uniqueEdges(UniquenessLevel::PATH) {} + uniqueEdges(UniquenessLevel::PATH), + expressions(expr), + hasEdgeConditions(false), + hasVertexConditions(false) { + TRI_ASSERT(expressions != nullptr); + for (auto& it : *expressions) { + for (auto& it2 : it.second) { + if (it2->isEdgeAccess) { + hasEdgeConditions = true; + } else { + hasVertexConditions = true; + } + } + } + } void setCollections(std::vector const&, TRI_edge_direction_e); void setCollections(std::vector const&, @@ -246,28 +276,13 @@ class Traverser { /// @brief Constructor. This is an abstract only class. ////////////////////////////////////////////////////////////////////////////// - Traverser(TraverserOptions& opts, - std::unordered_map> const* - expressions) + Traverser(TraverserOptions& opts) : _readDocuments(0), _filteredPaths(0), _pruneNext(false), _done(true), - _hasVertexConditions(false), - _hasEdgeConditions(false), - _opts(opts), - _expressions(expressions) { + _opts(opts) { - TRI_ASSERT(_expressions != nullptr); - for (auto& it : *_expressions) { - for (auto& it2 : it.second) { - if (it2->isEdgeAccess) { - _hasEdgeConditions = true; - } else { - _hasVertexConditions = true; - } - } - } } ////////////////////////////////////////////////////////////////////////////// @@ -411,26 +426,12 @@ class Traverser { bool _done; - ////////////////////////////////////////////////////////////////////////////// - /// @brief whether or not we have valid expressions - ////////////////////////////////////////////////////////////////////////////// - - bool _hasVertexConditions; - bool _hasEdgeConditions; - ////////////////////////////////////////////////////////////////////////////// /// @brief options for traversal ////////////////////////////////////////////////////////////////////////////// TraverserOptions _opts; - ////////////////////////////////////////////////////////////////////////////// - /// @brief a vector containing all information for early pruning - ////////////////////////////////////////////////////////////////////////////// - - std::unordered_map> const* - _expressions; - ////////////////////////////////////////////////////////////////////////////// /// @brief Function to fetch the real data of a vertex into an AQLValue ////////////////////////////////////////////////////////////////////////////// From d9e1f418712daae0cbc699d68dad0280bd0f7c23 Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 12 Jul 2016 14:46:24 +0200 Subject: [PATCH 2/3] Modified the SkiplistIndex lookup s.t. it does not build one large VPackBuilder containing all combinations of search values, but instead has an option to rewrite it in-place and just has to maintain the intervals. --- arangod/Indexes/IndexIterator.h | 22 + arangod/Indexes/SkiplistIndex.cpp | 921 +++++++++++++++++++++++------- arangod/Indexes/SkiplistIndex.h | 247 +++++++- lib/Basics/VelocyPackHelper.h | 52 +- 4 files changed, 1040 insertions(+), 202 deletions(-) diff --git a/arangod/Indexes/IndexIterator.h b/arangod/Indexes/IndexIterator.h index ad7515d6e2..63571d11ac 100644 --- a/arangod/Indexes/IndexIterator.h +++ b/arangod/Indexes/IndexIterator.h @@ -79,6 +79,28 @@ class IndexIterator { virtual void skip(uint64_t count, uint64_t& skipped); }; +//////////////////////////////////////////////////////////////////////////////// +/// @brief Special iterator if the condition cannot have any result +//////////////////////////////////////////////////////////////////////////////// + +class EmptyIndexIterator : public IndexIterator { + public: + EmptyIndexIterator() {} + ~EmptyIndexIterator() {} + + TRI_doc_mptr_t* next() override { + return nullptr; + } + + void nextBabies(std::vector&, size_t) override {} + + void reset() override {} + + void skip(uint64_t, uint64_t& skipped) override { + skipped = 0; + } +}; + //////////////////////////////////////////////////////////////////////////////// /// @brief a wrapper class to iterate over several IndexIterators. /// Each iterator is requested at the index itself. diff --git a/arangod/Indexes/SkiplistIndex.cpp b/arangod/Indexes/SkiplistIndex.cpp index c53aaa3301..0797f78ea6 100644 --- a/arangod/Indexes/SkiplistIndex.cpp +++ b/arangod/Indexes/SkiplistIndex.cpp @@ -115,6 +115,380 @@ static int CompareElementElement(TRI_index_element_t const* left, return arangodb::basics::VelocyPackHelper::compare(l, r, true); } +bool BaseSkiplistLookupBuilder::isEquality() const { + return _isEquality; +} + +VPackSlice const* BaseSkiplistLookupBuilder::getLowerLookup() const { + return &_lowerSlice; +} + +bool BaseSkiplistLookupBuilder::includeLower() const { + return _includeLower; +} + +VPackSlice const* BaseSkiplistLookupBuilder::getUpperLookup() const { + return &_upperSlice; +} + +bool BaseSkiplistLookupBuilder::includeUpper() const { + return _includeUpper; +} + +SkiplistLookupBuilder::SkiplistLookupBuilder( + Transaction* trx, + std::vector>& ops, + arangodb::aql::Variable const* var, bool reverse) : BaseSkiplistLookupBuilder(trx) { + _lowerBuilder->openArray(); + if (ops.empty()) { + // We only use this skiplist to sort. use empty array for lookup + _lowerBuilder->close(); + _lowerSlice = _lowerBuilder->slice(); + _upperSlice = _lowerBuilder->slice(); + return; + } + + auto const& last = ops.back(); + TRI_ASSERT(!last.empty()); + + std::pair> paramPair; + + if (last[0]->type != arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ && + last[0]->type != arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { + _isEquality = false; + _upperBuilder->openArray(); + for (size_t i = 0; i < ops.size() - 1; ++i) { + auto const& oplist = ops[i]; + TRI_ASSERT(oplist.size() == 1); + auto const& op = oplist[0]; + TRI_ASSERT(op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ || + op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN); + TRI_ASSERT(op->numMembers() == 2); + auto value = op->getMember(0); + if (value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var) { + value = op->getMember(1); + TRI_ASSERT(!(value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var)); + } + value->toVelocyPackValue(*(_lowerBuilder.get())); + value->toVelocyPackValue(*(_upperBuilder.get())); + } + auto const& last = ops.back(); + for (auto const& op : last) { + bool isReverseOrder = true; + TRI_ASSERT(op->numMembers() == 2); + + auto value = op->getMember(0); + if (value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var) { + value = op->getMember(1); + TRI_ASSERT(!(value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var)); + isReverseOrder = false; + } + switch (op->type) { + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: + if (isReverseOrder) { + _includeLower = false; + } else { + _includeUpper = false; + } + // Fall through intentional + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: + if (isReverseOrder) { + value->toVelocyPackValue(*(_lowerBuilder.get())); + } else { + value->toVelocyPackValue(*(_upperBuilder.get())); + } + break; + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: + if (isReverseOrder) { + _includeUpper = false; + } else { + _includeLower = false; + } + // Fall through intentional + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: + if (isReverseOrder) { + value->toVelocyPackValue(*(_upperBuilder.get())); + } else { + value->toVelocyPackValue(*(_lowerBuilder.get())); + } + break; + default: + TRI_ASSERT(false); + } + } + _lowerBuilder->close(); + _lowerSlice = _lowerBuilder->slice(); + + _upperBuilder->close(); + _upperSlice = _upperBuilder->slice(); + } else { + for (auto const& oplist : ops) { + TRI_ASSERT(oplist.size() == 1); + auto const& op = oplist[0]; + TRI_ASSERT(op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ || + op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN); + TRI_ASSERT(op->numMembers() == 2); + auto value = op->getMember(0); + if (value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var) { + value = op->getMember(1); + TRI_ASSERT(!(value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var)); + } + value->toVelocyPackValue(*(_lowerBuilder.get())); + } + _lowerBuilder->close(); + _lowerSlice = _lowerBuilder->slice(); + _upperSlice = _lowerBuilder->slice(); + } +} + +bool SkiplistLookupBuilder::next() { + // The first search value is created during creation. + // So next is always false. + return false; +} + + +SkiplistInLookupBuilder::SkiplistInLookupBuilder( + Transaction* trx, + std::vector>& ops, + arangodb::aql::Variable const* var, bool reverse) : BaseSkiplistLookupBuilder(trx), _dataBuilder(trx) { + TRI_ASSERT(!ops.empty()); // We certainly do not need IN here + VPackBuilder tmp; + std::set> + unique_set( + (arangodb::basics::VelocyPackHelper::VPackSorted(reverse))); + std::pair> paramPair; + + _dataBuilder->clear(); + _dataBuilder->openArray(); + + // The == and IN part + for (size_t i = 0; i < ops.size() - 1; ++i) { + auto const& oplist = ops[i]; + TRI_ASSERT(oplist.size() == 1); + auto const& op = oplist[0]; + TRI_ASSERT(op->numMembers() == 2); + auto value = op->getMember(0); + bool valueLeft = true; + if (value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var) { + valueLeft = false; + value = op->getMember(1); + TRI_ASSERT(!(value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var)); + } + if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { + if (valueLeft) { + // Case: value IN x.a + // This is identical to == for the index. + value->toVelocyPackValue(*(_dataBuilder.get())); + } else { + // Case: x.a IN value + TRI_ASSERT(value->numMembers() > 0); + tmp.clear(); + unique_set.clear(); + value->toVelocyPackValue(tmp); + for (auto const& it : VPackArrayIterator(tmp.slice())) { + unique_set.emplace(it); + } + _inPositions.emplace_back(i, 0, unique_set.size()); + _dataBuilder->openArray(); + for (auto const& it : unique_set) { + _dataBuilder->add(it); + } + _dataBuilder->close(); + } + } else { + TRI_ASSERT(op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ); + value->toVelocyPackValue(*(_dataBuilder.get())); + } + } + auto const& last = ops.back(); + arangodb::aql::AstNode const* lower = nullptr; + arangodb::aql::AstNode const* upper = nullptr; + + _isEquality = false; + + for (auto const& op : last) { + bool isReverseOrder = true; + TRI_ASSERT(op->numMembers() == 2); + + auto value = op->getMember(0); + if (value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var) { + value = op->getMember(1); + TRI_ASSERT(!(value->isAttributeAccessForVariable(paramPair) && + paramPair.first == var)); + isReverseOrder = false; + } + switch (op->type) { + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: + if (isReverseOrder) { + _includeLower = false; + } else { + _includeUpper = false; + } + // Fall through intentional + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: + if (isReverseOrder) { + TRI_ASSERT(lower == nullptr); + lower = value; + } else { + TRI_ASSERT(upper == nullptr); + upper = value; + } + break; + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: + if (isReverseOrder) { + _includeUpper = false; + } else { + _includeLower = false; + } + // Fall through intentional + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: + if (isReverseOrder) { + TRI_ASSERT(upper == nullptr); + upper = value; + } else { + TRI_ASSERT(lower == nullptr); + lower = value; + } + break; + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN: + TRI_ASSERT(upper == nullptr); + TRI_ASSERT(lower == nullptr); + TRI_ASSERT(value->numMembers() > 0); + tmp.clear(); + unique_set.clear(); + value->toVelocyPackValue(tmp); + for (auto const& it : VPackArrayIterator(tmp.slice())) { + unique_set.emplace(it); + } + _inPositions.emplace_back(ops.size() - 1, 0, unique_set.size()); + _dataBuilder->openArray(); + for (auto const& it : unique_set) { + _dataBuilder->add(it); + } + _dataBuilder->close(); + _isEquality = true; + _dataBuilder->close(); + buildSearchValues(); + return; + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ: + TRI_ASSERT(upper == nullptr); + TRI_ASSERT(lower == nullptr); + value->toVelocyPackValue(*(_dataBuilder.get())); + _isEquality = true; + _dataBuilder->close(); + buildSearchValues(); + return; + default: + TRI_ASSERT(false); + } + } + _dataBuilder->openArray(); + if (lower == nullptr) { + _dataBuilder->add(arangodb::basics::VelocyPackHelper::NullValue()); + } else { + lower->toVelocyPackValue(*(_dataBuilder.get())); + } + + if (upper == nullptr) { + _dataBuilder->add(arangodb::basics::VelocyPackHelper::NullValue()); + } else { + upper->toVelocyPackValue(*(_dataBuilder.get())); + } + _dataBuilder->close(); + _dataBuilder->close(); + buildSearchValues(); +} + +bool SkiplistInLookupBuilder::next() { + if (!forwardInPosition()) { + return false; + } + buildSearchValues(); + return true; +} + +bool SkiplistInLookupBuilder::forwardInPosition() { + std::list::reverse_iterator it = _inPositions.rbegin(); + while (it != _inPositions.rend()) { + it->current++; + TRI_ASSERT(it->max > 0); + if (it->current < it->max) { + return true; + // Okay we increased this, next search value; + } + it->current = 0; + it++; + } + // If we get here all positions are reset to 0. + // We are done, no further combination + return false; +} + +void SkiplistInLookupBuilder::buildSearchValues() { + auto inPos = _inPositions.begin(); + _lowerBuilder->clear(); + _lowerBuilder->openArray(); + + VPackSlice data = _dataBuilder->slice(); + if (!_isEquality) { + _upperBuilder->clear(); + _upperBuilder->openArray(); + + for (size_t i = 0; i < data.length() - 1; ++i) { + if (inPos != _inPositions.end() && i == inPos->field) { + _lowerBuilder->add(data.at(i).at(inPos->current)); + _upperBuilder->add(data.at(i).at(inPos->current)); + inPos++; + } else { + _lowerBuilder->add(data.at(i)); + _upperBuilder->add(data.at(i)); + } + } + + VPackSlice bounds = data.at(data.length() - 1); + TRI_ASSERT(bounds.isArray()); + TRI_ASSERT(bounds.length() == 2); + VPackSlice b = bounds.at(0); + if (!b.isNull()) { + _lowerBuilder->add(b); + } + _lowerBuilder->close(); + _lowerSlice = _lowerBuilder->slice(); + + b = bounds.at(1); + if (!b.isNull()) { + _upperBuilder->add(b); + } + + _upperBuilder->close(); + _upperSlice = _upperBuilder->slice(); + } else { + for (size_t i = 0; i < data.length(); ++i) { + if (inPos != _inPositions.end() && i == inPos->field) { + _lowerBuilder->add(data.at(i).at(inPos->current)); + inPos++; + } else { + _lowerBuilder->add(data.at(i)); + } + } + _lowerBuilder->close(); + _lowerSlice = _lowerBuilder->slice(); + _upperSlice = _lowerBuilder->slice(); + } +} + //////////////////////////////////////////////////////////////////////////////// /// @brief Reset the cursor //////////////////////////////////////////////////////////////////////////////// @@ -155,6 +529,164 @@ TRI_doc_mptr_t* SkiplistIterator::next() { return tmp->document()->document(); } + +//////////////////////////////////////////////////////////////////////////////// +/// @brief Checks if the interval is valid. It is declared invalid if +/// one border is nullptr or the right is lower than left. +//////////////////////////////////////////////////////////////////////////////// + +bool SkiplistIterator2::intervalValid(Node* left, Node* right) const { + if (left == nullptr) { + return false; + } + if (right == nullptr) { + return false; + } + if (left == right) { + // Exactly one result. Improve speed on unique indexes + return true; + } + if (_CmpElmElm(left->document(), right->document(), + arangodb::basics::SKIPLIST_CMP_TOTORDER) > 0) { + return false; + } + return true; +} + + + +//////////////////////////////////////////////////////////////////////////////// +/// @brief Reset the cursor +//////////////////////////////////////////////////////////////////////////////// + +void SkiplistIterator2::reset() { + // If _interals is empty at this point + // the cursor does not contain any + // document at all. Reset is pointless + if (!_intervals.empty()) { + // We reset to the first interval and reset the cursor + _currentInterval = 0; + if (_reverse) { + _cursor = _intervals[0].second; + } else { + _cursor = _intervals[0].first; + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief Get the next element in the skiplist +//////////////////////////////////////////////////////////////////////////////// + +TRI_doc_mptr_t* SkiplistIterator2::next() { + if (_cursor == nullptr) { + // We are exhausted already, sorry + return nullptr; + } + TRI_ASSERT(_currentInterval < _intervals.size()); + auto const& interval = _intervals[_currentInterval]; + Node* tmp = _cursor; + if (_reverse) { + if (_cursor == interval.first) { + forwardCursor(); + } else { + _cursor = _cursor->prevNode(); + } + } else { + if (_cursor == interval.second) { + forwardCursor(); + } else { + _cursor = _cursor->nextNode(); + } + } + TRI_ASSERT(tmp != nullptr); + TRI_ASSERT(tmp->document() != nullptr); + return tmp->document()->document(); +} + +void SkiplistIterator2::forwardCursor() { + _currentInterval++; + if (_currentInterval < _intervals.size()) { + auto const& interval = _intervals[_currentInterval]; + if (_reverse) { + _cursor = interval.second; + } else { + _cursor = interval.first; + } + return; + } + _cursor = nullptr; + if (_builder->next()) { + initNextInterval(); + } +} + +void SkiplistIterator2::initNextInterval() { + // We will always point the cursor to the resulting interval if any. + // We do not take responsibility for the Nodes! + Node* rightBorder = nullptr; + Node* leftBorder = nullptr; + while (true) { + if (_builder->isEquality()) { + rightBorder = _skiplistIndex->rightKeyLookup(_builder->getLowerLookup()); + if (rightBorder == _skiplistIndex->startNode()) { + // No matching elements. Next interval + if (!_builder->next()) { + // No next interval. We are done. + return; + } + // Builder moved forward. Try again. + continue; + } + leftBorder = _skiplistIndex->leftKeyLookup(_builder->getLowerLookup()); + leftBorder = leftBorder->nextNode(); + // NOTE: rightBorder < leftBorder => no Match. + // Will be checked by interval valid + } else { + if (_builder->includeLower()) { + leftBorder = _skiplistIndex->leftKeyLookup(_builder->getLowerLookup()); + // leftKeyLookup guarantees that we find the element before search. + } else { + leftBorder = _skiplistIndex->rightKeyLookup(_builder->getLowerLookup()); + // leftBorder is identical or smaller than search + } + // This is the first element not to be returned, but the next one + // Also save for the startNode, it should never be contained in the index. + leftBorder = leftBorder->nextNode(); + + if (_builder->includeUpper()) { + rightBorder = _skiplistIndex->rightKeyLookup(_builder->getUpperLookup()); + } else { + rightBorder = _skiplistIndex->leftKeyLookup(_builder->getUpperLookup()); + } + if (rightBorder == _skiplistIndex->startNode()) { + // No match make interval invalid + rightBorder = nullptr; + } + // else rightBorder is correct + } + if (!intervalValid(leftBorder, rightBorder)) { + // No matching elements. Next interval + if (!_builder->next()) { + // No next interval. We are done. + return; + } + // Builder moved forward. Try again. + continue; + } + TRI_ASSERT(_currentInterval == _intervals.size()); + _intervals.emplace_back(leftBorder, rightBorder); + if (_reverse) { + _cursor = rightBorder; + } else { + _cursor = leftBorder; + } + // Next valid interal initialized. Return; + return; + } +} + + //////////////////////////////////////////////////////////////////////////////// /// @brief create the skiplist index //////////////////////////////////////////////////////////////////////////////// @@ -438,6 +970,7 @@ SkiplistIterator* SkiplistIndex::lookup(arangodb::Transaction* trx, rightSearch->add(lastRight); rightSearch->close(); VPackSlice search = rightSearch->slice(); + rightBorder = _skiplistIndex->leftKeyLookup(&search); if (rightBorder == _skiplistIndex->startNode()) { // No match make interval invalid @@ -687,6 +1220,203 @@ void SkiplistIndex::matchAttributes( } } + +bool SkiplistIndex::accessFitsIndex( + arangodb::aql::AstNode const* access, arangodb::aql::AstNode const* other, + arangodb::aql::AstNode const* op, arangodb::aql::Variable const* reference, + std::vector>& found) const { + if (!this->canUseConditionPart(access, other, op, reference, true)) { + return false; + } + + arangodb::aql::AstNode const* what = access; + std::pair> attributeData; + + if (op->type != arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { + if (!what->isAttributeAccessForVariable(attributeData) || + attributeData.first != reference) { + // this access is not referencing this collection + return false; + } + if (arangodb::basics::TRI_AttributeNamesHaveExpansion( + attributeData.second)) { + // doc.value[*] == 'value' + return false; + } + if (isAttributeExpanded(attributeData.second)) { + // doc.value == 'value' (with an array index) + return false; + } + } else { + // ok, we do have an IN here... check if it's something like 'value' IN + // doc.value[*] + TRI_ASSERT(op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN); + bool canUse = false; + + if (what->isAttributeAccessForVariable(attributeData) && + attributeData.first == reference && + !arangodb::basics::TRI_AttributeNamesHaveExpansion( + attributeData.second) && + attributeMatches(attributeData.second)) { + // doc.value IN 'value' + // can use this index + canUse = true; + } else { + // check for 'value' IN doc.value AND 'value' IN doc.value[*] + what = other; + if (what->isAttributeAccessForVariable(attributeData) && + attributeData.first == reference && + isAttributeExpanded(attributeData.second) && + attributeMatches(attributeData.second)) { + canUse = true; + } + } + + if (!canUse) { + return false; + } + } + + std::vector const& fieldNames = + attributeData.second; + + for (size_t i = 0; i < _fields.size(); ++i) { + if (_fields[i].size() != fieldNames.size()) { + // attribute path length differs + continue; + } + + if (this->isAttributeExpanded(i) && + op->type != arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { + // If this attribute is correct or not, it could only serve for IN + continue; + } + + bool match = arangodb::basics::AttributeName::isIdentical(_fields[i], + fieldNames, true); + + if (match) { + // mark ith attribute as being covered + found[i].emplace_back(op); + + TRI_IF_FAILURE("SkiplistIndex::accessFitsIndex") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + + return true; + } + } + + return false; +} + +bool SkiplistIndex::findMatchingConditions( + arangodb::aql::AstNode const* node, + arangodb::aql::Variable const* reference, + std::vector>& mapping, + bool& usesIn) const { + usesIn = false; + + for (size_t i = 0; i < node->numMembers(); ++i) { + auto op = node->getMember(i); + + switch (op->type) { + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ: + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: + TRI_ASSERT(op->numMembers() == 2); + accessFitsIndex(op->getMember(0), op->getMember(1), op, reference, + mapping); + accessFitsIndex(op->getMember(1), op->getMember(0), op, reference, + mapping); + break; + + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN: + { + auto m = op->getMember(1); + if (accessFitsIndex(op->getMember(0), m, op, reference, + mapping)) { + if (m->numMembers() == 0) { + // We want to do an IN []. + // No results + // Even if we cannot use the index. + return false; + } + } + break; + } + + default: + TRI_ASSERT(false); + break; + } + } + + for (size_t i = 0; i < mapping.size(); ++i) { + auto const& conditions = mapping[i]; + if (conditions.empty()) { + // We do not have any condition for this field. + // Remove it and everything afterwards. + mapping.resize(i); + TRI_ASSERT(i == mapping.size()); + break; + } + TRI_ASSERT(conditions.size() <= 2); + auto const& first = conditions[0]; + switch (first->type) { + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN: + if (first->getMember(1)->isArray()) { + usesIn = true; + } + //Fall through intentional + case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ: + TRI_ASSERT(conditions.size() == 1); + break; + default: + // All conditions after this cannot be used. + // shrink and break outer for loop + mapping.resize(i + 1); + TRI_ASSERT(i + 1 == mapping.size()); + goto forloopBreak; + } + } + for (auto const& it : mapping) { + TRI_ASSERT(!it.empty()); + } + +forloopBreak: + return true; +} + + +IndexIterator* SkiplistIndex::iteratorForCondition( + arangodb::Transaction* trx, IndexIteratorContext*, + arangodb::aql::AstNode const* node, + arangodb::aql::Variable const* reference, bool reverse) const { + std::vector> mapping; + bool usesIn = false; + if (node != nullptr) { + mapping.resize(_fields.size()); // We use the default constructor. Mapping will have _fields many entries. + TRI_ASSERT(mapping.size() == _fields.size()); + if (!findMatchingConditions(node, reference, mapping, usesIn)) { + return new EmptyIndexIterator(); + } + } + if (usesIn) { + auto builder = + std::make_unique(trx, mapping, reference, reverse); + return new SkiplistIterator2(_skiplistIndex, CmpElmElm, reverse, builder.release()); + } + auto builder = + std::make_unique(trx, mapping, reference, reverse); + return new SkiplistIterator2(_skiplistIndex, CmpElmElm, reverse, builder.release()); +} + + + bool SkiplistIndex::supportsFilterCondition( arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference, size_t itemsInIndex, @@ -820,197 +1550,6 @@ bool SkiplistIndex::supportsSortCondition( return false; } -IndexIterator* SkiplistIndex::iteratorForCondition( - arangodb::Transaction* trx, IndexIteratorContext* context, - arangodb::aql::AstNode const* node, - arangodb::aql::Variable const* reference, bool reverse) const { - - TransactionBuilderLeaser searchValues(trx); - searchValues->openArray(); - bool needNormalize = false; - if (node == nullptr) { - // We only use this index for sort. Empty searchValue - searchValues->openArray(); - searchValues->close(); - - TRI_IF_FAILURE("SkiplistIndex::noSortIterator") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - } else { - // Create the search Values for the lookup - VPackArrayBuilder guard(searchValues.builder()); - - std::unordered_map> found; - size_t unused = 0; - matchAttributes(node, reference, found, unused, true); - - // found contains all attributes that are relevant for this node. - // It might be less than fields(). - // - // Handle the first attributes. They can only be == or IN and only - // one node per attribute - - auto getValueAccess = [&](arangodb::aql::AstNode const* comp, - arangodb::aql::AstNode const*& access, - arangodb::aql::AstNode const*& value) -> bool { - access = comp->getMember(0); - value = comp->getMember(1); - std::pair> paramPair; - if (!(access->isAttributeAccessForVariable(paramPair) && - paramPair.first == reference)) { - access = comp->getMember(1); - value = comp->getMember(0); - if (!(access->isAttributeAccessForVariable(paramPair) && - paramPair.first == reference)) { - // Both side do not have a correct AttributeAccess, this should not - // happen and indicates - // an error in the optimizer - TRI_ASSERT(false); - } - return true; - } - return false; - }; - - size_t usedFields = 0; - for (; usedFields < _fields.size(); ++usedFields) { - auto it = found.find(usedFields); - if (it == found.end()) { - // We are either done - // or this is a range. - // Continue with more complicated loop - break; - } - - auto comp = it->second[0]; - TRI_ASSERT(comp->numMembers() == 2); - arangodb::aql::AstNode const* access = nullptr; - arangodb::aql::AstNode const* value = nullptr; - getValueAccess(comp, access, value); - // We found an access for this field - - if (comp->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ) { - searchValues->openObject(); - searchValues->add(VPackValue(StaticStrings::IndexEq)); - TRI_IF_FAILURE("SkiplistIndex::permutationEQ") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - } else if (comp->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) { - if (isAttributeExpanded(usedFields)) { - searchValues->openObject(); - searchValues->add(VPackValue(StaticStrings::IndexEq)); - TRI_IF_FAILURE("SkiplistIndex::permutationArrayIN") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - } else { - needNormalize = true; - searchValues->openObject(); - searchValues->add(VPackValue(StaticStrings::IndexIn)); - } - } else { - // This is a one-sided range - break; - } - // We have to add the value always, the key was added before - value->toVelocyPackValue(*searchValues.builder()); - searchValues->close(); - } - - // Now handle the next element, which might be a range - if (usedFields < _fields.size()) { - auto it = found.find(usedFields); - if (it != found.end()) { - auto rangeConditions = it->second; - TRI_ASSERT(rangeConditions.size() <= 2); - VPackObjectBuilder searchElement(searchValues.builder()); - for (auto& comp : rangeConditions) { - TRI_ASSERT(comp->numMembers() == 2); - arangodb::aql::AstNode const* access = nullptr; - arangodb::aql::AstNode const* value = nullptr; - bool isReverseOrder = getValueAccess(comp, access, value); - // Add the key - switch (comp->type) { - case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LT: - if (isReverseOrder) { - searchValues->add(VPackValue(StaticStrings::IndexGt)); - } else { - searchValues->add(VPackValue(StaticStrings::IndexLt)); - } - break; - case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_LE: - if (isReverseOrder) { - searchValues->add(VPackValue(StaticStrings::IndexGe)); - } else { - searchValues->add(VPackValue(StaticStrings::IndexLe)); - } - break; - case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT: - if (isReverseOrder) { - searchValues->add(VPackValue(StaticStrings::IndexLt)); - } else { - searchValues->add(VPackValue(StaticStrings::IndexGt)); - } - break; - case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE: - if (isReverseOrder) { - searchValues->add(VPackValue(StaticStrings::IndexLe)); - } else { - searchValues->add(VPackValue(StaticStrings::IndexGe)); - } - break; - default: - // unsupported right now. Should have been rejected by - // supportsFilterCondition - TRI_ASSERT(false); - return nullptr; - } - value->toVelocyPackValue(*searchValues.builder()); - } - } - } - } - searchValues->close(); - - TRI_IF_FAILURE("SkiplistIndex::noIterator") { - THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); - } - - if (needNormalize) { - TransactionBuilderLeaser expandedSearchValues(trx); - expandInSearchValues(searchValues->slice(), *expandedSearchValues.builder()); - VPackSlice expandedSlice = expandedSearchValues->slice(); - std::vector iterators; - iterators.reserve(expandedSlice.length()); - try { - for (auto const& val : VPackArrayIterator(expandedSlice)) { - auto iterator = lookup(trx, val, reverse); - try { - iterators.push_back(iterator); - } catch (...) { - // avoid leak - delete iterator; - throw; - } - } - if (reverse) { - std::reverse(iterators.begin(), iterators.end()); - } - } - catch (...) { - for (auto& it : iterators) { - delete it; - } - throw; - } - return new MultiIndexIterator(iterators); - } - VPackSlice searchSlice = searchValues->slice(); - TRI_ASSERT(searchSlice.length() == 1); - searchSlice = searchSlice.at(0); - return lookup(trx, searchSlice, reverse); -} - //////////////////////////////////////////////////////////////////////////////// /// @brief specializes the condition for use with the index //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Indexes/SkiplistIndex.h b/arangod/Indexes/SkiplistIndex.h index 6d454cdc21..4bd813bf9a 100644 --- a/arangod/Indexes/SkiplistIndex.h +++ b/arangod/Indexes/SkiplistIndex.h @@ -29,9 +29,12 @@ #include "Basics/SkipList.h" #include "Indexes/IndexIterator.h" #include "Indexes/PathBasedIndex.h" +#include "Utils/Transaction.h" #include "VocBase/vocbase.h" #include "VocBase/voc-types.h" +#include + namespace arangodb { namespace aql { class SortCondition; @@ -41,6 +44,118 @@ struct Variable; class SkiplistIndex; class Transaction; + +/// @brief Abstract Builder for lookup values in skiplist index + +class BaseSkiplistLookupBuilder { + protected: + bool _isEquality; + bool _includeLower; + bool _includeUpper; + + TransactionBuilderLeaser _lowerBuilder; + arangodb::velocypack::Slice _lowerSlice; + + TransactionBuilderLeaser _upperBuilder; + arangodb::velocypack::Slice _upperSlice; + + public: + BaseSkiplistLookupBuilder(Transaction* trx) : + _lowerBuilder(trx), _upperBuilder(trx) + { + _isEquality = true; + _includeUpper = true; + _includeLower = true; + + _lowerBuilder->clear(); + _upperBuilder->clear(); + } + + virtual ~BaseSkiplistLookupBuilder() {}; + + /// @brief Compute the next lookup values + /// If returns false there is no further lookup + virtual bool next() = 0; + + /// @brief Returns if we only have equality checks (== or IN) + bool isEquality() const; + + /// @brief Get the lookup value for the lower bound. + arangodb::velocypack::Slice const* getLowerLookup() const; + + /// @brief Test if the lower bound should be included. + /// If there is no lower bound given returns true + /// as well. + bool includeLower() const; + + /// @brief Get the lookup value for the upper bound. + arangodb::velocypack::Slice const* getUpperLookup() const; + + /// @brief Test if the upper bound should be included. + /// If there is no upper bound given returns true + /// as well. + bool includeUpper() const; +}; + +/// @brief Builder for lookup values in skiplist index +/// Offers lower and upper bound lookup values +/// and handles multiplication of IN search values. +/// Also makes sure that the lookup values are +/// returned in the correct ordering. And no +/// lookup is returned twice. + +class SkiplistLookupBuilder : public BaseSkiplistLookupBuilder { + + public: + SkiplistLookupBuilder( + Transaction* trx, + std::vector>&, + arangodb::aql::Variable const*, bool); + + ~SkiplistLookupBuilder() {} + +/// @brief Compute the next lookup values +/// If returns false there is no further lookup + bool next() override; + +}; + +class SkiplistInLookupBuilder : public BaseSkiplistLookupBuilder { + + private: + + struct PosStruct { + size_t field; + size_t current; + size_t max; + + PosStruct(size_t f, size_t c, size_t m) : field(f), current(c), max(m) {} + }; + + TransactionBuilderLeaser _dataBuilder; + /// @brief keeps track of the positions in the in-lookup + /// values. (field, inPosition, maxPosition) + std::list _inPositions; + + public: + SkiplistInLookupBuilder( + Transaction* trx, + std::vector>&, + arangodb::aql::Variable const*, bool); + + ~SkiplistInLookupBuilder() {} + +/// @brief Compute the next lookup values +/// If returns false there is no further lookup + bool next() override; + + private: + + bool forwardInPosition(); + + void buildSearchValues(); +}; + //////////////////////////////////////////////////////////////////////////////// /// @brief Iterator structure for skip list. We require a start and stop node /// @@ -96,6 +211,110 @@ class SkiplistIterator : public IndexIterator { void reset() override; }; +//////////////////////////////////////////////////////////////////////////////// +/// @brief Iterator structure for skip list. We require a start and stop node +/// +/// Intervals are open in the sense that both end points are not members +/// of the interval. This means that one has to use SkipList::nextNode +/// on the start node to get the first element and that the stop node +/// can be NULL. Note that it is ensured that all intervals in an iterator +/// are non-empty. +//////////////////////////////////////////////////////////////////////////////// + +class SkiplistIterator2 : public IndexIterator { + private: + // Shorthand for the skiplist node + typedef arangodb::basics::SkipListNode Node; + + typedef arangodb::basics::SkipList TRI_Skiplist; + + private: + + TRI_Skiplist const* _skiplistIndex; + bool _reverse; + Node* _cursor; + + // The pair.first is the left border + // The pair.second is the right border + // Both borders are inclusive + std::vector> _intervals; + size_t _currentInterval; + + BaseSkiplistLookupBuilder* _builder; + + std::function _CmpElmElm; + + public: + SkiplistIterator2( + TRI_Skiplist const* skiplist, + std::function CmpElmElm, + bool reverse, BaseSkiplistLookupBuilder* builder) + : _skiplistIndex(skiplist), + _reverse(reverse), + _cursor(nullptr), + _currentInterval(0), + _builder(builder), + _CmpElmElm(CmpElmElm) { + TRI_ASSERT(_builder != nullptr); + initNextInterval(); // Initializes the cursor + TRI_ASSERT((_intervals.empty() && _cursor == nullptr) || + (!_intervals.empty() && _cursor != nullptr)); + } + + ~SkiplistIterator2() { + delete _builder; + } + + // always holds the last node returned, initially equal to + // the _leftEndPoint (or the + // _rightEndPoint in the reverse case), + // can be nullptr if the iterator is exhausted. + + public: + + //////////////////////////////////////////////////////////////////////////////// + /// @brief Get the next element in the skiplist + //////////////////////////////////////////////////////////////////////////////// + + TRI_doc_mptr_t* next() override; + + //////////////////////////////////////////////////////////////////////////////// + /// @brief Reset the cursor + //////////////////////////////////////////////////////////////////////////////// + + void reset() override; + + private: + + //////////////////////////////////////////////////////////////////////////////// + /// @brief Initialize left and right endpoints with current lookup + /// value. Also points the _cursor to the border of this interval. + //////////////////////////////////////////////////////////////////////////////// + + void initNextInterval(); + + //////////////////////////////////////////////////////////////////////////////// + /// @brief Forward the cursor to the next interval. If there was no + /// interval the next one is computed. If the _cursor has + /// nullptr after this call the iterator is exhausted. + //////////////////////////////////////////////////////////////////////////////// + + void forwardCursor(); + + //////////////////////////////////////////////////////////////////////////////// + /// @brief Checks if the interval is valid. It is declared invalid if + /// one border is nullptr or the right is lower than left. + //////////////////////////////////////////////////////////////////////////////// + + bool intervalValid(Node*, Node*) const; +}; + + + class SkiplistIndex final : public PathBasedIndex { struct KeyElementComparator { int operator()(VPackSlice const* leftKey, @@ -196,28 +415,38 @@ class SkiplistIndex final : public PathBasedIndex { std::unordered_map>&, bool) const; + bool accessFitsIndex( + arangodb::aql::AstNode const*, arangodb::aql::AstNode const*, + arangodb::aql::AstNode const*, arangodb::aql::Variable const*, + std::vector>&) const; + + void matchAttributes( arangodb::aql::AstNode const*, arangodb::aql::Variable const*, std::unordered_map>&, size_t&, bool) const; - private: - - // Shorthand for the skiplist node - typedef arangodb::basics::SkipListNode Node; - - ElementElementComparator CmpElmElm; - - KeyElementComparator CmpKeyElm; + bool findMatchingConditions( + arangodb::aql::AstNode const*, arangodb::aql::Variable const*, + std::vector>&, bool&) const; //////////////////////////////////////////////////////////////////////////////// /// @brief Checks if the interval is valid. It is declared invalid if /// one border is nullptr or the right is lower than left. //////////////////////////////////////////////////////////////////////////////// + // Shorthand for the skiplist node + typedef arangodb::basics::SkipListNode Node; + bool intervalValid(Node* left, Node* right) const; + private: + + ElementElementComparator CmpElmElm; + + KeyElementComparator CmpKeyElm; + ////////////////////////////////////////////////////////////////////////////// /// @brief the actual skiplist index ////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Basics/VelocyPackHelper.h b/lib/Basics/VelocyPackHelper.h index 257554dcd6..20df3ad668 100644 --- a/lib/Basics/VelocyPackHelper.h +++ b/lib/Basics/VelocyPackHelper.h @@ -93,14 +93,16 @@ class VelocyPackHelper { template struct VPackLess { - VPackLess(arangodb::velocypack::Options const* options = &arangodb::velocypack::Options::Defaults, + VPackLess(arangodb::velocypack::Options const* options = + &arangodb::velocypack::Options::Defaults, arangodb::velocypack::Slice const* lhsBase = nullptr, arangodb::velocypack::Slice const* rhsBase = nullptr) : options(options), lhsBase(lhsBase), rhsBase(rhsBase) {} inline bool operator()(arangodb::velocypack::Slice const& lhs, arangodb::velocypack::Slice const& rhs) const { - return VelocyPackHelper::compare(lhs, rhs, useUtf8, options, lhsBase, rhsBase) < 0; + return VelocyPackHelper::compare(lhs, rhs, useUtf8, options, lhsBase, + rhsBase) < 0; } arangodb::velocypack::Options const* options; @@ -108,6 +110,52 @@ class VelocyPackHelper { arangodb::velocypack::Slice const* rhsBase; }; + template + struct VPackGreater { + VPackGreater(arangodb::velocypack::Options const* options = + &arangodb::velocypack::Options::Defaults, + arangodb::velocypack::Slice const* lhsBase = nullptr, + arangodb::velocypack::Slice const* rhsBase = nullptr) + : options(options), lhsBase(lhsBase), rhsBase(rhsBase) {} + + inline bool operator()(arangodb::velocypack::Slice const& lhs, + arangodb::velocypack::Slice const& rhs) const { + return VelocyPackHelper::compare(lhs, rhs, useUtf8, options, lhsBase, + rhsBase) > 0; + } + + arangodb::velocypack::Options const* options; + arangodb::velocypack::Slice const* lhsBase; + arangodb::velocypack::Slice const* rhsBase; + }; + + template + struct VPackSorted { + VPackSorted(bool reverse, arangodb::velocypack::Options const* options = + &arangodb::velocypack::Options::Defaults, + arangodb::velocypack::Slice const* lhsBase = nullptr, + arangodb::velocypack::Slice const* rhsBase = nullptr) + : _reverse(reverse), + options(options), + lhsBase(lhsBase), + rhsBase(rhsBase) {} + + inline bool operator()(arangodb::velocypack::Slice const& lhs, + arangodb::velocypack::Slice const& rhs) const { + if (_reverse) { + return VelocyPackHelper::compare(lhs, rhs, useUtf8, options, lhsBase, + rhsBase) > 0; + } + return VelocyPackHelper::compare(lhs, rhs, useUtf8, options, lhsBase, + rhsBase) < 0; + } + + bool _reverse; + arangodb::velocypack::Options const* options; + arangodb::velocypack::Slice const* lhsBase; + arangodb::velocypack::Slice const* rhsBase; + }; + struct AttributeSorterUTF8 { bool operator()(std::string const& l, std::string const& r) const; }; From 403ba8a30ceccd4957eeb61347b6cd46e9de95c5 Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Tue, 12 Jul 2016 15:57:19 +0200 Subject: [PATCH 3/3] fixed issue #1937 --- CHANGELOG | 23 ++++++++++++++- arangod/Aql/AstNode.cpp | 2 +- js/server/tests/aql/aql-array-access.js | 39 +++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a036cdaf9d..7726daa5f6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -27,8 +27,13 @@ devel v3.0.3 (XXXX-XX-XX) ------------------- +* fixed issue #1937 + * fixed issue #1936 +* improved performance of arangorestore in clusters with synchronous + replication + v3.0.2 (2016-07-09) ------------------- @@ -442,9 +447,25 @@ v3.0.0-rc1 (2015-06-10) using a backwards-compatible "legacy mode" -v2.8.10 (XXXX-XX-XX) +v2.8.11 (XXXX-XX-XX) -------------------- +* fixed issue #1937 + + +v2.8.10 (2016-07-01) +-------------------- + +* make sure next local _rev value used for a document is at least as high as the + _rev value supplied by external sources such as replication + +* make adding a collection in both read- and write-mode to a transaction behave as + expected (write includes read). This prevents the `unregister collection used in + transaction` error + +* fixed sometimes invalid result for `byExample(...).count()` when an index plus + post-filtering was used + * fixed "collection is a nullptr" issue when starting a traversal from a transaction * honor the value of startup option `--database.wait-for-sync` (that is used to control diff --git a/arangod/Aql/AstNode.cpp b/arangod/Aql/AstNode.cpp index 241d24baa1..c62c0da96a 100644 --- a/arangod/Aql/AstNode.cpp +++ b/arangod/Aql/AstNode.cpp @@ -2365,7 +2365,7 @@ void AstNode::stringify(arangodb::basics::StringBuffer* buffer, bool verbose, auto returnNode = getMember(4); if (returnNode != nullptr && returnNode != Ast::getNodeNop()) { buffer->appendText(TRI_CHAR_LENGTH_PAIR(" RETURN ")); - returnNode->getMember(0)->stringify(buffer, verbose, failIfLong); + returnNode->stringify(buffer, verbose, failIfLong); } buffer->appendChar(')'); diff --git a/js/server/tests/aql/aql-array-access.js b/js/server/tests/aql/aql-array-access.js index 62edb522ad..259c0905c2 100644 --- a/js/server/tests/aql/aql-array-access.js +++ b/js/server/tests/aql/aql-array-access.js @@ -705,6 +705,45 @@ function arrayAccessTestSuite () { var expected = [ 4, 8, 12 ]; var result = AQL_EXECUTE("RETURN @value[**** FILTER CURRENT % 2 == 0 LIMIT 3 RETURN CURRENT * 2]", { value : data }).json; assertEqual([ expected ], result); + }, + + testCollapseWithData : function () { + var data = [ { + "last": "2016-07-12", + "name": "a1", + "st": [ { + "last": "2016-07-12", + "name": "a11", + "id": "a2", + "dx": [ { + "last": "2016-07-12", + "name": "a21", + "id": "a3", + "docs": [] + }, { + "last": "2016-07-12", + "name": "a22", + "id": "a4", + "docs": [] + } ] + } ] + } ]; + + var expected = [ + { + "st" : [ + { "id" : "a2", "date" : "2016-07-12" } + ], + "dx" : [ + { "id" : "a3", "date" : "2016-07-12" }, + { "id" : "a4", "date" : "2016-07-12" } + ], + "docs" : [ ] + } + ]; + + var result = AQL_EXECUTE("FOR c IN @value COLLECT st = c.st[* RETURN { id: CURRENT.id, date: CURRENT.last }], dx = c.st[*].dx[* RETURN { id: CURRENT.id, date: CURRENT.last }][**], docs = c.st[*].dx[*][**].docs[* RETURN { id: CURRENT.id, date: CURRENT.last }][**] RETURN { st , dx, docs }", { value : data }).json; + assertEqual(expected, result); } };