From 5b3be69e101428c0cfa15da0f57cf1a153f2b4e7 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 29 Nov 2016 11:48:32 +0100 Subject: [PATCH 01/53] WIP first working version of geoindex via aql --- arangod/Aql/OptimizerRules.cpp | 2 +- arangod/Indexes/GeoIndex.cpp | 51 +++++++++++++++++++++++++++++++++ arangod/Indexes/GeoIndex.h | 35 ++++++++++++++++++++++ arangod/Indexes/IndexIterator.h | 24 ++++++++++++++++ 4 files changed, 111 insertions(+), 1 deletion(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 56ac6be5c5..56269b2397 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4081,7 +4081,7 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, << " of collection:" << result1.get()._collection->getName() << " are geoindexed"; - break; //remove this to make use of the index + //break; //remove this to make use of the index auto cnode = result1.get()._collectionNode; auto& idxPtr = result1.get()._index; diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index 64e526824a..911ab6f952 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -28,6 +28,57 @@ #include "VocBase/transaction.h" using namespace arangodb; +GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, + arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + GeoIndex const* index, + arangodb::aql::AstNode const* node, + arangodb::aql::Variable const* reference) + : IndexIterator(collection, trx, mmdr, index), + _index(index), + // lookup will hold the inforamtion if this is a cursor for + // near/within and the reference point + //_lookups(trx, node, reference, index->fields()), + _lookupResult(nullptr), + _posInBuffer(0) { + //_index->lookup(_trx, _lookups.lookup(), _buffer); +} + +IndexLookupResult GeoIndexIterator::next() { + if (!_lookupResult){ + _lookupResult = _index->nearQuery(_trx,0,0,10); + } + //implement + if (_posInBuffer < 
_lookupResult->length){ + //is data the revision id? + return IndexLookupResult(GeoIndex::toRevision(_lookupResult->coordinates[_posInBuffer++].data)); + } + // if there are no more results we return the default constructed IndexLookupResult + return IndexLookupResult{}; +} + +// optional +// void GeoIndexIterator::nextBabies(std::vector& result, size_t atMost) { +// //implement provide fast implementation +// } + +/// @brief creates an IndexIterator for the given Condition +IndexIterator* GeoIndex::iteratorForCondition( + arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + arangodb::aql::AstNode const* node, + arangodb::aql::Variable const* reference, bool) const { + TRI_IF_FAILURE("HashIndex::noIterator") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + return new GeoIndexIterator(_collection, trx, mmdr, this, node, reference); +} + + +void GeoIndexIterator::reset() { + _lookupResult = nullptr; + _posInBuffer = 0; +} GeoIndex::GeoIndex(TRI_idx_iid_t iid, arangodb::LogicalCollection* collection, VPackSlice const& info) diff --git a/arangod/Indexes/GeoIndex.h b/arangod/Indexes/GeoIndex.h index 1d62a954f6..3d0d309778 100644 --- a/arangod/Indexes/GeoIndex.h +++ b/arangod/Indexes/GeoIndex.h @@ -27,6 +27,7 @@ #include "Basics/Common.h" #include "GeoIndex/GeoIndex.h" #include "Indexes/Index.h" +#include "Indexes/IndexIterator.h" #include "VocBase/vocbase.h" #include "VocBase/voc-types.h" @@ -37,6 +38,34 @@ static_assert(sizeof(GeoCoordinate::data) >= sizeof(TRI_voc_rid_t), "invalid size of GeoCoordinate.data"); namespace arangodb { +class GeoIndex; + +class GeoIndexIterator final : public IndexIterator { + public: + +/// @brief Construct an GeoIndexIterator based on Ast Conditions + GeoIndexIterator(LogicalCollection* collection, arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + GeoIndex const* index, + arangodb::aql::AstNode const*, + arangodb::aql::Variable const*); + + ~GeoIndexIterator() = default; + + char const* typeName() const override { 
return "geo-index-iterator"; } + + IndexLookupResult next() override; + + //void nextBabies(std::vector&, size_t) override; + + void reset() override; + + private: + GeoIndex const* _index; + //LookupBuilder _lookups; + GeoCoordinates* _lookupResult; + size_t _posInBuffer; +}; class GeoIndex final : public Index { public: @@ -66,6 +95,12 @@ class GeoIndex final : public Index { return TRI_IDX_TYPE_GEO2_INDEX; } + IndexIterator* iteratorForCondition(arangodb::Transaction*, + ManagedDocumentResult*, + arangodb::aql::AstNode const*, + arangodb::aql::Variable const*, + bool) const override; + bool allowExpansion() const override { return false; } bool canBeDropped() const override { return true; } diff --git a/arangod/Indexes/IndexIterator.h b/arangod/Indexes/IndexIterator.h index 84614a0120..d69b1d4724 100644 --- a/arangod/Indexes/IndexIterator.h +++ b/arangod/Indexes/IndexIterator.h @@ -21,6 +21,30 @@ /// @author Michael Hackstein //////////////////////////////////////////////////////////////////////////////// +// In order to implement a new IndexIterator the folling functions need to be +// implmeneted. +// +// typeName() returns a string descibing the type of the indexIterator +// +// The next() function of the IndexIterator returns IndexLookupResults that are +// created from RevisionIds. If there is nothing more to return a default +// constructed IndesLookupResult is returend. +// +// reset() resets the iterator +// +// optional - default implementation provided: +// +// nextBabies() gets more than one result, the function is meant to increase +// performance when receiving a single result from the index is more expensive +// per item than the item costs when receiving multiple results. +// +// skip(trySkip, skipped) tries to skip the next trySkip elements +// +// When finished you need to implement the fuction: +// virtual IndexIterator* iteratorForCondition(...) 
+// So a there is a way to create an iterator for the index + + #ifndef ARANGOD_INDEXES_INDEX_ITERATOR_H #define ARANGOD_INDEXES_INDEX_ITERATOR_H 1 From 86c21eb733346b6825947d40b44cc732442ba24c Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 29 Nov 2016 14:13:35 +0100 Subject: [PATCH 02/53] make use of geo index cursor api --- arangod/Indexes/GeoIndex.cpp | 58 ++++++++++++++++++++++++++---------- arangod/Indexes/GeoIndex.h | 17 +++++++---- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index 911ab6f952..8e941bf237 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -26,6 +26,7 @@ #include "Basics/StringRef.h" #include "Basics/VelocyPackHelper.h" #include "VocBase/transaction.h" +#include "Indexes/GeoIndex.h" using namespace arangodb; GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, @@ -39,28 +40,54 @@ GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, // lookup will hold the inforamtion if this is a cursor for // near/within and the reference point //_lookups(trx, node, reference, index->fields()), - _lookupResult(nullptr), - _posInBuffer(0) { - //_index->lookup(_trx, _lookups.lookup(), _buffer); -} + _cursor(nullptr) + {} IndexLookupResult GeoIndexIterator::next() { - if (!_lookupResult){ - _lookupResult = _index->nearQuery(_trx,0,0,10); + if (!_cursor){ + createCursor(0,0); } - //implement - if (_posInBuffer < _lookupResult->length){ - //is data the revision id? 
- return IndexLookupResult(GeoIndex::toRevision(_lookupResult->coordinates[_posInBuffer++].data)); + + auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,1)); + if(coords && coords->length){ + auto revision = ::GeoIndex::toRevision(coords->coordinates[0].data); + return IndexLookupResult{revision}; } // if there are no more results we return the default constructed IndexLookupResult return IndexLookupResult{}; } -// optional -// void GeoIndexIterator::nextBabies(std::vector& result, size_t atMost) { -// //implement provide fast implementation -// } +void GeoIndexIterator::nextBabies(std::vector& result, size_t batchSize) { + if (!_cursor){ + createCursor(0,0); + } + + result.clear(); + if (batchSize > 0) { + auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,batchSize)); + size_t length = coords ? coords->length : 0; + if (!length){ + return; + } + + for(std::size_t index = 0; index < length; ++index){ + result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[index].data))); + } + } +} + +::GeoCursor* GeoIndexIterator::replaceCursor(::GeoCursor* c){ + if(_cursor){ + ::GeoIndex_CursorFree(_cursor); + } + _cursor = c; + return _cursor; +} + +::GeoCursor* GeoIndexIterator::createCursor(double lat, double lon){ + ::GeoCoordinate coor{lat, lon, 0}; + return replaceCursor(::GeoIndex_NewCursor(_index->_geoIndex, &coor)); +} /// @brief creates an IndexIterator for the given Condition IndexIterator* GeoIndex::iteratorForCondition( @@ -76,8 +103,7 @@ IndexIterator* GeoIndex::iteratorForCondition( void GeoIndexIterator::reset() { - _lookupResult = nullptr; - _posInBuffer = 0; + replaceCursor(nullptr); } GeoIndex::GeoIndex(TRI_idx_iid_t iid, arangodb::LogicalCollection* collection, diff --git a/arangod/Indexes/GeoIndex.h b/arangod/Indexes/GeoIndex.h index 3d0d309778..daa0a26a93 100644 --- a/arangod/Indexes/GeoIndex.h +++ b/arangod/Indexes/GeoIndex.h @@ -42,7 +42,7 @@ class GeoIndex; class GeoIndexIterator final : public IndexIterator 
{ public: - + /// @brief Construct an GeoIndexIterator based on Ast Conditions GeoIndexIterator(LogicalCollection* collection, arangodb::Transaction* trx, ManagedDocumentResult* mmdr, @@ -50,24 +50,29 @@ class GeoIndexIterator final : public IndexIterator { arangodb::aql::AstNode const*, arangodb::aql::Variable const*); - ~GeoIndexIterator() = default; - + ~GeoIndexIterator() { + replaceCursor(nullptr); + }; + char const* typeName() const override { return "geo-index-iterator"; } IndexLookupResult next() override; - //void nextBabies(std::vector&, size_t) override; + void nextBabies(std::vector&, size_t) override; void reset() override; private: + ::GeoCursor* replaceCursor(::GeoCursor* c); + ::GeoCursor* createCursor(double lat, double lon); + GeoIndex const* _index; + ::GeoCursor* _cursor; //LookupBuilder _lookups; - GeoCoordinates* _lookupResult; - size_t _posInBuffer; }; class GeoIndex final : public Index { +friend class GeoIndexIterator; public: GeoIndex() = delete; From 8bafcdfe92d2cc524fdda635ddea4cb57ef20145 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Wed, 30 Nov 2016 14:10:16 +0100 Subject: [PATCH 03/53] GeoIndexIterator now takes parameters via ConditionNode --- arangod/Aql/OptimizerRules.cpp | 39 ++++++++++++++++++--- arangod/Indexes/GeoIndex.cpp | 63 ++++++++++++++++++++++++++++------ arangod/Indexes/GeoIndex.h | 9 ++++- 3 files changed, 96 insertions(+), 15 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 56269b2397..57a5621ab6 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4072,7 +4072,9 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; + bool firstPairContainsVars = true; if(!result1){ + firstPairContainsVars = false; result1 = std::move(result2); } @@ -4086,9 +4088,39 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, auto cnode = result1.get()._collectionNode; auto& 
idxPtr = result1.get()._index; - //create new index node and register it - auto condition = std::make_unique(plan->getAst()); //What is this condition exactly about - condition->normalize(plan); + std::unique_ptr condition; + + auto getVars = [&](std::pair& pair){ + auto ast = plan->getAst(); + + auto varAstNode = ast->createNodeReference(cnode->outVariable()); + + + auto latKey = ast->createNodeAttributeAccess(varAstNode, "latitude",8); + auto latEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ,latKey, pair.first); + + auto lonKey = ast->createNodeAttributeAccess(varAstNode, "longitude",9); + auto lonEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ,lonKey, pair.second); + + auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); + nAryAnd->reserve(2); + nAryAnd->addMember(latEq); + nAryAnd->addMember(lonEq); + + auto unAryOr = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_OR, nAryAnd); + + auto condition = std::make_unique(ast); + condition->andCombine(unAryOr); + condition->normalize(plan); + return condition; + }; + + if(firstPairContainsVars){ + condition = getVars(argPair2); + } else { + condition = getVars(argPair1); + } + auto inode = new IndexNode( plan, plan->nextId(), cnode->vocbase(), cnode->collection(), cnode->outVariable(), @@ -4102,7 +4134,6 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, //signal that plan has been changed modified=true; - } opt->addPlan(plan, rule, modified); diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index 8e941bf237..fb03aed301 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -21,31 +21,74 @@ /// @author Dr. 
Frank Celler //////////////////////////////////////////////////////////////////////////////// -#include "GeoIndex.h" -#include "Logger/Logger.h" +#include "Aql/Ast.h" +#include "Aql/AstNode.h" +#include "Aql/SortCondition.h" #include "Basics/StringRef.h" #include "Basics/VelocyPackHelper.h" -#include "VocBase/transaction.h" +#include "GeoIndex.h" #include "Indexes/GeoIndex.h" +#include "Logger/Logger.h" +#include "VocBase/transaction.h" using namespace arangodb; GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, arangodb::Transaction* trx, ManagedDocumentResult* mmdr, GeoIndex const* index, - arangodb::aql::AstNode const* node, - arangodb::aql::Variable const* reference) + arangodb::aql::AstNode const* cond, + arangodb::aql::Variable const* var) : IndexIterator(collection, trx, mmdr, index), _index(index), + _cursor(nullptr), + _condition(cond), + _variable(var), + _lat(0), + _lon(0), + _near(true), + _withinRange(0), + _withinInverse(false) // lookup will hold the inforamtion if this is a cursor for // near/within and the reference point //_lookups(trx, node, reference, index->fields()), - _cursor(nullptr) - {} + { + evaluateCondition(); + } + +void GeoIndexIterator::evaluateCondition() { + LOG(ERR) << "ENTER evaluate Condition"; + + if (_condition) { + LOG(ERR) << "The Condition is"; + _condition->dump(0); + auto numMembers = _condition->numMembers(); + + if(numMembers >= 2){ + _lat = _condition->getMember(0)->getMember(1)->getDoubleValue(); + LOG(ERR) << "lat: " << _lat; + _lon = _condition->getMember(1)->getMember(1)->getDoubleValue(); + LOG(ERR) << "lon: " << _lon; + } + + if (numMembers == 2){ //near + _near = true; + } else if (numMembers == 3) { //within + _near = false; + _withinRange = _condition->getMember(2)->getMember(1)->getDoubleValue(); + } else { + LOG(ERR) << "Invalid Number of arguments"; + } + + } else { + LOG(ERR) << "No Condition passed to constructor"; + } + + LOG(ERR) << "EXIT evaluate Condition"; +} IndexLookupResult 
GeoIndexIterator::next() { if (!_cursor){ - createCursor(0,0); + createCursor(_lat,_lon); } auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,1)); @@ -59,7 +102,7 @@ IndexLookupResult GeoIndexIterator::next() { void GeoIndexIterator::nextBabies(std::vector& result, size_t batchSize) { if (!_cursor){ - createCursor(0,0); + createCursor(_lat,_lon); } result.clear(); @@ -75,7 +118,7 @@ void GeoIndexIterator::nextBabies(std::vector& result, size_t } } } - + ::GeoCursor* GeoIndexIterator::replaceCursor(::GeoCursor* c){ if(_cursor){ ::GeoIndex_CursorFree(_cursor); diff --git a/arangod/Indexes/GeoIndex.h b/arangod/Indexes/GeoIndex.h index daa0a26a93..91ec2f1fca 100644 --- a/arangod/Indexes/GeoIndex.h +++ b/arangod/Indexes/GeoIndex.h @@ -65,10 +65,17 @@ class GeoIndexIterator final : public IndexIterator { private: ::GeoCursor* replaceCursor(::GeoCursor* c); ::GeoCursor* createCursor(double lat, double lon); + void evaluateCondition(); //called in constructor GeoIndex const* _index; ::GeoCursor* _cursor; - //LookupBuilder _lookups; + arangodb::aql::AstNode const* _condition; + arangodb::aql::Variable const* _variable; + double _lat; + double _lon; + bool _near; + double _withinRange; + double _withinInverse; }; class GeoIndex final : public Index { From 80c89d5f970a41b16ad3a5ddd1972b2134858bbe Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Wed, 30 Nov 2016 15:25:11 +0100 Subject: [PATCH 04/53] geo condition is now build with a extra parameter for within --- arangod/Aql/OptimizerRules.cpp | 91 ++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 57a5621ab6..42dcda7ec3 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3929,8 +3929,35 @@ struct GeoIndexInfo { std::vector _latitude; }; +std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, + AstNode* lat, AstNode* lon, AstNode* withRange = 
nullptr){ + auto ast = plan->getAst(); + auto varAstNode = ast->createNodeReference(info._collectionNode->outVariable()); + auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); + nAryAnd->reserve(withRange ? 3 : 2); + auto latKey = ast->createNodeAttributeAccess(varAstNode, "latitude",8); + auto latEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, latKey, lat); + nAryAnd->addMember(latEq); + + auto lonKey = ast->createNodeAttributeAccess(varAstNode, "longitude",9); + auto lonEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lonKey, lon); + nAryAnd->addMember(lonEq); + + if(withRange){ + auto withKey = ast->createNodeAttributeAccess(varAstNode, "within",6); + auto withEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, withKey, withRange); + nAryAnd->addMember(withEq); + } + + auto unAryOr = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_OR, nAryAnd); + + auto condition = std::make_unique(ast); + condition->andCombine(unAryOr); + condition->normalize(plan); + return condition; +} // TODO - remove debug code #ifdef OBIDEBUG @@ -4048,19 +4075,19 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, // we're looking for "DISTANCE()", which is a function call // with an empty parameters array - if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { + if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { continue; } LOG(OBILEVEL) << " FOUND DISTANCE RULE"; - auto const& distanceArgs = funcNode->getMember(0); - if(distanceArgs->numMembers() != 4){ + auto const& functionArguments = funcNode->getMember(0); + if(functionArguments->numMembers() < 4){ continue; } - std::pair argPair1 = { distanceArgs->getMember(0), distanceArgs->getMember(1) }; - std::pair argPair2 = { distanceArgs->getMember(2), distanceArgs->getMember(3) }; + std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; + std::pair argPair2 = { 
functionArguments->getMember(2), functionArguments->getMember(3) }; auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan); auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan); @@ -4072,53 +4099,31 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; - bool firstPairContainsVars = true; - if(!result1){ - firstPairContainsVars = false; - result1 = std::move(result2); + std::pair* constantPair; + GeoIndexInfo info; + if(result1){ + info = std::move(result1.get()); + constantPair = &argPair2; + } else { + info = std::move(result2.get()); + constantPair = &argPair1; } - LOG(OBILEVEL) << " attributes: " << result1.get()._longitude[0] - << ", " << result1.get()._longitude - << " of collection:" << result1.get()._collection->getName() + LOG(OBILEVEL) << " attributes: " << info._longitude[0] + << ", " << info._longitude + << " of collection:" << info._collection->getName() << " are geoindexed"; //break; //remove this to make use of the index - auto cnode = result1.get()._collectionNode; - auto& idxPtr = result1.get()._index; + auto cnode = info._collectionNode; + auto& idxPtr = info._index; std::unique_ptr condition; - - auto getVars = [&](std::pair& pair){ - auto ast = plan->getAst(); - - auto varAstNode = ast->createNodeReference(cnode->outVariable()); - - - auto latKey = ast->createNodeAttributeAccess(varAstNode, "latitude",8); - auto latEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ,latKey, pair.first); - - auto lonKey = ast->createNodeAttributeAccess(varAstNode, "longitude",9); - auto lonEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ,lonKey, pair.second); - - auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); - nAryAnd->reserve(2); - nAryAnd->addMember(latEq); - nAryAnd->addMember(lonEq); - - auto unAryOr = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_OR, nAryAnd); - - auto condition = std::make_unique(ast); - 
condition->andCombine(unAryOr); - condition->normalize(plan); - return condition; - }; - - if(firstPairContainsVars){ - condition = getVars(argPair2); + if(functionArguments->numMembers() == 4){ + condition = buildGeoCondition(plan,info, constantPair->first, constantPair->second); } else { - condition = getVars(argPair1); + condition = buildGeoCondition(plan,info, constantPair->first, constantPair->second, functionArguments->getMember(4)); } auto inode = new IndexNode( From 972af3af4cbc98379c1e1bdf69d77d3177c77a4a Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Wed, 30 Nov 2016 15:44:46 +0100 Subject: [PATCH 05/53] add within to geoindex iterator --- arangod/Indexes/GeoIndex.cpp | 15 ++++++++++----- arangod/Indexes/GeoIndex.h | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index fb03aed301..04d536e982 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -41,6 +41,7 @@ GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, : IndexIterator(collection, trx, mmdr, index), _index(index), _cursor(nullptr), + _coor(), _condition(cond), _variable(var), _lat(0), @@ -93,8 +94,10 @@ IndexLookupResult GeoIndexIterator::next() { auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,1)); if(coords && coords->length){ - auto revision = ::GeoIndex::toRevision(coords->coordinates[0].data); - return IndexLookupResult{revision}; + if(_near || GeoIndex_distance(&_coor, &coords->coordinates[0]) <= _withinRange ){ + auto revision = ::GeoIndex::toRevision(coords->coordinates[0].data); + return IndexLookupResult{revision}; + } } // if there are no more results we return the default constructed IndexLookupResult return IndexLookupResult{}; @@ -114,7 +117,9 @@ void GeoIndexIterator::nextBabies(std::vector& result, size_t } for(std::size_t index = 0; index < length; ++index){ - 
result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[index].data))); + while (_near || GeoIndex_distance(&_coor, &coords->coordinates[index]) <= _withinRange ){ + result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[index].data))); + } } } } @@ -128,8 +133,8 @@ void GeoIndexIterator::nextBabies(std::vector& result, size_t } ::GeoCursor* GeoIndexIterator::createCursor(double lat, double lon){ - ::GeoCoordinate coor{lat, lon, 0}; - return replaceCursor(::GeoIndex_NewCursor(_index->_geoIndex, &coor)); + _coor = GeoCoordinate{lat, lon, 0}; + return replaceCursor(::GeoIndex_NewCursor(_index->_geoIndex, &_coor)); } /// @brief creates an IndexIterator for the given Condition diff --git a/arangod/Indexes/GeoIndex.h b/arangod/Indexes/GeoIndex.h index 91ec2f1fca..0cc9d80ce4 100644 --- a/arangod/Indexes/GeoIndex.h +++ b/arangod/Indexes/GeoIndex.h @@ -69,6 +69,7 @@ class GeoIndexIterator final : public IndexIterator { GeoIndex const* _index; ::GeoCursor* _cursor; + ::GeoCoordinate _coor; arangodb::aql::AstNode const* _condition; arangodb::aql::Variable const* _variable; double _lat; From 2110736d3644260e94a3271b47e570548c8c6089 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Wed, 30 Nov 2016 16:50:02 +0100 Subject: [PATCH 06/53] fix logical error in nextBabies --- arangod/Indexes/GeoIndex.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index 04d536e982..57dbcf0b0e 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -88,6 +88,7 @@ void GeoIndexIterator::evaluateCondition() { } IndexLookupResult GeoIndexIterator::next() { + LOG(ERR) << "ENTER next"; if (!_cursor){ createCursor(_lat,_lon); } @@ -104,6 +105,7 @@ IndexLookupResult GeoIndexIterator::next() { } void GeoIndexIterator::nextBabies(std::vector& result, size_t batchSize) { + LOG(ERR) << "ENTER nextBabies"; if (!_cursor){ createCursor(_lat,_lon); 
} @@ -117,11 +119,14 @@ void GeoIndexIterator::nextBabies(std::vector& result, size_t } for(std::size_t index = 0; index < length; ++index){ - while (_near || GeoIndex_distance(&_coor, &coords->coordinates[index]) <= _withinRange ){ + if (_near || GeoIndex_distance(&_coor, &coords->coordinates[index]) <= _withinRange ){ result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[index].data))); + } else { + break; } } } + LOG(ERR) << "EXIT nextBabies"; } ::GeoCursor* GeoIndexIterator::replaceCursor(::GeoCursor* c){ From 5dbf5e14e343b7fd1384e8d0f2373f55135c53f9 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Thu, 1 Dec 2016 10:03:24 +0100 Subject: [PATCH 07/53] refactor geoOptimization into smaller fucntions this prepares the creation of the within rules --- arangod/Aql/OptimizerRules.cpp | 214 ++++++++++++++++++--------------- 1 file changed, 117 insertions(+), 97 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 42dcda7ec3..f818e5cd82 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4026,6 +4026,119 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN return boost::none; } + +bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, AstNode const* funcNode, bool asc){ + auto const& functionArguments = funcNode->getMember(0); + if(functionArguments->numMembers() < 4){ + return false; + } + + std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; + std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; + + auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan); + auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan); + + // xor only one argument pair shall have a geoIndex + if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ + return false; + } + + LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; + + std::pair* 
constantPair; + GeoIndexInfo info; + if(result1){ + info = std::move(result1.get()); + constantPair = &argPair2; + } else { + info = std::move(result2.get()); + constantPair = &argPair1; + } + + LOG(OBILEVEL) << " attributes: " << info._longitude[0] + << ", " << info._longitude + << " of collection:" << info._collection->getName() + << " are geoindexed"; + + //break; //remove this to make use of the index + + auto cnode = info._collectionNode; + auto& idxPtr = info._index; + + std::unique_ptr condition; + if(functionArguments->numMembers() == 4){ + condition = buildGeoCondition(plan,info, constantPair->first, constantPair->second); + } else { + condition = buildGeoCondition(plan,info, constantPair->first, constantPair->second, functionArguments->getMember(4)); + } + + auto inode = new IndexNode( + plan, plan->nextId(), cnode->vocbase(), + cnode->collection(), cnode->outVariable(), + std::vector{Transaction::IndexHandle{idxPtr}}, + condition.get(), asc); + plan->registerNode(inode); + condition.release(); + + plan->unlinkNode(node); + plan->replaceNode(cnode,inode); + + //signal that plan has been changed + return true; +}; + + +AstNode const* identifyGeoOptimizationCandidate(bool sort, ExecutionPlan* plan, ExecutionNode* n){ + if(sort){ + auto node = static_cast(n); + auto const& elements = node->getElements(); + + // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion + if ( !(elements.size() == 1 && elements[0].second)) { + return nullptr; + } + + //variable of sort expression + auto const variable = elements[0].first; + TRI_ASSERT(variable != nullptr); + + //// find the expression that is bound to the variable + // get the expression node that holds the cacluation + auto setter = plan->getVarSetBy(variable->id); + if (setter == nullptr || setter->getType() != EN::CALCULATION) { + return nullptr; + } + + // downcast to calculation node and get expression + auto cn = static_cast(setter); + auto const expression = cn->expression(); + + 
// the expression must exist and it must be a function call + if (expression == nullptr || expression->node() == nullptr || + expression->node()->type != NODE_TYPE_FCALL) { + // not the right type of node + return nullptr; + } + + //get the ast node of the expression + AstNode const* funcNode = expression->node(); + auto func = static_cast(funcNode->getData()); + + // we're looking for "DISTANCE()", which is a function call + // with an empty parameters array + if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { + return nullptr; + } + return funcNode; + } else { + return nullptr; + } + + +}; + + void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) { @@ -4039,108 +4152,15 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, plan->findNodesOfType(nodes, EN::SORT, true); for (auto const& n : nodes) { - auto node = static_cast(n); - auto const& elements = node->getElements(); - - // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion - if ( !(elements.size() == 1 && elements[0].second)) { + auto funcNode = identifyGeoOptimizationCandidate(true, plan, n); + if(!funcNode){ continue; } - - //variable of sort expression - auto const variable = elements[0].first; - TRI_ASSERT(variable != nullptr); - - //// find the expression that is bound to the variable - // get the expression node that holds the cacluation - auto setter = plan->getVarSetBy(variable->id); - if (setter == nullptr || setter->getType() != EN::CALCULATION) { - continue; - } - - // downcast to calculation node and get expression - auto cn = static_cast(setter); - auto const expression = cn->expression(); - - // the expression must exist and it must be a function call - if (expression == nullptr || expression->node() == nullptr || - expression->node()->type != NODE_TYPE_FCALL) { - // not the right type of node - continue; - } - - //get the ast node of the expression - AstNode const* funcNode = 
expression->node(); - auto func = static_cast(funcNode->getData()); - - // we're looking for "DISTANCE()", which is a function call - // with an empty parameters array - if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { - continue; - } - LOG(OBILEVEL) << " FOUND DISTANCE RULE"; - - auto const& functionArguments = funcNode->getMember(0); - if(functionArguments->numMembers() < 4){ - continue; + if (applyGeoOptimization(true, plan, n, funcNode, true)){ + modified = true; } - - std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; - std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; - - auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan); - auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan); - - // xor only one argument pair shall have a geoIndex - if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ - continue; - } - - LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; - - std::pair* constantPair; - GeoIndexInfo info; - if(result1){ - info = std::move(result1.get()); - constantPair = &argPair2; - } else { - info = std::move(result2.get()); - constantPair = &argPair1; - } - - LOG(OBILEVEL) << " attributes: " << info._longitude[0] - << ", " << info._longitude - << " of collection:" << info._collection->getName() - << " are geoindexed"; - - //break; //remove this to make use of the index - - auto cnode = info._collectionNode; - auto& idxPtr = info._index; - - std::unique_ptr condition; - if(functionArguments->numMembers() == 4){ - condition = buildGeoCondition(plan,info, constantPair->first, constantPair->second); - } else { - condition = buildGeoCondition(plan,info, constantPair->first, constantPair->second, functionArguments->getMember(4)); - } - - auto inode = new IndexNode( - plan, plan->nextId(), cnode->vocbase(), - cnode->collection(), cnode->outVariable(), - std::vector{Transaction::IndexHandle{idxPtr}}, - 
condition.get(), !elements[0].second); - plan->registerNode(inode); - condition.release(); - - plan->unlinkNode(n); - plan->replaceNode(cnode,inode); - - //signal that plan has been changed - modified=true; } - opt->addPlan(plan, rule, modified); LOG(OBILEVEL) << "EXIT GEO RULE"; From e619ef3e4e5907f781d0a59133a4bb4222811f2e Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Thu, 1 Dec 2016 12:21:51 +0100 Subject: [PATCH 08/53] now inspect sort and filter nodes --- arangod/Aql/OptimizerRules.cpp | 116 ++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 45 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index f818e5cd82..8b1243fd72 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3959,6 +3959,9 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& return condition; } + + +// GEO RULES ////////////////////////////////////////////////////////////////// // TODO - remove debug code #ifdef OBIDEBUG #define OBILEVEL ERR @@ -4089,70 +4092,82 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, A }; -AstNode const* identifyGeoOptimizationCandidate(bool sort, ExecutionPlan* plan, ExecutionNode* n){ - if(sort){ - auto node = static_cast(n); - auto const& elements = node->getElements(); +AstNode const* identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ + ExecutionNode* setter = nullptr; - // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion - if ( !(elements.size() == 1 && elements[0].second)) { - return nullptr; + switch(type){ + case EN::SORT: { + auto node = static_cast(n); + auto const& elements = node->getElements(); + + // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion + if ( !(elements.size() == 1 && elements[0].second)) { + return nullptr; + } + + //variable of sort expression + auto const variable = 
elements[0].first; + TRI_ASSERT(variable != nullptr); + + //// find the expression that is bound to the variable + // get the expression node that holds the cacluation + setter = plan->getVarSetBy(variable->id); } + break; - //variable of sort expression - auto const variable = elements[0].first; - TRI_ASSERT(variable != nullptr); + case EN::FILTER: { + auto node = static_cast(n); - //// find the expression that is bound to the variable - // get the expression node that holds the cacluation - auto setter = plan->getVarSetBy(variable->id); - if (setter == nullptr || setter->getType() != EN::CALCULATION) { - return nullptr; + // filter nodes always have one input variable + auto varsUsedHere = node->getVariablesUsedHere(); + TRI_ASSERT(varsUsedHere.size() == 1); + + // now check who introduced our variable + auto variable = varsUsedHere[0]; + setter = plan->getVarSetBy(variable->id); } + break; - // downcast to calculation node and get expression - auto cn = static_cast(setter); - auto const expression = cn->expression(); - - // the expression must exist and it must be a function call - if (expression == nullptr || expression->node() == nullptr || - expression->node()->type != NODE_TYPE_FCALL) { - // not the right type of node + default: return nullptr; - } + } - //get the ast node of the expression - AstNode const* funcNode = expression->node(); - auto func = static_cast(funcNode->getData()); + if (setter == nullptr || setter->getType() != EN::CALCULATION) { + return nullptr; + } + // downcast to calculation node and get expression + auto cn = static_cast(setter); + auto const expression = cn->expression(); - // we're looking for "DISTANCE()", which is a function call - // with an empty parameters array - if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { - return nullptr; - } - return funcNode; - } else { + // the expression must exist and it must be a function call + if (expression == nullptr || expression->node() == nullptr || + 
expression->node()->type != NODE_TYPE_FCALL) { + // not the right type of node return nullptr; } + //get the ast node of the expression + AstNode const* funcNode = expression->node(); + auto func = static_cast(funcNode->getData()); + // we're looking for "DISTANCE()", which is a function call + // with an empty parameters array + if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { + return nullptr; + } + return funcNode; + + + + return nullptr; }; - -void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, - ExecutionPlan* plan, - Optimizer::Rule const* rule) { - - LOG(OBILEVEL) << "ENTER GEO RULE"; - +void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, bool& modified){ SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; - bool modified = false; - plan->findNodesOfType(nodes, EN::SORT, true); - for (auto const& n : nodes) { - auto funcNode = identifyGeoOptimizationCandidate(true, plan, n); + auto funcNode = identifyGeoOptimizationCandidate(EN::SORT, plan, n); if(!funcNode){ continue; } @@ -4161,6 +4176,17 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, modified = true; } } +} + +void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, + ExecutionPlan* plan, + Optimizer::Rule const* rule) { + + LOG(OBILEVEL) << "ENTER GEO RULE"; + + bool modified = false; + checkNodesForGeoOptimization(EN::SORT, plan, modified); + checkNodesForGeoOptimization(EN::FILTER, plan, modified); opt->addPlan(plan, rule, modified); LOG(OBILEVEL) << "EXIT GEO RULE"; From 8bb719c6151bfa3c52172f97292b1f755c7e925a Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Thu, 1 Dec 2016 15:59:10 +0100 Subject: [PATCH 09/53] add first tests for geoindex --- arangod/Aql/OptimizerRules.cpp | 5 +- js/server/tests/aql/aql-optimizer-geoindex.js | 194 ++++++++++++++++++ 2 files changed, 195 insertions(+), 4 deletions(-) create mode 100644 js/server/tests/aql/aql-optimizer-geoindex.js diff --git a/arangod/Aql/OptimizerRules.cpp 
b/arangod/Aql/OptimizerRules.cpp index 8b1243fd72..4250fa1fba 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4155,11 +4155,8 @@ AstNode const* identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Ex if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { return nullptr; } + return funcNode; - - - - return nullptr; }; void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, bool& modified){ diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js new file mode 100644 index 0000000000..612a6cbfbd --- /dev/null +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -0,0 +1,194 @@ +/*jshint globalstrict:false, strict:false, maxlen: 500 */ +/*global assertEqual, assertFalse, assertTrue, assertNotEqual, AQL_EXPLAIN, AQL_EXECUTE */ + +//////////////////////////////////////////////////////////////////////////////// +/// @brief tests for optimizer rules +/// +/// @file +/// +/// DISCLAIMER +/// +/// Copyright 2010-2012 triagens GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Christoph Uhde +/// @author Copyright 2016, ArangoDB GmbH, Cologne, Germany +//////////////////////////////////////////////////////////////////////////////// + +var internal = require("internal"); +var jsunity = require("jsunity"); +var helper = require("@arangodb/aql-helper"); +var isEqual = helper.isEqual; +var findExecutionNodes = helper.findExecutionNodes; +var findReferencedNodes = helper.findReferencedNodes; +var getQueryMultiplePlansAndExecutions = helper.getQueryMultiplePlansAndExecutions; +var removeAlwaysOnClusterRules = helper.removeAlwaysOnClusterRules; + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test suite +//////////////////////////////////////////////////////////////////////////////// + +function optimizerRuleTestSuite() { + var ruleName = "use-geoindex"; + var secondRuleName = "use-geoindexes"; + var removeCalculationNodes = "remove-unnecessary-calculations-2"; + var colName = "UnitTestsAqlOptimizer" + ruleName.replace(/-/g, "_"); + var colNameOther = colName + "_XX"; + + // various choices to control the optimizer: + var paramNone = { optimizer: { rules: [ "-all" ] } }; + var paramIndexFromSort = { optimizer: { rules: [ "-all", "+" + ruleName ] } }; + var paramIndexRange = { optimizer: { rules: [ "-all", "+" + secondRuleName ] } }; + var paramIndexFromSort_IndexRange = { optimizer: { rules: [ "-all", "+" + ruleName, "+" + secondRuleName ] } }; + var paramIndexFromSort_IndexRange_RemoveCalculations = { + optimizer: { rules: [ "-all", "+" + ruleName, "+" + secondRuleName, "+" + removeCalculationNodes ] } + }; + var paramIndexFromSort_RemoveCalculations = { + optimizer: { rules: [ "-all", "+" + ruleName, "+" + removeCalculationNodes ] } + }; + + var geocol; + var sortArray = function (l, r) { + if (l[0] !== r[0]) { + return l[0] < r[0] ? -1 : 1; + } + if (l[1] !== r[1]) { + return l[1] < r[1] ? 
-1 : 1; + } + return 0; + }; + var hasSortNode = function (plan) { + assertEqual(findExecutionNodes(plan, "SortNode").length, 1, "Has SortNode"); + }; + var hasNoSortNode = function (plan) { + assertEqual(findExecutionNodes(plan, "SortNode").length, 0, "Has no SortNode"); + }; + var hasNoIndexNode = function (plan) { + assertEqual(findExecutionNodes(plan, "IndexNode").length, 0, "Has no IndexNode"); + }; + var hasNoResultsNode = function (plan) { + assertEqual(findExecutionNodes(plan, "NoResultsNode").length, 1, "Has NoResultsNode"); + }; + var hasCalculationNodes = function (plan, countXPect) { + assertEqual(findExecutionNodes(plan, "CalculationNode").length, + countXPect, "Has " + countXPect + " CalculationNode"); + }; + var hasIndexNode = function (plan) { + var rn = findExecutionNodes(plan, "IndexNode"); + assertEqual(rn.length, 1, "Has IndexNode"); + return; + }; + var isNodeType = function(node, type) { + assertEqual(node.type, type, "check whether this node is of type "+type); + }; + + return { + + //////////////////////////////////////////////////////////////////////////////// + /// @brief set up + //////////////////////////////////////////////////////////////////////////////// + + setUp : function () { + var loopto = 10; + + internal.db._drop(colName); + geocol = internal.db._create(colName); + geocol.ensureIndex({type:"geo", fields:["lat","lon"]}) + for (lat=-40; lat <=40 ; ++lat){ + for (lon=-40; lon <= 40; ++lon){ + geocol.insert({lat,lon}); + } + } + }, + + //////////////////////////////////////////////////////////////////////////////// + /// @brief tear down + //////////////////////////////////////////////////////////////////////////////// + + tearDown : function () { + internal.db._drop(colName); + internal.db._drop(colNameOther); + geocol = null; + }, + + testRuleBasics : function () { + geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); + + var queries = [ + //query clust sort filter + [ "FOR d IN " + colName + " SORT 
distance(d.lat,d.lon, 0 ,0 ) RETURN 1", false, false, false ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) RETURN 1", false, false, false ] +// [ "FOR d IN geocol FILTER distance(d.lat,d.lon, 0 ,0 ) < 1 RETURN 1", false, false, false ] + ]; + + queries.forEach(function(query) { + var result = AQL_EXPLAIN(query[0]); + + // //optimized on cluster + // if (query[1]) { + // assertNotEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); + // } + // else { + // assertEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); + // } + + //sort nodes + if (query[2]) { + hasSortNode(result); + } else { + hasNoSortNode(result); + } + + //filter nodes + if (query[2]) { + hasSortNode(result); + } else { + hasNoSortNode(result); + } + + }); + }, // testRuleBasics + + testRuleSort : function () { + geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); + + var queries = [ + //query clust sort filter + [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) RETURN 5", false, false, false ], + ]; + + var expected = [ + [(0,0), (-1,0), (0,1), (1,0), (0,-1)] + ] + + queries.forEach(function(query, qindex) { + var result = AQL_EXECUTE(query[0]); + for(var rindex=0; rindex < result.size; rindex){ + assertEqual(expected[qindex][rindex], (result.lat , result.lon)); + } + }); + } // testRuleSort + + }; // test dictionary (return) +} // optimizerRuleTestSuite + +//////////////////////////////////////////////////////////////////////////////// +/// @brief executes the test suite +//////////////////////////////////////////////////////////////////////////////// + +jsunity.run(optimizerRuleTestSuite); + +return jsunity.done(); From 56614ac8c811b79e55cde95c4b7b34e16a4ce239 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Thu, 1 Dec 2016 16:30:09 +0100 Subject: [PATCH 10/53] switch unittests to chai and add failing test for FILTER condition --- arangod/Aql/OptimizerRules.cpp | 1 + 
js/server/tests/aql/aql-optimizer-geoindex.js | 32 +++++++++++++------ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 4250fa1fba..d0c796138b 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4102,6 +4102,7 @@ AstNode const* identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Ex // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion if ( !(elements.size() == 1 && elements[0].second)) { + //test on second makes sure the SORT is ascending return nullptr; } diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index 612a6cbfbd..987230bc31 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -1,6 +1,9 @@ /*jshint globalstrict:false, strict:false, maxlen: 500 */ /*global assertEqual, assertFalse, assertTrue, assertNotEqual, AQL_EXPLAIN, AQL_EXECUTE */ +// execute with: +// ./scripts/unittest shell_server_aql --test js/server/tests/aql/aql-optimizer-geoindex.js + //////////////////////////////////////////////////////////////////////////////// /// @brief tests for optimizer rules /// @@ -28,6 +31,7 @@ /// @author Copyright 2016, ArangoDB GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// +const expect = require('chai').expect; var internal = require("internal"); var jsunity = require("jsunity"); var helper = require("@arangodb/aql-helper"); @@ -76,6 +80,12 @@ function optimizerRuleTestSuite() { var hasNoSortNode = function (plan) { assertEqual(findExecutionNodes(plan, "SortNode").length, 0, "Has no SortNode"); }; + var hasFilterNode = function (plan) { + assertEqual(findExecutionNodes(plan, "FilterNode").length, 1, "Has FilterNode"); + }; + var hasNoFilterNode = function (plan) { + assertEqual(findExecutionNodes(plan, "FilterNode").length, 0, "Has no 
FilterNode"); + }; var hasNoIndexNode = function (plan) { assertEqual(findExecutionNodes(plan, "IndexNode").length, 0, "Has no IndexNode"); }; @@ -129,8 +139,9 @@ function optimizerRuleTestSuite() { var queries = [ //query clust sort filter - [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) RETURN 1", false, false, false ], - [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) RETURN 1", false, false, false ] + [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC RETURN 1", false, false, false ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC RETURN 1", false, false, false ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 RETURN 1", false, false, false ], // [ "FOR d IN geocol FILTER distance(d.lat,d.lon, 0 ,0 ) < 1 RETURN 1", false, false, false ] ]; @@ -153,10 +164,10 @@ function optimizerRuleTestSuite() { } //filter nodes - if (query[2]) { - hasSortNode(result); + if (query[3]) { + hasFilterNode(result); } else { - hasNoSortNode(result); + hasNoFilterNode(result); } }); @@ -166,18 +177,21 @@ function optimizerRuleTestSuite() { geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); var queries = [ - //query clust sort filter - [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) RETURN 5", false, false, false ], + [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC RETURN 5", false, false, false ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC RETURN 5", false, false, false ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 2 RETURN 5", false, false, false ], ]; var expected = [ - [(0,0), (-1,0), (0,1), (1,0), (0,-1)] + [(0,0), (-1,0), (0,1), (1,0), (0,-1)], + [(0,0), (-1,0), (0,1), (1,0), (0,-1)], + [(0,0), (-1,0), (0,1), (1,0), (0,-1)], ] queries.forEach(function(query, qindex) { var result = AQL_EXECUTE(query[0]); for(var rindex=0; rindex < result.size; rindex){ - 
assertEqual(expected[qindex][rindex], (result.lat , result.lon)); + expect.expected[qindex][rindex].to.be.equal((result.lat , result.lon)); } }); } // testRuleSort From 08ef943c83c7cebb1d9c64da87f530aed5bd8ded Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Thu, 1 Dec 2016 23:01:50 +0100 Subject: [PATCH 11/53] fix tests --- js/server/tests/aql/aql-optimizer-geoindex.js | 106 ++++++++++-------- 1 file changed, 58 insertions(+), 48 deletions(-) diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index 987230bc31..55d80c2b88 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -46,6 +46,12 @@ var removeAlwaysOnClusterRules = helper.removeAlwaysOnClusterRules; //////////////////////////////////////////////////////////////////////////////// function optimizerRuleTestSuite() { + // quickly disable tests here + var enabled = { + basics : true, + sort : true + } + var ruleName = "use-geoindex"; var secondRuleName = "use-geoindexes"; var removeCalculationNodes = "remove-unnecessary-calculations-2"; @@ -135,65 +141,69 @@ function optimizerRuleTestSuite() { }, testRuleBasics : function () { - geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); + if(enabled.basics){ + geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); - var queries = [ - //query clust sort filter - [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC RETURN 1", false, false, false ], - [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC RETURN 1", false, false, false ], - [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 RETURN 1", false, false, false ], -// [ "FOR d IN geocol FILTER distance(d.lat,d.lon, 0 ,0 ) < 1 RETURN 1", false, false, false ] - ]; + var queries = [ + //query clust sort filter + [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d", false, false, 
false ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d", false, false, false ], + //[ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false ], + ]; - queries.forEach(function(query) { - var result = AQL_EXPLAIN(query[0]); + queries.forEach(function(query) { + var result = AQL_EXPLAIN(query[0]); - // //optimized on cluster - // if (query[1]) { - // assertNotEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); - // } - // else { - // assertEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); - // } + // //optimized on cluster + // if (query[1]) { + // assertNotEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); + // } + // else { + // assertEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); + // } - //sort nodes - if (query[2]) { - hasSortNode(result); - } else { - hasNoSortNode(result); - } + //sort nodes + if (query[2]) { + hasSortNode(result); + } else { + hasNoSortNode(result); + } - //filter nodes - if (query[3]) { - hasFilterNode(result); - } else { - hasNoFilterNode(result); - } + //filter nodes + if (query[3]) { + hasFilterNode(result); + } else { + hasNoFilterNode(result); + } - }); + }); + } }, // testRuleBasics testRuleSort : function () { - geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); + if(enabled.sort){ + var queries = [ + [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 5 RETURN d", false, false, false ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 5 RETURN d", false, false, false ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 2 RETURN d", false, false, false ], + ]; - var queries = [ - [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC RETURN 5", false, false, false ], - [ "FOR d IN " + colName + " SORT 
distance(0, 0, d.lat,d.lon ) ASC RETURN 5", false, false, false ], - [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 2 RETURN 5", false, false, false ], - ]; + var expected = [ + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + ] - var expected = [ - [(0,0), (-1,0), (0,1), (1,0), (0,-1)], - [(0,0), (-1,0), (0,1), (1,0), (0,-1)], - [(0,0), (-1,0), (0,1), (1,0), (0,-1)], - ] - - queries.forEach(function(query, qindex) { - var result = AQL_EXECUTE(query[0]); - for(var rindex=0; rindex < result.size; rindex){ - expect.expected[qindex][rindex].to.be.equal((result.lat , result.lon)); - } - }); + queries.forEach(function(query, qindex) { + var result = AQL_EXECUTE(query[0]); + pairs = result.json.map(function(res){ + return [res.lat,res.lon]; + }); + internal.print(pairs) + assertEqual(expected[qindex].sort(),pairs.sort()) + //expect(expected[qindex].sort()).to.be.equal(result.json.sort()) + }); + } } // testRuleSort }; // test dictionary (return) From 56b6be851cc4dfa2a41a62d17259daebc8d052a4 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 2 Dec 2016 10:44:42 +0100 Subject: [PATCH 12/53] add functions providing capability to check ast for parts of geoindex rules --- arangod/Aql/OptimizerRules.cpp | 177 +++++++++++++----- js/server/tests/aql/aql-optimizer-geoindex.js | 1 + 2 files changed, 129 insertions(+), 49 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index d0c796138b..d713efffee 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3918,21 +3918,32 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, opt->addPlan(plan, rule, modified); } - - - -struct GeoIndexInfo { - EnumerateCollectionNode* _collectionNode; - Collection const* _collection; - std::shared_ptr _index; - std::vector _longitude; - std::vector _latitude; +struct GeoIndexInfo{ + operator bool() const { return node; } + 
GeoIndexInfo() + : collectionNode(nullptr) + , collection(nullptr) + , node(nullptr) + , index(nullptr) + , range(0) + , within(false) + , lessgreaterequal(false) + {} + EnumerateCollectionNode* collectionNode; + Collection const* collection; + AstNode const* node; + std::shared_ptr index; + double range; + bool within; + bool lessgreaterequal; + std::vector longitude; + std::vector latitude; }; std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, AstNode* lat, AstNode* lon, AstNode* withRange = nullptr){ auto ast = plan->getAst(); - auto varAstNode = ast->createNodeReference(info._collectionNode->outVariable()); + auto varAstNode = ast->createNodeReference(info.collectionNode->outVariable()); auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); nAryAnd->reserve(withRange ? 3 : 2); @@ -3969,7 +3980,7 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& #define OBILEVEL TRACE #endif static boost::optional -geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionNode* ex, ExecutionPlan* plan){ +geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionNode* ex, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; LOG(OBILEVEL) << " enter argument check"; // first and second should be based on the same document - need to provide the document @@ -4020,7 +4031,12 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN //check access paths of attribues in ast and those in index match if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ - return GeoIndexInfo{collNode, coll, indexShardPtr, std::move(accessPath1), std::move(accessPath2) }; + info.collectionNode = collNode; + info.collection = coll; + info.index = indexShardPtr; + info.longitude = std::move(accessPath1); + info.latitude = std::move(accessPath2); + return info; } } } @@ -4030,8 +4046,8 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN } -bool applyGeoOptimization(bool near, 
ExecutionPlan* plan, ExecutionNode* node, AstNode const* funcNode, bool asc){ - auto const& functionArguments = funcNode->getMember(0); +bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, GeoIndexInfo& info, bool asc){ + auto const& functionArguments = info.node->getMember(0); if(functionArguments->numMembers() < 4){ return false; } @@ -4039,8 +4055,8 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, A std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; - auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan); - auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan); + auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan, info); + auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan, info); // xor only one argument pair shall have a geoIndex if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ @@ -4050,30 +4066,30 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, A LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; std::pair* constantPair; - GeoIndexInfo info; + GeoIndexInfo res; if(result1){ - info = std::move(result1.get()); + res = std::move(result1.get()); constantPair = &argPair2; } else { - info = std::move(result2.get()); + res = std::move(result2.get()); constantPair = &argPair1; } - LOG(OBILEVEL) << " attributes: " << info._longitude[0] - << ", " << info._longitude - << " of collection:" << info._collection->getName() + LOG(OBILEVEL) << " attributes: " << res.longitude[0] + << ", " << res.longitude + << " of collection:" << res.collection->getName() << " are geoindexed"; //break; //remove this to make use of the index - auto cnode = info._collectionNode; - auto& idxPtr = info._index; + auto cnode = res.collectionNode; + auto& idxPtr = res.index; std::unique_ptr condition; 
if(functionArguments->numMembers() == 4){ - condition = buildGeoCondition(plan,info, constantPair->first, constantPair->second); + condition = buildGeoCondition(plan,res, constantPair->first, constantPair->second); } else { - condition = buildGeoCondition(plan,info, constantPair->first, constantPair->second, functionArguments->getMember(4)); + condition = buildGeoCondition(plan,res, constantPair->first, constantPair->second, functionArguments->getMember(4)); } auto inode = new IndexNode( @@ -4092,9 +4108,65 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, A }; -AstNode const* identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ - ExecutionNode* setter = nullptr; +GeoIndexInfo isDistanceFunction(AstNode const* node){ + // the expression must exist and it must be a function call + auto rv = GeoIndexInfo{}; + if(node->type != NODE_TYPE_FCALL) { + return rv; + } + + //get the ast node of the expression + auto func = static_cast(node->getData()); + + // we're looking for "DISTANCE()", which is a function call + // with an empty parameters array + if ( func->externalName != "DISTANCE" || node->numMembers() != 1 ) { + return rv; + } + rv.node = node; + return rv; +} + +GeoIndexInfo isGeoFilterExpression(AstNode const* node){ + // binary compare must be on top + auto rv = GeoIndexInfo{}; + if( node->type != NODE_TYPE_OPERATOR_BINARY_GE + && node->type != NODE_TYPE_OPERATOR_BINARY_GT + && node->type != NODE_TYPE_OPERATOR_BINARY_LE + && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { + return rv; + } + + // binary expression has 2 members + if(node->numMembers() != 2){ + return rv; + } + + auto first = node->getMember(0); + auto second = node->getMember(0); + + auto first_dist_fun = isDistanceFunction(first); + if(first_dist_fun && true){ + first_dist_fun.within = true; + first_dist_fun.range = 1.0; //fixme + return first_dist_fun; + } + + auto second_dist_fun = isDistanceFunction(second); + if 
(second_dist_fun && true){ + second_dist_fun.within = true; + second_dist_fun.range = 1.0; //fixme + return second_dist_fun; + } + + return rv; +} + + +GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ + ExecutionNode* setter = nullptr; + auto rv = GeoIndexInfo{}; switch(type){ case EN::SORT: { auto node = static_cast(n); @@ -4103,7 +4175,7 @@ AstNode const* identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Ex // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion if ( !(elements.size() == 1 && elements[0].second)) { //test on second makes sure the SORT is ascending - return nullptr; + return rv; } //variable of sort expression @@ -4130,34 +4202,41 @@ AstNode const* identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Ex break; default: - return nullptr; + return rv; } + + // common part - extract astNode from setter witch is a calculation node if (setter == nullptr || setter->getType() != EN::CALCULATION) { - return nullptr; + return rv; } // downcast to calculation node and get expression auto cn = static_cast(setter); auto const expression = cn->expression(); - // the expression must exist and it must be a function call - if (expression == nullptr || expression->node() == nullptr || - expression->node()->type != NODE_TYPE_FCALL) { + // the expression must exist and it must have an astNode + if (expression == nullptr || expression->node() == nullptr){ // not the right type of node - return nullptr; + return rv; + } + AstNode const* node = expression->node(); + + + switch(type){ + case EN::SORT: { + return isDistanceFunction(node); + } + break; + + case EN::FILTER: { + return isGeoFilterExpression(node); + } + break; + + default: + return rv; } - //get the ast node of the expression - AstNode const* funcNode = expression->node(); - auto func = static_cast(funcNode->getData()); - - // we're looking for "DISTANCE()", which is a function call - // 
with an empty parameters array - if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { - return nullptr; - } - - return funcNode; }; void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, bool& modified){ @@ -4165,12 +4244,12 @@ void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* p SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::SORT, true); for (auto const& n : nodes) { - auto funcNode = identifyGeoOptimizationCandidate(EN::SORT, plan, n); - if(!funcNode){ + auto geoRequestDescripton = identifyGeoOptimizationCandidate(EN::SORT, plan, n); + if(!geoRequestDescripton){ continue; } LOG(OBILEVEL) << " FOUND DISTANCE RULE"; - if (applyGeoOptimization(true, plan, n, funcNode, true)){ + if (applyGeoOptimization(true, plan, n, geoRequestDescripton, true)){ modified = true; } } diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index 55d80c2b88..811066bff4 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -196,6 +196,7 @@ function optimizerRuleTestSuite() { queries.forEach(function(query, qindex) { var result = AQL_EXECUTE(query[0]); + expect(expected[qindex].length).to.be.equal(result.json.length) pairs = result.json.map(function(res){ return [res.lat,res.lon]; }); From fab9af483ec209f00b96355fc71d09749268d3c5 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 2 Dec 2016 11:00:39 +0100 Subject: [PATCH 13/53] add extra log topic for development --- lib/Logger/LogTopic.cpp | 1 + lib/Logger/Logger.h | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/Logger/LogTopic.cpp b/lib/Logger/LogTopic.cpp index f0a553b056..a3581b4430 100644 --- a/lib/Logger/LogTopic.cpp +++ b/lib/Logger/LogTopic.cpp @@ -47,6 +47,7 @@ LogTopic Logger::COMMUNICATION("communication", LogLevel::INFO); LogTopic Logger::COMPACTOR("compactor"); LogTopic 
Logger::CONFIG("config"); LogTopic Logger::DATAFILES("datafiles", LogLevel::INFO); +LogTopic Logger::DEVEL("development", LogLevel::DEBUG); LogTopic Logger::GRAPHS("graphs", LogLevel::INFO); LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO); LogTopic Logger::MMAP("mmap"); diff --git a/lib/Logger/Logger.h b/lib/Logger/Logger.h index 2fd7776ff6..7d85c32a4e 100644 --- a/lib/Logger/Logger.h +++ b/lib/Logger/Logger.h @@ -129,12 +129,13 @@ class Logger { public: static LogTopic AGENCY; static LogTopic AGENCYCOMM; - static LogTopic COLLECTOR; - static LogTopic COMPACTOR; - static LogTopic COMMUNICATION; - static LogTopic CONFIG; static LogTopic CLUSTER; + static LogTopic COLLECTOR; + static LogTopic COMMUNICATION; + static LogTopic COMPACTOR; + static LogTopic CONFIG; static LogTopic DATAFILES; + static LogTopic DEVEL; static LogTopic GRAPHS; static LogTopic HEARTBEAT; static LogTopic MMAP; From f27a22db063de6a129bc185201f4939b12876804 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 2 Dec 2016 11:20:09 +0100 Subject: [PATCH 14/53] fix debug code --- arangod/Aql/OptimizerRules.cpp | 39 ++++++++++++++++------------------ arangod/Indexes/GeoIndex.cpp | 20 ++++++++--------- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index d713efffee..411e1e725f 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3918,6 +3918,9 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, opt->addPlan(plan, rule, modified); } + + +// GEO RULES ////////////////////////////////////////////////////////////////// struct GeoIndexInfo{ operator bool() const { return node; } GeoIndexInfo() @@ -3970,23 +3973,14 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& return condition; } - - -// GEO RULES ////////////////////////////////////////////////////////////////// -// TODO - remove debug code -#ifdef OBIDEBUG - #define OBILEVEL ERR -#else - #define 
OBILEVEL TRACE -#endif static boost::optional geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionNode* ex, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; - LOG(OBILEVEL) << " enter argument check"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; // first and second should be based on the same document - need to provide the document // in order to see which collection is bound to it and if that collections supports geo-index if( !pair.first->isAttributeAccessForVariable() || !pair.second->isAttributeAccessForVariable()){ - LOG(OBILEVEL) << " not both args are of type attribute access"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << " not both args are of type attribute access"; return boost::none; } @@ -3997,14 +3991,14 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN SV accessPath1{pair.first->getString()}; SV accessPath2{pair.second->getString()}; - LOG(OBILEVEL) << " got setter"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << " got setter"; if(setter1 == setter2){ if(setter1->getType() == EN::ENUMERATE_COLLECTION){ auto collNode = reinterpret_cast(setter1); auto coll = collNode->collection(); //what kind of indexes does it have on what attributes auto lcoll = coll->getCollection(); // TODO - check collection for suitable geo-indexes - LOG(OBILEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); + LOG_TOPIC(DEBUG, Logger::DEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); for(auto indexShardPtr : lcoll->getIndexes()){ // get real index arangodb::Index& index = *indexShardPtr.get(); @@ -4015,7 +4009,7 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN continue; } -#ifdef OBIDEBUG + ///////////////////////////////////////////////// //FIXME - REMOVE DEBUG CODE LATER auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; for(auto vec : vecs ){ @@ -4027,7 +4021,7 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN std::cout << std::endl; } } -#endif 
+ ///////////////////////////////////////////////// //check access paths of attribues in ast and those in index match if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ @@ -4063,7 +4057,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, G return false; } - LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; std::pair* constantPair; GeoIndexInfo res; @@ -4075,7 +4069,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, G constantPair = &argPair1; } - LOG(OBILEVEL) << " attributes: " << res.longitude[0] + LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] << ", " << res.longitude << " of collection:" << res.collection->getName() << " are geoindexed"; @@ -4124,6 +4118,7 @@ GeoIndexInfo isDistanceFunction(AstNode const* node){ if ( func->externalName != "DISTANCE" || node->numMembers() != 1 ) { return rv; } + LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; rv.node = node; return rv; } @@ -4150,6 +4145,7 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ if(first_dist_fun && true){ first_dist_fun.within = true; first_dist_fun.range = 1.0; //fixme + LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; return first_dist_fun; } @@ -4157,6 +4153,7 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ if (second_dist_fun && true){ second_dist_fun.within = true; second_dist_fun.range = 1.0; //fixme + LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; return second_dist_fun; } @@ -4248,7 +4245,7 @@ void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* p if(!geoRequestDescripton){ continue; } - LOG(OBILEVEL) << " FOUND DISTANCE RULE"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; if (applyGeoOptimization(true, plan, n, geoRequestDescripton, true)){ modified = true; } @@ -4259,14 +4256,14 @@ void 
arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) { - LOG(OBILEVEL) << "ENTER GEO RULE"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; bool modified = false; checkNodesForGeoOptimization(EN::SORT, plan, modified); checkNodesForGeoOptimization(EN::FILTER, plan, modified); opt->addPlan(plan, rule, modified); - LOG(OBILEVEL) << "EXIT GEO RULE"; - LOG(OBILEVEL) << ""; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; } diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index 57dbcf0b0e..c8a014a73b 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -57,18 +57,18 @@ GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, } void GeoIndexIterator::evaluateCondition() { - LOG(ERR) << "ENTER evaluate Condition"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER evaluate Condition"; if (_condition) { - LOG(ERR) << "The Condition is"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "The Condition is"; _condition->dump(0); auto numMembers = _condition->numMembers(); if(numMembers >= 2){ _lat = _condition->getMember(0)->getMember(1)->getDoubleValue(); - LOG(ERR) << "lat: " << _lat; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "lat: " << _lat; _lon = _condition->getMember(1)->getMember(1)->getDoubleValue(); - LOG(ERR) << "lon: " << _lon; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "lon: " << _lon; } if (numMembers == 2){ //near @@ -77,18 +77,18 @@ void GeoIndexIterator::evaluateCondition() { _near = false; _withinRange = _condition->getMember(2)->getMember(1)->getDoubleValue(); } else { - LOG(ERR) << "Invalid Number of arguments"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "Invalid Number of arguments"; } } else { - LOG(ERR) << "No Condition passed to constructor"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "No Condition passed to constructor"; } - LOG(ERR) << "EXIT evaluate Condition"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT evaluate Condition"; } 
IndexLookupResult GeoIndexIterator::next() { - LOG(ERR) << "ENTER next"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER next"; if (!_cursor){ createCursor(_lat,_lon); } @@ -105,7 +105,7 @@ IndexLookupResult GeoIndexIterator::next() { } void GeoIndexIterator::nextBabies(std::vector& result, size_t batchSize) { - LOG(ERR) << "ENTER nextBabies"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER nextBabies"; if (!_cursor){ createCursor(_lat,_lon); } @@ -126,7 +126,7 @@ void GeoIndexIterator::nextBabies(std::vector& result, size_t } } } - LOG(ERR) << "EXIT nextBabies"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT nextBabies"; } ::GeoCursor* GeoIndexIterator::replaceCursor(::GeoCursor* c){ From e0220be12d11a23328410aa8cbad6bc6c867a860 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 2 Dec 2016 11:53:31 +0100 Subject: [PATCH 15/53] stronger split between identification of candidates and application of geo index rule --- arangod/Aql/OptimizerRules.cpp | 46 +++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 411e1e725f..d217b3dca5 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3925,20 +3925,24 @@ struct GeoIndexInfo{ operator bool() const { return node; } GeoIndexInfo() : collectionNode(nullptr) + , sortOrFilterNode(nullptr) , collection(nullptr) , node(nullptr) , index(nullptr) , range(0) , within(false) , lessgreaterequal(false) + , invalid(false) {} EnumerateCollectionNode* collectionNode; + ExecutionNode* sortOrFilterNode; Collection const* collection; AstNode const* node; std::shared_ptr index; double range; bool within; bool lessgreaterequal; + bool invalid; //use it std::vector longitude; std::vector latitude; }; @@ -4040,7 +4044,7 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN } -bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, GeoIndexInfo& info, bool asc){ +bool 
applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bool asc){ auto const& functionArguments = info.node->getMember(0); if(functionArguments->numMembers() < 4){ return false; @@ -4049,8 +4053,8 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, G std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; - auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan, info); - auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan, info); + auto result1 = geoDistanceFunctionArgCheck(argPair1, info.sortOrFilterNode, plan, info); + auto result2 = geoDistanceFunctionArgCheck(argPair2, info.sortOrFilterNode, plan, info); // xor only one argument pair shall have a geoIndex if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ @@ -4094,7 +4098,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, ExecutionNode* node, G plan->registerNode(inode); condition.release(); - plan->unlinkNode(node); + plan->unlinkNode(info.sortOrFilterNode); plan->replaceNode(cnode,inode); //signal that plan has been changed @@ -4148,7 +4152,7 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; return first_dist_fun; } - + auto second_dist_fun = isDistanceFunction(second); if (second_dist_fun && true){ second_dist_fun.within = true; @@ -4221,34 +4225,35 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec switch(type){ case EN::SORT: { - return isDistanceFunction(node); + rv = isDistanceFunction(node); + rv.sortOrFilterNode = n; } break; case EN::FILTER: { - return isGeoFilterExpression(node); + rv = isGeoFilterExpression(node); + rv.sortOrFilterNode = n; } break; default: - return rv; + ; } + return rv; }; -void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, bool& modified){ 
+void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, std::vector& infos){ SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; - plan->findNodesOfType(nodes, EN::SORT, true); + plan->findNodesOfType(nodes, type, true); for (auto const& n : nodes) { - auto geoRequestDescripton = identifyGeoOptimizationCandidate(EN::SORT, plan, n); - if(!geoRequestDescripton){ + auto geoIndexInfo = identifyGeoOptimizationCandidate(type, plan, n); + if(!geoIndexInfo){ continue; } + infos.push_back(std::move(geoIndexInfo)); LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; - if (applyGeoOptimization(true, plan, n, geoRequestDescripton, true)){ - modified = true; - } } } @@ -4258,9 +4263,16 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; + std::vector infos; + checkNodesForGeoOptimization(EN::SORT, plan, infos); + checkNodesForGeoOptimization(EN::FILTER, plan, infos); + bool modified = false; - checkNodesForGeoOptimization(EN::SORT, plan, modified); - checkNodesForGeoOptimization(EN::FILTER, plan, modified); + for(auto& info : infos){ + if (applyGeoOptimization(true, plan, info, true)){ + modified = true; + } + } opt->addPlan(plan, rule, modified); LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE"; From 72d4790c68087bdafc6cff653647e56b966e41cf Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 2 Dec 2016 12:22:07 +0100 Subject: [PATCH 16/53] remove boost::optional and make use of nodetype --- arangod/Aql/OptimizerRules.cpp | 42 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index d217b3dca5..4bff4e767a 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3922,27 +3922,30 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, // GEO RULES ////////////////////////////////////////////////////////////////// 
struct GeoIndexInfo{ - operator bool() const { return node; } + operator bool() const { return node && valid; } + void invalidate() { valid = false; } GeoIndexInfo() : collectionNode(nullptr) - , sortOrFilterNode(nullptr) + , executionNode(nullptr) , collection(nullptr) , node(nullptr) , index(nullptr) , range(0) + , executionNodeType(EN::ILLEGAL) , within(false) , lessgreaterequal(false) - , invalid(false) + , valid(true) {} EnumerateCollectionNode* collectionNode; - ExecutionNode* sortOrFilterNode; + ExecutionNode* executionNode; Collection const* collection; AstNode const* node; std::shared_ptr index; double range; + ExecutionNode::NodeType executionNodeType; bool within; bool lessgreaterequal; - bool invalid; //use it + bool valid; //use it std::vector longitude; std::vector latitude; }; @@ -3977,15 +3980,16 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& return condition; } -static boost::optional -geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionNode* ex, ExecutionPlan* plan, GeoIndexInfo info){ +GeoIndexInfo +geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; // first and second should be based on the same document - need to provide the document // in order to see which collection is bound to it and if that collections supports geo-index if( !pair.first->isAttributeAccessForVariable() || !pair.second->isAttributeAccessForVariable()){ LOG_TOPIC(DEBUG, Logger::DEVEL) << " not both args are of type attribute access"; - return boost::none; + info.invalidate(); + return info; } // expect access of the for doc.attribute @@ -4040,7 +4044,8 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN } } - return boost::none; + info.invalidate(); + return info; } @@ -4053,8 +4058,8 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bo std::pair argPair1 = { 
functionArguments->getMember(0), functionArguments->getMember(1) }; std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; - auto result1 = geoDistanceFunctionArgCheck(argPair1, info.sortOrFilterNode, plan, info); - auto result2 = geoDistanceFunctionArgCheck(argPair2, info.sortOrFilterNode, plan, info); + auto result1 = geoDistanceFunctionArgCheck(argPair1, plan, info); + auto result2 = geoDistanceFunctionArgCheck(argPair2, plan, info); // xor only one argument pair shall have a geoIndex if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ @@ -4066,10 +4071,10 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bo std::pair* constantPair; GeoIndexInfo res; if(result1){ - res = std::move(result1.get()); + res = std::move(result1); constantPair = &argPair2; } else { - res = std::move(result2.get()); + res = std::move(result2); constantPair = &argPair1; } @@ -4098,7 +4103,9 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bo plan->registerNode(inode); condition.release(); - plan->unlinkNode(info.sortOrFilterNode); + if(info.executionNodeType == EN::SORT){ + plan->unlinkNode(info.executionNode); + } plan->replaceNode(cnode,inode); //signal that plan has been changed @@ -4226,20 +4233,21 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec switch(type){ case EN::SORT: { rv = isDistanceFunction(node); - rv.sortOrFilterNode = n; } break; case EN::FILTER: { rv = isGeoFilterExpression(node); - rv.sortOrFilterNode = n; } break; default: - ; + rv.invalidate(); // not required but make sure the result is invalid } + rv.executionNode = n; + rv.executionNodeType = type; + return rv; }; From d858b9aa71b93b856bd24664ce53632115ff56b4 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 2 Dec 2016 16:15:23 +0100 Subject: [PATCH 17/53] further implement within --- arangod/Aql/OptimizerRules.cpp | 101 +++++++++++++++++++-------------- 1 file 
changed, 57 insertions(+), 44 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 4bff4e767a..5ecd5b3070 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3927,7 +3927,6 @@ struct GeoIndexInfo{ GeoIndexInfo() : collectionNode(nullptr) , executionNode(nullptr) - , collection(nullptr) , node(nullptr) , index(nullptr) , range(0) @@ -3936,18 +3935,17 @@ struct GeoIndexInfo{ , lessgreaterequal(false) , valid(true) {} - EnumerateCollectionNode* collectionNode; - ExecutionNode* executionNode; - Collection const* collection; - AstNode const* node; - std::shared_ptr index; - double range; - ExecutionNode::NodeType executionNodeType; - bool within; - bool lessgreaterequal; - bool valid; //use it - std::vector longitude; - std::vector latitude; + EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode + ExecutionNode* executionNode; // start node hat is a sort or filter + AstNode const* node; // AstNode that contains the sort/filter condition + std::shared_ptr index; //pointer to geoindex + AstNode const* range; // range for within + ExecutionNode::NodeType executionNodeType; // type of execution node sort or filter + bool within; // is this a within lookup + bool lessgreaterequal; // is this a check for le/ge (true) or lt/gt (false) + bool valid; // contains this node a valid condition + std::vector longitude; // access path to longitude + std::vector latitude; // access path to latitude }; std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, @@ -4031,10 +4029,9 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionP } ///////////////////////////////////////////////// - //check access paths of attribues in ast and those in index match + //check access paths of attributes in ast and those in index match if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ info.collectionNode = collNode; - info.collection 
= coll; info.index = indexShardPtr; info.longitude = std::move(accessPath1); info.latitude = std::move(accessPath2); @@ -4050,6 +4047,9 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionP bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bool asc){ + + // FIXME - this code should go to the candidate finding ///////////////////// + // get it running first auto const& functionArguments = info.node->getMember(0); if(functionArguments->numMembers() < 4){ return false; @@ -4080,13 +4080,12 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bo LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] << ", " << res.longitude - << " of collection:" << res.collection->getName() + << " of collection:" << res.collectionNode->collection()->getName() << " are geoindexed"; //break; //remove this to make use of the index - - auto cnode = res.collectionNode; - auto& idxPtr = res.index; + + // FIXME - END ////////////////////////////////////////////////////////////// std::unique_ptr condition; if(functionArguments->numMembers() == 4){ @@ -4096,9 +4095,9 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bo } auto inode = new IndexNode( - plan, plan->nextId(), cnode->vocbase(), - cnode->collection(), cnode->outVariable(), - std::vector{Transaction::IndexHandle{idxPtr}}, + plan, plan->nextId(), res.collectionNode->vocbase(), + res.collectionNode->collection(), res.collectionNode->outVariable(), + std::vector{Transaction::IndexHandle{res.index}}, condition.get(), asc); plan->registerNode(inode); condition.release(); @@ -4106,13 +4105,16 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bo if(info.executionNodeType == EN::SORT){ plan->unlinkNode(info.executionNode); } - plan->replaceNode(cnode,inode); + plan->replaceNode(res.collectionNode,inode); //signal that plan has been changed return true; }; - +AstNode const* isValueOrRefNode(AstNode 
const* node){ + //TODO - implement me + return node; +} GeoIndexInfo isDistanceFunction(AstNode const* node){ // the expression must exist and it must be a function call @@ -4136,12 +4138,21 @@ GeoIndexInfo isDistanceFunction(AstNode const* node){ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ // binary compare must be on top + bool dist_first = true; + bool lessEqual = true; auto rv = GeoIndexInfo{}; if( node->type != NODE_TYPE_OPERATOR_BINARY_GE && node->type != NODE_TYPE_OPERATOR_BINARY_GT && node->type != NODE_TYPE_OPERATOR_BINARY_LE && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { return rv; + } else { + if (node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT){ + dist_first = false; + } + } + if (node->type == NODE_TYPE_OPERATOR_BINARY_GT || node->type == NODE_TYPE_OPERATOR_BINARY_LT){ + lessEqual = false; } // binary expression has 2 members @@ -4150,22 +4161,23 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ } auto first = node->getMember(0); - auto second = node->getMember(0); + auto second = node->getMember(1); - auto first_dist_fun = isDistanceFunction(first); - if(first_dist_fun && true){ - first_dist_fun.within = true; - first_dist_fun.range = 1.0; //fixme - LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; - return first_dist_fun; - } + auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode const* value_node){ + if (!dist_first && dist_fun && value_node){ + dist_fun.within = true; + dist_fun.range = value_node; //FIXME + dist_fun.lessgreaterequal = lessEqual; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; + } else { + dist_fun.invalidate(); + } + return dist_fun; + }; - auto second_dist_fun = isDistanceFunction(second); - if (second_dist_fun && true){ - second_dist_fun.within = true; - second_dist_fun.range = 1.0; //fixme - LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; - return second_dist_fun; + rv = eval_stuff(dist_first, lessEqual, 
isDistanceFunction(first), isValueOrRefNode(second)); + if (!rv) { + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second), isValueOrRefNode(first)); } return rv; @@ -4175,6 +4187,7 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ ExecutionNode* setter = nullptr; auto rv = GeoIndexInfo{}; + //TODO - iterate over elements of conjunction / disjunction switch(type){ case EN::SORT: { auto node = static_cast(n); @@ -4191,7 +4204,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec TRI_ASSERT(variable != nullptr); //// find the expression that is bound to the variable - // get the expression node that holds the cacluation + // get the expression node that holds the calculation setter = plan->getVarSetBy(variable->id); } break; @@ -4274,16 +4287,16 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, std::vector infos; checkNodesForGeoOptimization(EN::SORT, plan, infos); checkNodesForGeoOptimization(EN::FILTER, plan, infos); - + bool modified = false; for(auto& info : infos){ - if (applyGeoOptimization(true, plan, info, true)){ - modified = true; - } + if (applyGeoOptimization(true, plan, info, true)){ + modified = true; + break; // break on first replacement - might be relaxed later + } } opt->addPlan(plan, rule, modified); LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE"; LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; } - From 9d04b37e58661c15908505d92e4a4f222ca50a63 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 12:46:23 +0100 Subject: [PATCH 18/53] within queries are now working --- arangod/Aql/OptimizerRules.cpp | 64 +++++++++++++------ arangod/Indexes/GeoIndex.cpp | 29 +++++---- arangod/Indexes/GeoIndex.h | 2 +- js/server/tests/aql/aql-optimizer-geoindex.js | 5 +- 4 files changed, 64 insertions(+), 36 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp 
b/arangod/Aql/OptimizerRules.cpp index 5ecd5b3070..5b67a0c11c 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3929,7 +3929,7 @@ struct GeoIndexInfo{ , executionNode(nullptr) , node(nullptr) , index(nullptr) - , range(0) + , range(nullptr) , executionNodeType(EN::ILLEGAL) , within(false) , lessgreaterequal(false) @@ -3949,12 +3949,12 @@ struct GeoIndexInfo{ }; std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, - AstNode* lat, AstNode* lon, AstNode* withRange = nullptr){ + AstNode* lat, AstNode* lon, bool lessEqual = false, AstNode const* withRange = nullptr){ auto ast = plan->getAst(); auto varAstNode = ast->createNodeReference(info.collectionNode->outVariable()); auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); - nAryAnd->reserve(withRange ? 3 : 2); + nAryAnd->reserve(withRange ? 4 : 2); auto latKey = ast->createNodeAttributeAccess(varAstNode, "latitude",8); auto latEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, latKey, lat); @@ -3968,6 +3968,11 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& auto withKey = ast->createNodeAttributeAccess(varAstNode, "within",6); auto withEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, withKey, withRange); nAryAnd->addMember(withEq); + + auto lessKey = ast->createNodeAttributeAccess(varAstNode, "lesseq",6); + auto lessValue = ast->createNodeValueBool(lessEqual); + auto lessEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lessKey, lessValue); + nAryAnd->addMember(lessEq); } auto unAryOr = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_OR, nAryAnd); @@ -3978,8 +3983,7 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& return condition; } -GeoIndexInfo -geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ +GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ using 
SV = std::vector; LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; // first and second should be based on the same document - need to provide the document @@ -4046,7 +4050,7 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionP } -bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bool asc){ +bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ // FIXME - this code should go to the candidate finding ///////////////////// // get it running first @@ -4066,7 +4070,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bo return false; } - LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; std::pair* constantPair; GeoIndexInfo res; @@ -4078,27 +4082,27 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info, bo constantPair = &argPair1; } - LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] - << ", " << res.longitude - << " of collection:" << res.collectionNode->collection()->getName() - << " are geoindexed"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] + // << ", " << res.longitude + // << " of collection:" << res.collectionNode->collection()->getName() + // << " are geoindexed"; //break; //remove this to make use of the index // FIXME - END ////////////////////////////////////////////////////////////// std::unique_ptr condition; - if(functionArguments->numMembers() == 4){ - condition = buildGeoCondition(plan,res, constantPair->first, constantPair->second); + if(info.within){ + condition = buildGeoCondition(plan,res, constantPair->first, constantPair->second, info.lessgreaterequal, info.range); } else { - condition = buildGeoCondition(plan,res, constantPair->first, constantPair->second, functionArguments->getMember(4)); + condition = buildGeoCondition(plan,res, constantPair->first, 
constantPair->second); } auto inode = new IndexNode( plan, plan->nextId(), res.collectionNode->vocbase(), res.collectionNode->collection(), res.collectionNode->outVariable(), std::vector{Transaction::IndexHandle{res.index}}, - condition.get(), asc); + condition.get(), false); plan->registerNode(inode); condition.release(); @@ -4131,7 +4135,7 @@ GeoIndexInfo isDistanceFunction(AstNode const* node){ if ( func->externalName != "DISTANCE" || node->numMembers() != 1 ) { return rv; } - LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; rv.node = node; return rv; } @@ -4145,6 +4149,8 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ && node->type != NODE_TYPE_OPERATOR_BINARY_GT && node->type != NODE_TYPE_OPERATOR_BINARY_LE && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression does not contain <,<=,>=,>"; return rv; } else { if (node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT){ @@ -4155,30 +4161,40 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ lessEqual = false; } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "binary operator found"; // binary expression has 2 members if(node->numMembers() != 2){ return rv; } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "operator has 2 members"; auto first = node->getMember(0); auto second = node->getMember(1); + node->dump(0); + auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode const* value_node){ - if (!dist_first && dist_fun && value_node){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "1: " << dist_first; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "2: " << (bool)dist_fun; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "3: " << (bool)value_node; + if (dist_first && dist_fun && value_node){ dist_fun.within = true; dist_fun.range = value_node; //FIXME dist_fun.lessgreaterequal = lessEqual; - LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; + 
//LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; } else { dist_fun.invalidate(); } return dist_fun; }; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "frist check"; rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first), isValueOrRefNode(second)); if (!rv) { + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "second check"; rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second), isValueOrRefNode(first)); } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result " << (bool) rv; return rv; } @@ -4188,8 +4204,10 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec ExecutionNode* setter = nullptr; auto rv = GeoIndexInfo{}; //TODO - iterate over elements of conjunction / disjunction + LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER IDENTIFY"; switch(type){ case EN::SORT: { + LOG_TOPIC(DEBUG, Logger::DEVEL) << "found sort node"; auto node = static_cast(n); auto const& elements = node->getElements(); @@ -4210,6 +4228,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec break; case EN::FILTER: { + LOG_TOPIC(DEBUG, Logger::DEVEL) << "found filter node"; auto node = static_cast(n); // filter nodes always have one input variable @@ -4231,6 +4250,8 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec if (setter == nullptr || setter->getType() != EN::CALCULATION) { return rv; } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; + // downcast to calculation node and get expression auto cn = static_cast(setter); auto const expression = cn->expression(); @@ -4243,6 +4264,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec AstNode const* node = expression->node(); + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; switch(type){ case EN::SORT: { rv = isDistanceFunction(node); @@ -4274,7 +4296,7 @@ void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* p continue; } 
infos.push_back(std::move(geoIndexInfo)); - LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; } } @@ -4290,13 +4312,13 @@ void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, bool modified = false; for(auto& info : infos){ - if (applyGeoOptimization(true, plan, info, true)){ + if (applyGeoOptimization(true, plan, info)){ modified = true; break; // break on first replacement - might be relaxed later } } opt->addPlan(plan, rule, modified); - LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; } diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index c8a014a73b..c60362bc72 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -48,7 +48,7 @@ GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, _lon(0), _near(true), _withinRange(0), - _withinInverse(false) + _withinLessEq(false) // lookup will hold the inforamtion if this is a cursor for // near/within and the reference point //_lookups(trx, node, reference, index->fields()), @@ -57,11 +57,9 @@ GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, } void GeoIndexIterator::evaluateCondition() { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER evaluate Condition"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER evaluate Condition"; if (_condition) { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "The Condition is"; - _condition->dump(0); auto numMembers = _condition->numMembers(); if(numMembers >= 2){ @@ -73,22 +71,24 @@ void GeoIndexIterator::evaluateCondition() { if (numMembers == 2){ //near _near = true; - } else if (numMembers == 3) { //within + LOG_TOPIC(DEBUG, Logger::DEVEL) << "INDEX CONFIGURED FOR NEAR"; + } else { //within _near = false; _withinRange = _condition->getMember(2)->getMember(1)->getDoubleValue(); - } else { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "Invalid 
Number of arguments"; + _withinLessEq = _condition->getMember(3)->getMember(1)->getDoubleValue(); + + LOG_TOPIC(DEBUG, Logger::DEVEL) << "INDEX CONFIGURED FOR WITHIN with range " << _withinRange; } } else { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "No Condition passed to constructor"; + LOG(ERR) << "No Condition passed to GeoIndexIterator constructor"; } - LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT evaluate Condition"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT evaluate Condition"; } IndexLookupResult GeoIndexIterator::next() { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER next"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER next"; if (!_cursor){ createCursor(_lat,_lon); } @@ -105,7 +105,7 @@ IndexLookupResult GeoIndexIterator::next() { } void GeoIndexIterator::nextBabies(std::vector& result, size_t batchSize) { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER nextBabies"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER nextBabies " << batchSize; if (!_cursor){ createCursor(_lat,_lon); } @@ -114,19 +114,24 @@ void GeoIndexIterator::nextBabies(std::vector& result, size_t if (batchSize > 0) { auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,batchSize)); size_t length = coords ? 
coords->length : 0; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "length " << length; if (!length){ return; } + for(std::size_t index = 0; index < length; ++index){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "near " << _near << " max allowed range: " << _withinRange + // << " actual range: " << GeoIndex_distance(&_coor, &coords->coordinates[index]) ; if (_near || GeoIndex_distance(&_coor, &coords->coordinates[index]) <= _withinRange ){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "add above to result" ; result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[index].data))); } else { break; } } } - LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT nextBabies"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT nextBabies " << result.size(); } ::GeoCursor* GeoIndexIterator::replaceCursor(::GeoCursor* c){ diff --git a/arangod/Indexes/GeoIndex.h b/arangod/Indexes/GeoIndex.h index 0cc9d80ce4..ca0195d7cd 100644 --- a/arangod/Indexes/GeoIndex.h +++ b/arangod/Indexes/GeoIndex.h @@ -76,7 +76,7 @@ class GeoIndexIterator final : public IndexIterator { double _lon; bool _near; double _withinRange; - double _withinInverse; + double _withinLessEq; }; class GeoIndex final : public Index { diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index 811066bff4..c38fe4435e 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -148,7 +148,7 @@ function optimizerRuleTestSuite() { //query clust sort filter [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d", false, false, false ], [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d", false, false, false ], - //[ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, true ], ]; queries.forEach(function(query) { @@ 
-185,7 +185,8 @@ function optimizerRuleTestSuite() { var queries = [ [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 5 RETURN d", false, false, false ], [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 5 RETURN d", false, false, false ], - [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 2 RETURN d", false, false, false ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 111200 RETURN d", false, false, false ], +// [ "FOR i IN 1..2 FOR d IN geocol SORT distance(i,2,d.lat,d.lon) ASC LIMIT 5 RETURN d", false, false, false ], ]; var expected = [ From 9b4e01eb3bf32f5da88934702efb92e5eee21a38 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 13:17:01 +0100 Subject: [PATCH 19/53] move geoIndexRule to a position before indexes have been touched by optimizer --- arangod/Aql/Optimizer.cpp | 4 ++-- arangod/Aql/Optimizer.h | 5 +++-- arangod/Aql/OptimizerRules.cpp | 2 +- arangod/Aql/OptimizerRules.h | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arangod/Aql/Optimizer.cpp b/arangod/Aql/Optimizer.cpp index c61731fc01..a42eaf142a 100644 --- a/arangod/Aql/Optimizer.cpp +++ b/arangod/Aql/Optimizer.cpp @@ -486,8 +486,8 @@ void Optimizer::setupRules() { patchUpdateStatementsRule_pass9, DoesNotCreateAdditionalPlans, true); // patch update statements - registerRule("geo-index-optimizer", optimizeGeoIndexRule, - geoDistanceRule, DoesNotCreateAdditionalPlans, true); + registerRule("geo-index-optimizer", geoIndexRule, + applyGeoIndexRule, DoesNotCreateAdditionalPlans, true); if (arangodb::ServerState::instance()->isCoordinator()) { // distribute operations in cluster diff --git a/arangod/Aql/Optimizer.h b/arangod/Aql/Optimizer.h index b47755135b..502b173602 100644 --- a/arangod/Aql/Optimizer.h +++ b/arangod/Aql/Optimizer.h @@ -145,6 +145,8 @@ class Optimizer { // remove redundant OR conditions removeRedundantOrRule_pass6 = 820, + applyGeoIndexRule = 1060, +
useIndexesRule_pass6 = 830, // try to remove filters covered by index ranges @@ -192,9 +194,8 @@ class Optimizer { removeUnnecessaryRemoteScatterRule_pass10 = 1040, // recognize that a RemoveNode can be moved to the shards - undistributeRemoveAfterEnumCollRule_pass10 = 1050, + undistributeRemoveAfterEnumCollRule_pass10 = 1050 - geoDistanceRule = 1060 }; public: diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 5b67a0c11c..64565e6d7c 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4300,7 +4300,7 @@ void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* p } } -void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, +void arangodb::aql::geoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) { diff --git a/arangod/Aql/OptimizerRules.h b/arangod/Aql/OptimizerRules.h index e0659d0547..30392be37d 100644 --- a/arangod/Aql/OptimizerRules.h +++ b/arangod/Aql/OptimizerRules.h @@ -198,7 +198,7 @@ void prepareTraversalsRule(Optimizer* opt, ExecutionPlan* plan, /// @brief moves simple subqueries one level higher void inlineSubqueriesRule(Optimizer*, ExecutionPlan*, Optimizer::Rule const*); -void optimizeGeoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule); +void geoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule); } // namespace aql } // namespace arangodb From 5dcf61c9badf604f01e0c251638546e2773aafb0 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 14:00:14 +0100 Subject: [PATCH 20/53] now work to a certain degree with binary/n-ary AND in FILTER/SORT condition --- arangod/Aql/OptimizerRules.cpp | 34 ++++++++++++++++---- js/server/tests/aql/aql-optimizer-indexes.js | 2 +- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 64565e6d7c..526f2eae12 100644 --- a/arangod/Aql/OptimizerRules.cpp +++
b/arangod/Aql/OptimizerRules.cpp @@ -4052,8 +4052,7 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ - // FIXME - this code should go to the candidate finding ///////////////////// - // get it running first + // FIXME -- technical debt -- this code should go to the candidate finding ///////////////////// auto const& functionArguments = info.node->getMember(0); if(functionArguments->numMembers() < 4){ return false; @@ -4088,7 +4087,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ // << " are geoindexed"; //break; //remove this to make use of the index - + // FIXME - END ////////////////////////////////////////////////////////////// std::unique_ptr condition; @@ -4250,7 +4249,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec if (setter == nullptr || setter->getType() != EN::CALCULATION) { return rv; } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; // downcast to calculation node and get expression auto cn = static_cast(setter); @@ -4264,19 +4263,40 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec AstNode const* node = expression->node(); - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; + + //FIXME -- technical debt -- code duplication / not all cases covered switch(type){ case EN::SORT: { - rv = isDistanceFunction(node); + //iterate && and find frist candidate - this gets way more complex if we want to check more + auto ntype = node->type; + if ( ntype == NODE_TYPE_OPERATOR_BINARY_AND || ntype == NODE_TYPE_OPERATOR_NARY_AND){ + for(std::size_t i = 0; i < node->numMembers(); ++i){ + rv = isDistanceFunction(node->getMember(i)); + if(rv) break; + } + } else { + rv = 
isDistanceFunction(node); + } } break; case EN::FILTER: { - rv = isGeoFilterExpression(node); + //iterate && and find frist candidate - this gets way more complex if we want to check more + auto ntype = node->type; + if ( ntype == NODE_TYPE_OPERATOR_BINARY_AND || ntype == NODE_TYPE_OPERATOR_NARY_AND){ + for(std::size_t i = 0; i < node->numMembers(); ++i){ + rv = isGeoFilterExpression(node->getMember(i)); + if(rv) break; + } + } else { + rv = isGeoFilterExpression(node); + } } break; default: + LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression is not valid for geoindex"; rv.invalidate(); // not required but make sure the result is invalid } diff --git a/js/server/tests/aql/aql-optimizer-indexes.js b/js/server/tests/aql/aql-optimizer-indexes.js index a793ac8796..87f633b34d 100644 --- a/js/server/tests/aql/aql-optimizer-indexes.js +++ b/js/server/tests/aql/aql-optimizer-indexes.js @@ -408,7 +408,7 @@ function optimizerIndexesTestSuite () { assertEqual("SingletonNode", nodeTypes[0], query); assertNotEqual(-1, nodeTypes.indexOf("IndexNode"), query); - + var results = AQL_EXECUTE(query); assertEqual([ 12 ], results.json, query); assertEqual(0, results.stats.scannedFull); From b0b660eb851d41ac25f41c98843acfc49bc5cc59 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 15:19:06 +0100 Subject: [PATCH 21/53] add iterative preorder walk for and conditions --- arangod/Aql/OptimizerRules.cpp | 48 ++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 526f2eae12..fb60529e87 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4139,6 +4139,32 @@ GeoIndexInfo isDistanceFunction(AstNode const* node){ return rv; } +GeoIndexInfo iterativePreorderWithCondition(AstNode const* root, GeoIndexInfo(*condition)(AstNode const*)){ + // returns on first hit + if (!root){ + return GeoIndexInfo{}; + } + + std::vector nodestack; + 
nodestack.push_back(root); + + while(nodestack.size()){ + AstNode const* current = nodestack.back(); + nodestack.pop_back(); + GeoIndexInfo rv = condition(current); + if (rv) { + return rv; + } + + if (current->type == NODE_TYPE_OPERATOR_BINARY_AND || current->type == NODE_TYPE_OPERATOR_NARY_AND ){ + for (std::size_t i = 0; i < current->numMembers(); ++i){ + nodestack.push_back(current->getMember(i)); + } + } + } + return GeoIndexInfo{}; +} + GeoIndexInfo isGeoFilterExpression(AstNode const* node){ // binary compare must be on top bool dist_first = true; @@ -4268,30 +4294,12 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec //FIXME -- technical debt -- code duplication / not all cases covered switch(type){ case EN::SORT: { - //iterate && and find frist candidate - this gets way more complex if we want to check more - auto ntype = node->type; - if ( ntype == NODE_TYPE_OPERATOR_BINARY_AND || ntype == NODE_TYPE_OPERATOR_NARY_AND){ - for(std::size_t i = 0; i < node->numMembers(); ++i){ - rv = isDistanceFunction(node->getMember(i)); - if(rv) break; - } - } else { - rv = isDistanceFunction(node); - } + rv = isDistanceFunction(node); } break; case EN::FILTER: { - //iterate && and find frist candidate - this gets way more complex if we want to check more - auto ntype = node->type; - if ( ntype == NODE_TYPE_OPERATOR_BINARY_AND || ntype == NODE_TYPE_OPERATOR_NARY_AND){ - for(std::size_t i = 0; i < node->numMembers(); ++i){ - rv = isGeoFilterExpression(node->getMember(i)); - if(rv) break; - } - } else { - rv = isGeoFilterExpression(node); - } + rv = iterativePreorderWithCondition(node,isGeoFilterExpression); } break; From c84223d59890c05afc57e8988f8e89b4baf60298 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 15:31:50 +0100 Subject: [PATCH 22/53] Add information if AstNode has been found in subexpression this information can be used in rewriting the condition and deciding if a sort node can be deleted or not --- 
arangod/Aql/OptimizerRules.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index fb60529e87..9593445d9e 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3934,6 +3934,7 @@ struct GeoIndexInfo{ , within(false) , lessgreaterequal(false) , valid(true) + , inSubCondition(false) {} EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode ExecutionNode* executionNode; // start node hat is a sort or filter @@ -3944,6 +3945,7 @@ struct GeoIndexInfo{ bool within; // is this a within lookup bool lessgreaterequal; // is this a check for le/ge (true) or lt/gt (false) bool valid; // contains this node a valid condition + bool inSubCondition; std::vector longitude; // access path to longitude std::vector latitude; // access path to latitude }; @@ -4119,7 +4121,7 @@ AstNode const* isValueOrRefNode(AstNode const* node){ return node; } -GeoIndexInfo isDistanceFunction(AstNode const* node){ +GeoIndexInfo isDistanceFunction(AstNode const* node, bool inSubCondition){ // the expression must exist and it must be a function call auto rv = GeoIndexInfo{}; if(node->type != NODE_TYPE_FCALL) { @@ -4136,22 +4138,24 @@ GeoIndexInfo isDistanceFunction(AstNode const* node){ } //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; rv.node = node; + rv.inSubCondition = inSubCondition; return rv; } -GeoIndexInfo iterativePreorderWithCondition(AstNode const* root, GeoIndexInfo(*condition)(AstNode const*)){ +GeoIndexInfo iterativePreorderWithCondition(AstNode const* root, GeoIndexInfo(*condition)(AstNode const*, bool)){ // returns on first hit if (!root){ return GeoIndexInfo{}; } - + bool inSubCondition = false; std::vector nodestack; nodestack.push_back(root); while(nodestack.size()){ AstNode const* current = nodestack.back(); nodestack.pop_back(); - GeoIndexInfo rv = condition(current); + GeoIndexInfo rv = 
condition(current,inSubCondition); + inSubCondition = true; // only false for root if (rv) { return rv; } @@ -4165,7 +4169,7 @@ GeoIndexInfo iterativePreorderWithCondition(AstNode const* root, GeoIndexInfo(*c return GeoIndexInfo{}; } -GeoIndexInfo isGeoFilterExpression(AstNode const* node){ +GeoIndexInfo isGeoFilterExpression(AstNode const* node, bool inSubCondition){ // binary compare must be on top bool dist_first = true; bool lessEqual = true; @@ -4214,10 +4218,10 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node){ }; //LOG_TOPIC(DEBUG, Logger::DEVEL) << "frist check"; - rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first), isValueOrRefNode(second)); + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, inSubCondition), isValueOrRefNode(second)); if (!rv) { //LOG_TOPIC(DEBUG, Logger::DEVEL) << "second check"; - rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second), isValueOrRefNode(first)); + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, inSubCondition), isValueOrRefNode(first)); } //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result " << (bool) rv; @@ -4294,7 +4298,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec //FIXME -- technical debt -- code duplication / not all cases covered switch(type){ case EN::SORT: { - rv = isDistanceFunction(node); + rv = isDistanceFunction(node,false); } break; From 5aabbb0ac003edd4f2788c6d15db68b4e7e8ede6 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 16:21:21 +0100 Subject: [PATCH 23/53] fix const-ness so the condition can be modified --- arangod/Aql/OptimizerRules.cpp | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 9593445d9e..28b557d16b 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4116,12 +4116,12 @@ bool applyGeoOptimization(bool near, ExecutionPlan*
plan, GeoIndexInfo& info){ return true; }; -AstNode const* isValueOrRefNode(AstNode const* node){ +AstNode* isValueOrRefNode(AstNode* node){ //TODO - implement me return node; } -GeoIndexInfo isDistanceFunction(AstNode const* node, bool inSubCondition){ +GeoIndexInfo isDistanceFunction(AstNode* node, bool inSubCondition){ // the expression must exist and it must be a function call auto rv = GeoIndexInfo{}; if(node->type != NODE_TYPE_FCALL) { @@ -4142,17 +4142,17 @@ GeoIndexInfo isDistanceFunction(AstNode const* node, bool inSubCondition){ return rv; } -GeoIndexInfo iterativePreorderWithCondition(AstNode const* root, GeoIndexInfo(*condition)(AstNode const*, bool)){ +GeoIndexInfo iterativePreorderWithCondition(AstNode* root, GeoIndexInfo(*condition)(AstNode*, bool)){ // returns on first hit if (!root){ return GeoIndexInfo{}; } bool inSubCondition = false; - std::vector nodestack; + std::vector nodestack; nodestack.push_back(root); while(nodestack.size()){ - AstNode const* current = nodestack.back(); + AstNode* current = nodestack.back(); nodestack.pop_back(); GeoIndexInfo rv = condition(current,inSubCondition); inSubCondition = true; // only false for root @@ -4169,7 +4169,7 @@ GeoIndexInfo iterativePreorderWithCondition(AstNode const* root, GeoIndexInfo(*c return GeoIndexInfo{}; } -GeoIndexInfo isGeoFilterExpression(AstNode const* node, bool inSubCondition){ +GeoIndexInfo isGeoFilterExpression(AstNode* node, bool inSubCondition){ // binary compare must be on top bool dist_first = true; bool lessEqual = true; @@ -4197,12 +4197,13 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node, bool inSubCondition){ } //LOG_TOPIC(DEBUG, Logger::DEVEL) << "operator has 2 members"; - auto first = node->getMember(0); - auto second = node->getMember(1); + + AstNode* first = node->getMember(0); + AstNode* second = node->getMember(1); //FIXME -- const node node->dump(0); - auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode const* value_node){ + 
auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode* value_node){ //LOG_TOPIC(DEBUG, Logger::DEVEL) << "1: " << dist_first; //LOG_TOPIC(DEBUG, Logger::DEVEL) << "2: " << (bool)dist_fun; //LOG_TOPIC(DEBUG, Logger::DEVEL) << "3: " << (bool)value_node; @@ -4217,6 +4218,7 @@ GeoIndexInfo isGeoFilterExpression(AstNode const* node, bool inSubCondition){ return dist_fun; }; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "frist check"; rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, inSubCondition), isValueOrRefNode(second)); if (!rv) { @@ -4238,7 +4240,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec case EN::SORT: { LOG_TOPIC(DEBUG, Logger::DEVEL) << "found sort node"; auto node = static_cast(n); - auto const& elements = node->getElements(); + auto& elements = node->getElements(); // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion if ( !(elements.size() == 1 && elements[0].second)) { @@ -4247,7 +4249,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec } //variable of sort expression - auto const variable = elements[0].first; + auto variable = elements[0].first; TRI_ASSERT(variable != nullptr); //// find the expression that is bound to the variable @@ -4283,14 +4285,14 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec // downcast to calculation node and get expression auto cn = static_cast(setter); - auto const expression = cn->expression(); + auto expression = cn->expression(); // the expression must exist and it must have an astNode if (expression == nullptr || expression->node() == nullptr){ // not the right type of node return rv; } - AstNode const* node = expression->node(); + AstNode* node = expression->nodeForModification(); LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; @@ -4322,7 +4324,7 @@ void checkNodesForGeoOptimization(ExecutionNode::NodeType type, 
ExecutionPlan* p SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, type, true); - for (auto const& n : nodes) { + for (auto& n : nodes) { auto geoIndexInfo = identifyGeoOptimizationCandidate(type, plan, n); if(!geoIndexInfo){ continue; From 6b93b9d2eb4ac874c8583620fdf699cb93d31643 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 19:38:01 +0100 Subject: [PATCH 24/53] bring functions in correct order --- arangod/Aql/OptimizerRules.cpp | 512 +++++++++++++++++---------------- 1 file changed, 261 insertions(+), 251 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 28b557d16b..9fab7b6f35 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3919,8 +3919,9 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, } - +/////////////////////////////////////////////////////////////////////////////// // GEO RULES ////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// struct GeoIndexInfo{ operator bool() const { return node && valid; } void invalidate() { valid = false; } @@ -3950,41 +3951,231 @@ struct GeoIndexInfo{ std::vector latitude; // access path to latitude }; -std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, - AstNode* lat, AstNode* lon, bool lessEqual = false, AstNode const* withRange = nullptr){ - auto ast = plan->getAst(); - auto varAstNode = ast->createNodeReference(info.collectionNode->outVariable()); +////////////////////////////////////////////////////////////////////// +//candidate checking - auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); - nAryAnd->reserve(withRange ? 
4 : 2); - - auto latKey = ast->createNodeAttributeAccess(varAstNode, "latitude",8); - auto latEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, latKey, lat); - nAryAnd->addMember(latEq); - - auto lonKey = ast->createNodeAttributeAccess(varAstNode, "longitude",9); - auto lonEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lonKey, lon); - nAryAnd->addMember(lonEq); - - if(withRange){ - auto withKey = ast->createNodeAttributeAccess(varAstNode, "within",6); - auto withEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, withKey, withRange); - nAryAnd->addMember(withEq); - - auto lessKey = ast->createNodeAttributeAccess(varAstNode, "lesseq",6); - auto lessValue = ast->createNodeValueBool(lessEqual); - auto lessEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lessKey, lessValue); - nAryAnd->addMember(lessEq); - } - - auto unAryOr = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_OR, nAryAnd); - - auto condition = std::make_unique(ast); - condition->andCombine(unAryOr); - condition->normalize(plan); - return condition; +AstNode* isValueOrRefNode(AstNode* node){ + //TODO - implement me + return node; } +GeoIndexInfo isDistanceFunction(AstNode* node, bool inSubCondition){ + // the expression must exist and it must be a function call + auto rv = GeoIndexInfo{}; + if(node->type != NODE_TYPE_FCALL) { + return rv; + } + + //get the ast node of the expression + auto func = static_cast(node->getData()); + + // we're looking for "DISTANCE()", which is a function call + // with an empty parameters array + if ( func->externalName != "DISTANCE" || node->numMembers() != 1 ) { + return rv; + } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; + rv.node = node; + rv.inSubCondition = inSubCondition; + return rv; +} + +GeoIndexInfo isGeoFilterExpression(AstNode* node, bool inSubCondition){ + // binary compare must be on top + bool dist_first = true; + bool lessEqual = true; + auto rv = GeoIndexInfo{}; + if( 
node->type != NODE_TYPE_OPERATOR_BINARY_GE + && node->type != NODE_TYPE_OPERATOR_BINARY_GT + && node->type != NODE_TYPE_OPERATOR_BINARY_LE + && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression does not contain <,<=,>=,>"; + return rv; + } else { + if (node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT){ + dist_first = false; + } + } + if (node->type == NODE_TYPE_OPERATOR_BINARY_GT || node->type == NODE_TYPE_OPERATOR_BINARY_LT){ + lessEqual = false; + } + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "binary operator found"; + // binary expression has 2 members + if(node->numMembers() != 2){ + return rv; + } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "operator has 2 members"; + + + AstNode* first = node->getMember(0); + AstNode* second = node->getMember(1); //FIXME -- const node + + node->dump(0); + + auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode* value_node){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "1: " << dist_first; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "2: " << (bool)dist_fun; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "3: " << (bool)value_node; + if (dist_first && dist_fun && value_node){ + dist_fun.within = true; + dist_fun.range = value_node; //FIXME + dist_fun.lessgreaterequal = lessEqual; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; + } else { + dist_fun.invalidate(); + } + return dist_fun; + }; + + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "frist check"; + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, inSubCondition), isValueOrRefNode(second)); + if (!rv) { + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "second check"; + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, inSubCondition), isValueOrRefNode(first)); + } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result " << (bool) rv; + + return rv; +} + +GeoIndexInfo iterativePreorderWithCondition(AstNode* root, GeoIndexInfo(*condition)(AstNode*, bool)){ + // 
returns on first hit + if (!root){ + return GeoIndexInfo{}; + } + bool inSubCondition = false; + std::vector nodestack; + nodestack.push_back(root); + + while(nodestack.size()){ + AstNode* current = nodestack.back(); + nodestack.pop_back(); + GeoIndexInfo rv = condition(current,inSubCondition); + inSubCondition = true; // only false for root + if (rv) { + return rv; + } + + if (current->type == NODE_TYPE_OPERATOR_BINARY_AND || current->type == NODE_TYPE_OPERATOR_NARY_AND ){ + for (std::size_t i = 0; i < current->numMembers(); ++i){ + nodestack.push_back(current->getMember(i)); + } + } + } + return GeoIndexInfo{}; +} + +//checks a single sort or filter node +GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ + ExecutionNode* setter = nullptr; + auto rv = GeoIndexInfo{}; + //TODO - iterate over elements of conjunction / disjunction + LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER IDENTIFY"; + switch(type){ + case EN::SORT: { + LOG_TOPIC(DEBUG, Logger::DEVEL) << "found sort node"; + auto node = static_cast(n); + auto& elements = node->getElements(); + + // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion + if ( !(elements.size() == 1 && elements[0].second)) { + //test on second makes sure the SORT is ascending + return rv; + } + + //variable of sort expression + auto variable = elements[0].first; + TRI_ASSERT(variable != nullptr); + + //// find the expression that is bound to the variable + // get the expression node that holds the calculation + setter = plan->getVarSetBy(variable->id); + } + break; + + case EN::FILTER: { + LOG_TOPIC(DEBUG, Logger::DEVEL) << "found filter node"; + auto node = static_cast(n); + + // filter nodes always have one input variable + auto varsUsedHere = node->getVariablesUsedHere(); + TRI_ASSERT(varsUsedHere.size() == 1); + + // now check who introduced our variable + auto variable = varsUsedHere[0]; + setter = plan->getVarSetBy(variable->id); + } + 
break; + + default: + return rv; + } + + // common part - extract astNode from setter witch is a calculation node + if (setter == nullptr || setter->getType() != EN::CALCULATION) { + return rv; + } + LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; + + // downcast to calculation node and get expression + auto cn = static_cast(setter); + auto expression = cn->expression(); + + // the expression must exist and it must have an astNode + if (expression == nullptr || expression->node() == nullptr){ + // not the right type of node + return rv; + } + AstNode* node = expression->nodeForModification(); + + + LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; + + //FIXME -- technical debt -- code duplication / not all cases covered + switch(type){ + case EN::SORT: { + rv = isDistanceFunction(node,false); + } + break; + + case EN::FILTER: { + rv = iterativePreorderWithCondition(node,isGeoFilterExpression); + } + break; + + default: + LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression is not valid for geoindex"; + rv.invalidate(); // not required but make sure the result is invalid + } + + rv.executionNode = n; + rv.executionNodeType = type; + + return rv; +}; + +//checks sort and filter nodes for conditions +void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, std::vector& infos){ + SmallVector::allocator_type::arena_type a; + SmallVector nodes{a}; + plan->findNodesOfType(nodes, type, true); + for (auto& n : nodes) { + auto geoIndexInfo = identifyGeoOptimizationCandidate(type, plan, n); + if(!geoIndexInfo){ + continue; + } + infos.push_back(std::move(geoIndexInfo)); + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; + } +} + +////////////////////////////////////////////////////////////////////// +//modify plan + +// should go to candidate checking GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; LOG_TOPIC(DEBUG, 
Logger::DEVEL) << " enter argument check"; @@ -4051,7 +4242,44 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai return info; } +// builds a condition that can be used with the index interface and +// contains all parameters required by the GeoIndex +std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, + AstNode* lat, AstNode* lon, bool lessEqual = false, AstNode const* withRange = nullptr){ + auto ast = plan->getAst(); + auto varAstNode = ast->createNodeReference(info.collectionNode->outVariable()); + auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); + nAryAnd->reserve(withRange ? 4 : 2); + + auto latKey = ast->createNodeAttributeAccess(varAstNode, "latitude",8); + auto latEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, latKey, lat); + nAryAnd->addMember(latEq); + + auto lonKey = ast->createNodeAttributeAccess(varAstNode, "longitude",9); + auto lonEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lonKey, lon); + nAryAnd->addMember(lonEq); + + if(withRange){ + auto withKey = ast->createNodeAttributeAccess(varAstNode, "within",6); + auto withEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, withKey, withRange); + nAryAnd->addMember(withEq); + + auto lessKey = ast->createNodeAttributeAccess(varAstNode, "lesseq",6); + auto lessValue = ast->createNodeValueBool(lessEqual); + auto lessEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lessKey, lessValue); + nAryAnd->addMember(lessEq); + } + + auto unAryOr = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_OR, nAryAnd); + + auto condition = std::make_unique(ast); + condition->andCombine(unAryOr); + condition->normalize(plan); + return condition; +} + +// applys the optimization for a candidate bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ // FIXME -- technical debt -- this code should go to the candidate finding ///////////////////// @@ -4116,224 +4344,6 @@ bool 
applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ return true; }; -AstNode* isValueOrRefNode(AstNode* node){ - //TODO - implement me - return node; -} - -GeoIndexInfo isDistanceFunction(AstNode* node, bool inSubCondition){ - // the expression must exist and it must be a function call - auto rv = GeoIndexInfo{}; - if(node->type != NODE_TYPE_FCALL) { - return rv; - } - - //get the ast node of the expression - auto func = static_cast(node->getData()); - - // we're looking for "DISTANCE()", which is a function call - // with an empty parameters array - if ( func->externalName != "DISTANCE" || node->numMembers() != 1 ) { - return rv; - } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; - rv.node = node; - rv.inSubCondition = inSubCondition; - return rv; -} - -GeoIndexInfo iterativePreorderWithCondition(AstNode* root, GeoIndexInfo(*condition)(AstNode*, bool)){ - // returns on first hit - if (!root){ - return GeoIndexInfo{}; - } - bool inSubCondition = false; - std::vector nodestack; - nodestack.push_back(root); - - while(nodestack.size()){ - AstNode* current = nodestack.back(); - nodestack.pop_back(); - GeoIndexInfo rv = condition(current,inSubCondition); - inSubCondition = true; // only false for root - if (rv) { - return rv; - } - - if (current->type == NODE_TYPE_OPERATOR_BINARY_AND || current->type == NODE_TYPE_OPERATOR_NARY_AND ){ - for (std::size_t i = 0; i < current->numMembers(); ++i){ - nodestack.push_back(current->getMember(i)); - } - } - } - return GeoIndexInfo{}; -} - -GeoIndexInfo isGeoFilterExpression(AstNode* node, bool inSubCondition){ - // binary compare must be on top - bool dist_first = true; - bool lessEqual = true; - auto rv = GeoIndexInfo{}; - if( node->type != NODE_TYPE_OPERATOR_BINARY_GE - && node->type != NODE_TYPE_OPERATOR_BINARY_GT - && node->type != NODE_TYPE_OPERATOR_BINARY_LE - && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression does not contain 
<,<=,>=,>"; - return rv; - } else { - if (node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT){ - dist_first = false; - } - } - if (node->type == NODE_TYPE_OPERATOR_BINARY_GT || node->type == NODE_TYPE_OPERATOR_BINARY_LT){ - lessEqual = false; - } - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "binary operator found"; - // binary expression has 2 members - if(node->numMembers() != 2){ - return rv; - } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "operator has 2 members"; - - - AstNode* first = node->getMember(0); - AstNode* second = node->getMember(1); //FIXME -- const node - - node->dump(0); - - auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode* value_node){ - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "1: " << dist_first; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "2: " << (bool)dist_fun; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "3: " << (bool)value_node; - if (dist_first && dist_fun && value_node){ - dist_fun.within = true; - dist_fun.range = value_node; //FIXME - dist_fun.lessgreaterequal = lessEqual; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; - } else { - dist_fun.invalidate(); - } - return dist_fun; - }; - - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "frist check"; - rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, inSubCondition), isValueOrRefNode(second)); - if (!rv) { - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "second check"; - rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, inSubCondition), isValueOrRefNode(first)); - } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result " << (bool) rv; - - return rv; -} - - -GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ - ExecutionNode* setter = nullptr; - auto rv = GeoIndexInfo{}; - //TODO - iterate over elements of conjunction / disjunction - LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER IDENTIFY"; - switch(type){ - case EN::SORT: { - LOG_TOPIC(DEBUG, Logger::DEVEL) << 
"found sort node"; - auto node = static_cast(n); - auto& elements = node->getElements(); - - // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion - if ( !(elements.size() == 1 && elements[0].second)) { - //test on second makes sure the SORT is ascending - return rv; - } - - //variable of sort expression - auto variable = elements[0].first; - TRI_ASSERT(variable != nullptr); - - //// find the expression that is bound to the variable - // get the expression node that holds the calculation - setter = plan->getVarSetBy(variable->id); - } - break; - - case EN::FILTER: { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "found filter node"; - auto node = static_cast(n); - - // filter nodes always have one input variable - auto varsUsedHere = node->getVariablesUsedHere(); - TRI_ASSERT(varsUsedHere.size() == 1); - - // now check who introduced our variable - auto variable = varsUsedHere[0]; - setter = plan->getVarSetBy(variable->id); - } - break; - - default: - return rv; - } - - - // common part - extract astNode from setter witch is a calculation node - if (setter == nullptr || setter->getType() != EN::CALCULATION) { - return rv; - } - LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; - - // downcast to calculation node and get expression - auto cn = static_cast(setter); - auto expression = cn->expression(); - - // the expression must exist and it must have an astNode - if (expression == nullptr || expression->node() == nullptr){ - // not the right type of node - return rv; - } - AstNode* node = expression->nodeForModification(); - - - LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; - - //FIXME -- technical debt -- code duplication / not all cases covered - switch(type){ - case EN::SORT: { - rv = isDistanceFunction(node,false); - } - break; - - case EN::FILTER: { - rv = iterativePreorderWithCondition(node,isGeoFilterExpression); - } - break; - - default: - LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression is 
not valid for geoindex"; - rv.invalidate(); // not required but make sure the result is invalid - } - - rv.executionNode = n; - rv.executionNodeType = type; - - return rv; -}; - -void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, std::vector& infos){ - SmallVector::allocator_type::arena_type a; - SmallVector nodes{a}; - plan->findNodesOfType(nodes, type, true); - for (auto& n : nodes) { - auto geoIndexInfo = identifyGeoOptimizationCandidate(type, plan, n); - if(!geoIndexInfo){ - continue; - } - infos.push_back(std::move(geoIndexInfo)); - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; - } -} - void arangodb::aql::geoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) { From 275e36b60349e60112ffa188dd0fc8f9d076b179 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 19:49:10 +0100 Subject: [PATCH 25/53] add pointer that needs to be replaced with true in sort or filter condition --- arangod/Aql/OptimizerRules.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 9fab7b6f35..dc65dc837e 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3923,12 +3923,13 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, // GEO RULES ////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// struct GeoIndexInfo{ - operator bool() const { return node && valid; } + operator bool() const { return distanceNode && valid; } void invalidate() { valid = false; } GeoIndexInfo() : collectionNode(nullptr) , executionNode(nullptr) - , node(nullptr) + , expressionNode(nullptr) + , distanceNode(nullptr) , index(nullptr) , range(nullptr) , executionNodeType(EN::ILLEGAL) @@ -3939,7 +3940,8 @@ struct GeoIndexInfo{ {} EnumerateCollectionNode* collectionNode; // node that 
will be replaced by (geo) IndexNode ExecutionNode* executionNode; // start node hat is a sort or filter - AstNode const* node; // AstNode that contains the sort/filter condition + AstNode const* expressionNode; // AstNode that contains the sort/filter condition + AstNode const* distanceNode; // AstNode that contains the distance parameters std::shared_ptr index; //pointer to geoindex AstNode const* range; // range for within ExecutionNode::NodeType executionNodeType; // type of execution node sort or filter @@ -3959,23 +3961,24 @@ AstNode* isValueOrRefNode(AstNode* node){ return node; } -GeoIndexInfo isDistanceFunction(AstNode* node, bool inSubCondition){ +GeoIndexInfo isDistanceFunction(AstNode* distanceNode, bool inSubCondition){ // the expression must exist and it must be a function call auto rv = GeoIndexInfo{}; - if(node->type != NODE_TYPE_FCALL) { + if(distanceNode->type != NODE_TYPE_FCALL) { return rv; } //get the ast node of the expression - auto func = static_cast(node->getData()); + auto func = static_cast(distanceNode->getData()); // we're looking for "DISTANCE()", which is a function call // with an empty parameters array - if ( func->externalName != "DISTANCE" || node->numMembers() != 1 ) { + if ( func->externalName != "DISTANCE" || distanceNode->numMembers() != 1 ) { return rv; } //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; - rv.node = node; + rv.distanceNode = distanceNode; + rv.expressionNode = distanceNode; rv.inSubCondition = inSubCondition; return rv; } @@ -4012,8 +4015,6 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, bool inSubCondition){ AstNode* first = node->getMember(0); AstNode* second = node->getMember(1); //FIXME -- const node - node->dump(0); - auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode* value_node){ //LOG_TOPIC(DEBUG, Logger::DEVEL) << "1: " << dist_first; //LOG_TOPIC(DEBUG, Logger::DEVEL) << "2: " << (bool)dist_fun; @@ -4038,6 +4039,11 @@ GeoIndexInfo 
isGeoFilterExpression(AstNode* node, bool inSubCondition){ } //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result " << (bool) rv; + if(rv){ + //this must be set after checking if the node contains a distance node. + rv.expressionNode = node; + } + return rv; } @@ -4283,7 +4289,7 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ // FIXME -- technical debt -- this code should go to the candidate finding ///////////////////// - auto const& functionArguments = info.node->getMember(0); + auto const& functionArguments = info.distanceNode->getMember(0); if(functionArguments->numMembers() < 4){ return false; } From d1a3e4482d39d0b69446ff4c64e2b72aa55caa73 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 5 Dec 2016 20:18:31 +0100 Subject: [PATCH 26/53] condition replace should be almost working --- arangod/Aql/OptimizerRules.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index dc65dc837e..aaa0ceab67 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3940,8 +3940,8 @@ struct GeoIndexInfo{ {} EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode ExecutionNode* executionNode; // start node hat is a sort or filter - AstNode const* expressionNode; // AstNode that contains the sort/filter condition - AstNode const* distanceNode; // AstNode that contains the distance parameters + AstNode* expressionNode; // AstNode that contains the sort/filter condition + AstNode* distanceNode; // AstNode that contains the distance parameters std::shared_ptr index; //pointer to geoindex AstNode const* range; // range for within ExecutionNode::NodeType executionNodeType; // type of execution node sort or filter @@ -4285,6 +4285,18 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& return condition; } 
+//replaces the geoCondition with true. +//void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ +void replaceGeoCondition(GeoIndexInfo& info){ + //auto ast = plan->getAst(); + //ast->createNodeValueBool(true); + if( info.inSubCondition ) { + info.expressionNode->clearMembers(); + info.expressionNode->setValueType(VALUE_TYPE_BOOL); + info.expressionNode->setBoolValue(true); + } +} + // applys the optimization for a candidate bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ @@ -4341,6 +4353,8 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ plan->registerNode(inode); condition.release(); + //replaceGeoCondition(info); + if(info.executionNodeType == EN::SORT){ plan->unlinkNode(info.executionNode); } From 71be8c215a091bd6abcabd3ba8fafba751ce3b7c Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 6 Dec 2016 10:00:11 +0100 Subject: [PATCH 27/53] node replacement is now working --- arangod/Aql/OptimizerRules.cpp | 65 +++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index aaa0ceab67..add4571ada 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3928,6 +3928,7 @@ struct GeoIndexInfo{ GeoIndexInfo() : collectionNode(nullptr) , executionNode(nullptr) + , expressionParent(nullptr) , expressionNode(nullptr) , distanceNode(nullptr) , index(nullptr) @@ -3936,10 +3937,10 @@ struct GeoIndexInfo{ , within(false) , lessgreaterequal(false) , valid(true) - , inSubCondition(false) {} EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode ExecutionNode* executionNode; // start node hat is a sort or filter + AstNode* expressionParent; // AstNode that is the parent of the Node AstNode* expressionNode; // AstNode that contains the sort/filter condition AstNode* distanceNode; // AstNode that contains the distance parameters 
std::shared_ptr index; //pointer to geoindex @@ -3948,7 +3949,6 @@ struct GeoIndexInfo{ bool within; // is this a within lookup bool lessgreaterequal; // is this a check for le/ge (true) or lt/gt (false) bool valid; // contains this node a valid condition - bool inSubCondition; std::vector longitude; // access path to longitude std::vector latitude; // access path to latitude }; @@ -3961,7 +3961,7 @@ AstNode* isValueOrRefNode(AstNode* node){ return node; } -GeoIndexInfo isDistanceFunction(AstNode* distanceNode, bool inSubCondition){ +GeoIndexInfo isDistanceFunction(AstNode* distanceNode, AstNode* expressionParent){ // the expression must exist and it must be a function call auto rv = GeoIndexInfo{}; if(distanceNode->type != NODE_TYPE_FCALL) { @@ -3979,11 +3979,11 @@ GeoIndexInfo isDistanceFunction(AstNode* distanceNode, bool inSubCondition){ //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; rv.distanceNode = distanceNode; rv.expressionNode = distanceNode; - rv.inSubCondition = inSubCondition; + rv.expressionParent = expressionParent; return rv; } -GeoIndexInfo isGeoFilterExpression(AstNode* node, bool inSubCondition){ +GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ // binary compare must be on top bool dist_first = true; bool lessEqual = true; @@ -4032,10 +4032,10 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, bool inSubCondition){ //LOG_TOPIC(DEBUG, Logger::DEVEL) << "frist check"; - rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, inSubCondition), isValueOrRefNode(second)); + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, expressionParent), isValueOrRefNode(second)); if (!rv) { //LOG_TOPIC(DEBUG, Logger::DEVEL) << "second check"; - rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, inSubCondition), isValueOrRefNode(first)); + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, expressionParent), isValueOrRefNode(first)); } //LOG_TOPIC(DEBUG, 
Logger::DEVEL) << "result " << (bool) rv; @@ -4047,27 +4047,25 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, bool inSubCondition){ return rv; } -GeoIndexInfo iterativePreorderWithCondition(AstNode* root, GeoIndexInfo(*condition)(AstNode*, bool)){ +GeoIndexInfo iterativePreorderWithCondition(AstNode* root, GeoIndexInfo(*condition)(AstNode*, AstNode*)){ // returns on first hit if (!root){ return GeoIndexInfo{}; } - bool inSubCondition = false; - std::vector nodestack; - nodestack.push_back(root); + std::vector> nodestack; + nodestack.push_back({root,nullptr}); while(nodestack.size()){ - AstNode* current = nodestack.back(); + auto current = nodestack.back(); nodestack.pop_back(); - GeoIndexInfo rv = condition(current,inSubCondition); - inSubCondition = true; // only false for root + GeoIndexInfo rv = condition(current.first,current.second); if (rv) { return rv; } - if (current->type == NODE_TYPE_OPERATOR_BINARY_AND || current->type == NODE_TYPE_OPERATOR_NARY_AND ){ - for (std::size_t i = 0; i < current->numMembers(); ++i){ - nodestack.push_back(current->getMember(i)); + if (current.first->type == NODE_TYPE_OPERATOR_BINARY_AND || current.first->type == NODE_TYPE_OPERATOR_NARY_AND ){ + for (std::size_t i = 0; i < current.first->numMembers(); ++i){ + nodestack.push_back({current.first->getMember(i),current.first}); } } } @@ -4143,12 +4141,12 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec //FIXME -- technical debt -- code duplication / not all cases covered switch(type){ case EN::SORT: { - rv = isDistanceFunction(node,false); + rv = isDistanceFunction(node,nullptr); } break; case EN::FILTER: { - rv = iterativePreorderWithCondition(node,isGeoFilterExpression); + rv = iterativePreorderWithCondition(node,&isGeoFilterExpression); } break; @@ -4287,14 +4285,21 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& //replaces the geoCondition with true. 
//void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ -void replaceGeoCondition(GeoIndexInfo& info){ - //auto ast = plan->getAst(); - //ast->createNodeValueBool(true); - if( info.inSubCondition ) { - info.expressionNode->clearMembers(); - info.expressionNode->setValueType(VALUE_TYPE_BOOL); - info.expressionNode->setBoolValue(true); - } +void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ + + if( info.expressionParent ) { + auto ast = plan->getAst(); + auto replacement = ast->createNodeValueBool(true); + info.expressionParent->dump(0); + for(std::size_t i = 0; i < info.expressionParent->numMembers(); ++i){ + if(info.expressionParent->getMember(i) == info.expressionNode){ + info.expressionParent->removeMemberUnchecked(i); + info.expressionParent->addMember(replacement); + } + } + info.expressionParent->dump(0); + } + } // applys the optimization for a candidate @@ -4353,9 +4358,11 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ plan->registerNode(inode); condition.release(); - //replaceGeoCondition(info); + replaceGeoCondition(plan, info); - if(info.executionNodeType == EN::SORT){ + // if executionNode is sort OR a filter without further sub conditions + // the node can be unlinked + if( info.executionNodeType == EN::SORT || !info.expressionParent){ plan->unlinkNode(info.executionNode); } plan->replaceNode(res.collectionNode,inode); From 2bae135b84e4843d4dc0e576ab427d8d3eb41121 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 6 Dec 2016 10:12:12 +0100 Subject: [PATCH 28/53] add test case --- js/server/tests/aql/aql-optimizer-geoindex.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index c38fe4435e..ef837b9fd6 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -148,7 +148,8 @@ function optimizerRuleTestSuite() { 
//query clust sort filter [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d", false, false, false ], [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d", false, false, false ], - [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, true ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false ], + [ "FOR i in 1..2 FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 && i > 1 LIMIT 1 RETURN d", false, false, true ], ]; queries.forEach(function(query) { From a941808ea8f902be2a5ea9e9b9637681ee1bbf79 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 6 Dec 2016 10:29:38 +0100 Subject: [PATCH 29/53] remove debug code for performance testing --- arangod/Aql/OptimizerRules.cpp | 52 ++++++++++++++++------------------ arangod/Indexes/GeoIndex.cpp | 7 ----- 2 files changed, 24 insertions(+), 35 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index add4571ada..05f0cacfa0 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4076,11 +4076,9 @@ GeoIndexInfo iterativePreorderWithCondition(AstNode* root, GeoIndexInfo(*conditi GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ ExecutionNode* setter = nullptr; auto rv = GeoIndexInfo{}; - //TODO - iterate over elements of conjunction / disjunction - LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER IDENTIFY"; switch(type){ case EN::SORT: { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "found sort node"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "found sort node"; auto node = static_cast(n); auto& elements = node->getElements(); @@ -4101,7 +4099,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec break; case EN::FILTER: { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "found filter node"; + 
//LOG_TOPIC(DEBUG, Logger::DEVEL) << "found filter node"; auto node = static_cast(n); // filter nodes always have one input variable @@ -4122,7 +4120,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec if (setter == nullptr || setter->getType() != EN::CALCULATION) { return rv; } - LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; // downcast to calculation node and get expression auto cn = static_cast(setter); @@ -4136,7 +4134,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec AstNode* node = expression->nodeForModification(); - LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; //FIXME -- technical debt -- code duplication / not all cases covered switch(type){ @@ -4182,11 +4180,11 @@ void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* p // should go to candidate checking GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; - LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; // first and second should be based on the same document - need to provide the document // in order to see which collection is bound to it and if that collections supports geo-index if( !pair.first->isAttributeAccessForVariable() || !pair.second->isAttributeAccessForVariable()){ - LOG_TOPIC(DEBUG, Logger::DEVEL) << " not both args are of type attribute access"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " not both args are of type attribute access"; info.invalidate(); return info; } @@ -4198,14 +4196,14 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai SV accessPath1{pair.first->getString()}; SV accessPath2{pair.second->getString()}; - 
LOG_TOPIC(DEBUG, Logger::DEVEL) << " got setter"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " got setter"; if(setter1 == setter2){ if(setter1->getType() == EN::ENUMERATE_COLLECTION){ auto collNode = reinterpret_cast(setter1); auto coll = collNode->collection(); //what kind of indexes does it have on what attributes auto lcoll = coll->getCollection(); // TODO - check collection for suitable geo-indexes - LOG_TOPIC(DEBUG, Logger::DEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); for(auto indexShardPtr : lcoll->getIndexes()){ // get real index arangodb::Index& index = *indexShardPtr.get(); @@ -4216,19 +4214,19 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai continue; } - ///////////////////////////////////////////////// - //FIXME - REMOVE DEBUG CODE LATER - auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; - for(auto vec : vecs ){ - for(auto path : vec){ - std::cout << "AccessPath VECTOR: "; - for(auto word : path){ - std::cout << word << " "; - } - std::cout << std::endl; - } - } - ///////////////////////////////////////////////// + // ///////////////////////////////////////////////// + // //FIXME - REMOVE DEBUG CODE LATER + // auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; + // for(auto vec : vecs ){ + // for(auto path : vec){ + // std::cout << "AccessPath VECTOR: "; + // for(auto word : path){ + // std::cout << word << " "; + // } + // std::cout << std::endl; + // } + // } + // ///////////////////////////////////////////////// //check access paths of attributes in ast and those in index match if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ @@ -4290,14 +4288,12 @@ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ if( info.expressionParent ) { auto ast = plan->getAst(); auto replacement = ast->createNodeValueBool(true); 
- info.expressionParent->dump(0); for(std::size_t i = 0; i < info.expressionParent->numMembers(); ++i){ if(info.expressionParent->getMember(i) == info.expressionNode){ info.expressionParent->removeMemberUnchecked(i); info.expressionParent->addMember(replacement); } } - info.expressionParent->dump(0); } } @@ -4375,7 +4371,7 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) { - LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; std::vector infos; checkNodesForGeoOptimization(EN::SORT, plan, infos); @@ -4390,6 +4386,6 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, } opt->addPlan(plan, rule, modified); - LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; - LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; } diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index c60362bc72..0d605f9a02 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -57,27 +57,20 @@ GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, } void GeoIndexIterator::evaluateCondition() { - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER evaluate Condition"; - if (_condition) { auto numMembers = _condition->numMembers(); if(numMembers >= 2){ _lat = _condition->getMember(0)->getMember(1)->getDoubleValue(); - LOG_TOPIC(DEBUG, Logger::DEVEL) << "lat: " << _lat; _lon = _condition->getMember(1)->getMember(1)->getDoubleValue(); - LOG_TOPIC(DEBUG, Logger::DEVEL) << "lon: " << _lon; } if (numMembers == 2){ //near _near = true; - LOG_TOPIC(DEBUG, Logger::DEVEL) << "INDEX CONFIGURED FOR NEAR"; } else { //within _near = false; _withinRange = _condition->getMember(2)->getMember(1)->getDoubleValue(); _withinLessEq = _condition->getMember(3)->getMember(1)->getDoubleValue(); - - LOG_TOPIC(DEBUG, Logger::DEVEL) << 
"INDEX CONFIGURED FOR WITHIN with range " << _withinRange; } } else { From 3e24624c6e4576544eb5f9814f4cb508fe635059 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 6 Dec 2016 15:56:22 +0100 Subject: [PATCH 30/53] back to performance --- arangod/Aql/OptimizerRules.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 05f0cacfa0..368257fd12 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4149,7 +4149,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec break; default: - LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression is not valid for geoindex"; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression is not valid for geoindex"; rv.invalidate(); // not required but make sure the result is invalid } @@ -4169,6 +4169,7 @@ void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* p if(!geoIndexInfo){ continue; } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ADDING Candidate"; infos.push_back(std::move(geoIndexInfo)); //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; } @@ -4204,6 +4205,7 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai auto lcoll = coll->getCollection(); // TODO - check collection for suitable geo-indexes //LOG_TOPIC(DEBUG, Logger::DEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " COLLECTION - number of indexes: " << lcoll->getIndexes().size(); for(auto indexShardPtr : lcoll->getIndexes()){ // get real index arangodb::Index& index = *indexShardPtr.get(); @@ -4211,9 +4213,12 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai // check if current index is a geo-index if( index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX && index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "Index type not of Geo: " 
<< (int) index.type(); continue; } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "Index is a GeoIndex" << coll->getName(); + // ///////////////////////////////////////////////// // //FIXME - REMOVE DEBUG CODE LATER // auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; @@ -4300,12 +4305,14 @@ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ // applys the optimization for a candidate bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER applyGeoOptimization"; // FIXME -- technical debt -- this code should go to the candidate finding ///////////////////// auto const& functionArguments = info.distanceNode->getMember(0); if(functionArguments->numMembers() < 4){ return false; } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "distance function has 4 arguments"; std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; @@ -4313,6 +4320,8 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ auto result1 = geoDistanceFunctionArgCheck(argPair1, plan, info); auto result2 = geoDistanceFunctionArgCheck(argPair2, plan, info); + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result1: " << result1 << "result2" << result2; + // xor only one argument pair shall have a geoIndex if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ return false; @@ -4386,6 +4395,6 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, } opt->addPlan(plan, rule, modified); - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; //LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; } From 541675504964002a41d5ac11cf6dc68f93eab4b5 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Wed, 7 Dec 2016 15:06:44 +0100 Subject: [PATCH 31/53] avoid use of geo-index-rule 
in cases where it could yield an invalid result --- arangod/Aql/Optimizer.h | 2 +- arangod/Aql/OptimizerRules.cpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arangod/Aql/Optimizer.h b/arangod/Aql/Optimizer.h index 502b173602..0ec7d6661a 100644 --- a/arangod/Aql/Optimizer.h +++ b/arangod/Aql/Optimizer.h @@ -145,7 +145,7 @@ class Optimizer { // remove redundant OR conditions removeRedundantOrRule_pass6 = 820, - applyGeoIndexRule = 1060, + applyGeoIndexRule = 825, useIndexesRule_pass6 = 830, diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 368257fd12..a683928e06 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4201,6 +4201,7 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai if(setter1 == setter2){ if(setter1->getType() == EN::ENUMERATE_COLLECTION){ auto collNode = reinterpret_cast(setter1); + auto coll = collNode->collection(); //what kind of indexes does it have on what attributes auto lcoll = coll->getCollection(); // TODO - check collection for suitable geo-indexes @@ -4339,6 +4340,35 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ constantPair = &argPair1; } + // We are not allowed to be a inner loop + if(res.collectionNode->isInInnerLoop()){ + return false; + } + + //// this works only as long as we just use lists of ExecutionNodes + // avoid other constructs between sort/filter and enumerate collection + ExecutionNode* current = res.executionNode->getFirstDependency(); + ExecutionNode* end = res.collectionNode; + while(current != end){ + if( current->getType() == EN::SORT + || current->getType() == EN::COLLECT + || current->getType() == EN::FILTER + || current->getType() == EN::ENUMERATE_COLLECTION + || current->getType() == EN::INDEX + ){ + return false; + } + current = current->getFirstDependency(); + } + + // avoid sorts above index node + while(current != plan->root()){ + 
if(current->getType() == EN::SORT){ + return false; + } + current = current->getFirstDependency(); + } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] // << ", " << res.longitude // << " of collection:" << res.collectionNode->collection()->getName() From b6606727eef0dd816f8155bac169a8f5698367e5 Mon Sep 17 00:00:00 2001 From: jsteemann Date: Thu, 8 Dec 2016 11:13:10 +0100 Subject: [PATCH 32/53] fixes --- arangod/Aql/OptimizerRules.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index a683928e06..7b9039b1de 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4349,7 +4349,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ // avoid other constructs between sort/filter and enumerate collection ExecutionNode* current = res.executionNode->getFirstDependency(); ExecutionNode* end = res.collectionNode; - while(current != end){ + while(current != end && current != nullptr){ if( current->getType() == EN::SORT || current->getType() == EN::COLLECT || current->getType() == EN::FILTER @@ -4362,7 +4362,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ } // avoid sorts above index node - while(current != plan->root()){ + while(current != plan->root() && current != nullptr){ if(current->getType() == EN::SORT){ return false; } @@ -4425,6 +4425,6 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, } opt->addPlan(plan, rule, modified); - LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; //LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; } From 1acfd654674dc5d5e141e3491aadecc1b97277c1 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 12 Dec 2016 13:30:38 +0100 Subject: [PATCH 33/53] work on geo-index in cluster --- arangod/Aql/OptimizerRules.cpp | 39 
++++++++++++++++++++++++++++------ arangod/Aql/SortNode.cpp | 2 +- arangod/Aql/SortNode.h | 3 +++ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 7b9039b1de..61ecf0a4a4 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -2727,6 +2727,7 @@ void arangodb::aql::distributeFilternCalcToClusterRule( void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) { + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER DISTRIBUTE SORT RULE"; SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::GATHER, true); @@ -2782,6 +2783,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, stopSearching = true; break; case EN::SORT: + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "APPLY DISTRIBUTE SORT"; auto thisSortNode = static_cast(inspectNode); // remember our cursor... @@ -2789,7 +2791,9 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, // then unlink the filter/calculator from the plan plan->unlinkNode(inspectNode); // and re-insert into plan in front of the remoteNode - plan->insertDependency(rn, inspectNode); + if(thisSortNode->_reinsertInCluster){ + plan->insertDependency(rn, inspectNode); + } gatherNode->setElements(thisSortNode->getElements()); modified = true; // ready to rumble! 
@@ -3922,6 +3926,13 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, /////////////////////////////////////////////////////////////////////////////// // GEO RULES ////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// +// +// TODO +// +// - SORT cond0, cond1, cond2 ( conditions are seperated bu , not &&) +// - Cluster: scatter gather(with merge) +// - When filter and sort match on condition it is ok to have them nested + struct GeoIndexInfo{ operator bool() const { return distanceNode && valid; } void invalidate() { valid = false; } @@ -4047,7 +4058,7 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ return rv; } -GeoIndexInfo iterativePreorderWithCondition(AstNode* root, GeoIndexInfo(*condition)(AstNode*, AstNode*)){ +GeoIndexInfo iterativePreorderWithCondition(EN::NodeType type, AstNode* root, GeoIndexInfo(*condition)(AstNode*, AstNode*)){ // returns on first hit if (!root){ return GeoIndexInfo{}; @@ -4063,10 +4074,18 @@ GeoIndexInfo iterativePreorderWithCondition(AstNode* root, GeoIndexInfo(*conditi return rv; } - if (current.first->type == NODE_TYPE_OPERATOR_BINARY_AND || current.first->type == NODE_TYPE_OPERATOR_NARY_AND ){ - for (std::size_t i = 0; i < current.first->numMembers(); ++i){ - nodestack.push_back({current.first->getMember(i),current.first}); + if (type == EN::FILTER){ + if (current.first->type == NODE_TYPE_OPERATOR_BINARY_AND || current.first->type == NODE_TYPE_OPERATOR_NARY_AND ){ + for (std::size_t i = 0; i < current.first->numMembers(); ++i){ + nodestack.push_back({current.first->getMember(i),current.first}); + } } + } else if (type == EN::SORT) { + //if (current.first->type == NODE_TYPE_OPERATOR_){ + // for (std::size_t i = 0; i < current.first->numMembers(); ++i){ + // nodestack.push_back({current.first->getMember(i),current.first}); + // } + //} } } return GeoIndexInfo{}; @@ -4139,12 +4158,13 @@ GeoIndexInfo 
identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec //FIXME -- technical debt -- code duplication / not all cases covered switch(type){ case EN::SORT: { + // check comma separated parts of condition cond0, cond1, cond2 rv = isDistanceFunction(node,nullptr); } break; case EN::FILTER: { - rv = iterativePreorderWithCondition(node,&isGeoFilterExpression); + rv = iterativePreorderWithCondition(type, node, &isGeoFilterExpression); } break; @@ -4398,7 +4418,12 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ // if executionNode is sort OR a filter without further sub conditions // the node can be unlinked if( info.executionNodeType == EN::SORT || !info.expressionParent){ - plan->unlinkNode(info.executionNode); + if (!arangodb::ServerState::instance()->isCoordinator()) { + plan->unlinkNode(info.executionNode); + } else if (info.executionNodeType == EN::SORT){ + //make sure sort is not reinserted in cluster + static_cast(info.executionNode)->_reinsertInCluster = false; + } } plan->replaceNode(res.collectionNode,inode); diff --git a/arangod/Aql/SortNode.cpp b/arangod/Aql/SortNode.cpp index cc260878ee..8fa0facb1d 100644 --- a/arangod/Aql/SortNode.cpp +++ b/arangod/Aql/SortNode.cpp @@ -32,7 +32,7 @@ using namespace arangodb::aql; SortNode::SortNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base, SortElementVector const& elements, bool stable) - : ExecutionNode(plan, base), _elements(elements), _stable(stable) {} + : ExecutionNode(plan, base), _reinsertInCluster(true), _elements(elements), _stable(stable){} /// @brief toVelocyPack, for SortNode void SortNode::toVelocyPackHelper(VPackBuilder& nodes, bool verbose) const { diff --git a/arangod/Aql/SortNode.h b/arangod/Aql/SortNode.h index 6f30c99316..2e9f2456d3 100644 --- a/arangod/Aql/SortNode.h +++ b/arangod/Aql/SortNode.h @@ -120,6 +120,9 @@ class SortNode : public ExecutionNode { /// values (e.g. 
when a FILTER condition exists that guarantees this) void removeConditions(size_t count); + // reinsert node when building gather node - this is used e.g for the geo-index + bool _reinsertInCluster; + private: /// @brief pairs, consisting of variable and sort direction /// (true = ascending | false = descending) From 634eeabc0bb500951c4aaa9e9b3b87405dfe32e1 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 12 Dec 2016 13:46:20 +0100 Subject: [PATCH 34/53] add cluster to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index bc55957536..0f684a89e9 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ Debug32/ Release64/ Release32/ WindowsLibraries/ +cluster/ core TAGS From 1d5e28d969efe7521a4b3a1573c8da1235271b2f Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 12 Dec 2016 15:36:43 +0100 Subject: [PATCH 35/53] fix geo-index for cluster case --- arangod/Aql/OptimizerRules.cpp | 4 ++-- arangod/Aql/SortNode.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 61ecf0a4a4..f926f5c26a 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4417,8 +4417,8 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ // if executionNode is sort OR a filter without further sub conditions // the node can be unlinked - if( info.executionNodeType == EN::SORT || !info.expressionParent){ - if (!arangodb::ServerState::instance()->isCoordinator()) { + if(!info.expressionParent){ + if (!arangodb::ServerState::instance()->isCoordinator() || info.executionNodeType == EN::FILTER) { plan->unlinkNode(info.executionNode); } else if (info.executionNodeType == EN::SORT){ //make sure sort is not reinserted in cluster diff --git a/arangod/Aql/SortNode.h b/arangod/Aql/SortNode.h index 2e9f2456d3..6338ec5acf 100644 --- a/arangod/Aql/SortNode.h +++ b/arangod/Aql/SortNode.h @@ -53,7 
+53,7 @@ class SortNode : public ExecutionNode { public: SortNode(ExecutionPlan* plan, size_t id, SortElementVector const& elements, bool stable) - : ExecutionNode(plan, id), _elements(elements), _stable(stable) {} + : ExecutionNode(plan, id), _reinsertInCluster(true), _elements(elements), _stable(stable) {} SortNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base, SortElementVector const& elements, bool stable); From 783a9aff8999410c720250c0f9c6a1e3c578a6cc Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 12 Dec 2016 16:24:52 +0100 Subject: [PATCH 36/53] check if nodes are really sorted --- js/server/tests/aql/aql-optimizer-geoindex.js | 53 ++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index ef837b9fd6..8382bfb9ec 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -49,7 +49,8 @@ function optimizerRuleTestSuite() { // quickly disable tests here var enabled = { basics : true, - sort : true + removeNodes : true, + sorted : true } var ruleName = "use-geoindex"; @@ -111,6 +112,34 @@ function optimizerRuleTestSuite() { assertEqual(node.type, type, "check whether this node is of type "+type); }; + var geodistance = function(latitude1, longitude1, latitude2, longitude2) { + //if (TYPEWEIGHT(latitude1) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(longitude1) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(latitude2) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(longitude2) !== TYPEWEIGHT_NUMBER) { + // WARN('DISTANCE', INTERNAL.errors.ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH); + // return null; + //} + + //var p1 = AQL_TO_NUMBER(latitude1) * (Math.PI / 180.0); + //var p2 = AQL_TO_NUMBER(latitude2) * (Math.PI / 180.0); + //var d1 = AQL_TO_NUMBER(latitude2 - latitude1) * (Math.PI / 180.0); + //var d2 = AQL_TO_NUMBER(longitude2 - longitude1) * (Math.PI / 180.0); + + var p1 = 
(latitude1) * (Math.PI / 180.0); + var p2 = (latitude2) * (Math.PI / 180.0); + var d1 = (latitude2 - latitude1) * (Math.PI / 180.0); + var d2 = (longitude2 - longitude1) * (Math.PI / 180.0); + + var a = Math.sin(d1 / 2.0) * Math.sin(d1 / 2.0) + + Math.cos(p1) * Math.cos(p2) * + Math.sin(d2 / 2.0) * Math.sin(d2 / 2.0); + var c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1.0 - a)); + + return (6371e3 * c); + } + + return { //////////////////////////////////////////////////////////////////////////////// @@ -144,7 +173,7 @@ function optimizerRuleTestSuite() { if(enabled.basics){ geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); - var queries = [ + var queries = [ //query clust sort filter [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d", false, false, false ], [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d", false, false, false ], @@ -181,8 +210,8 @@ function optimizerRuleTestSuite() { } }, // testRuleBasics - testRuleSort : function () { - if(enabled.sort){ + testRuleRemoveNodes : function () { + if(enabled.removeNodes){ var queries = [ [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 5 RETURN d", false, false, false ], [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 5 RETURN d", false, false, false ], @@ -202,12 +231,24 @@ function optimizerRuleTestSuite() { pairs = result.json.map(function(res){ return [res.lat,res.lon]; }); - internal.print(pairs) + //internal.print(pairs) assertEqual(expected[qindex].sort(),pairs.sort()) //expect(expected[qindex].sort()).to.be.equal(result.json.sort()) }); } - } // testRuleSort + }, // testRuleSort + + testRuleSorted : function(){ + if(enabled.sorted){ + var old=0; + var query = "FOR d IN " + colName + " SORT distance(d.lat, d.lon, 0, 0) RETURN distance(d.lat, d.lon, 0, 0)"; + var result = AQL_EXECUTE(query); + distances = result.json.map(d => { return parseFloat(d.toFixed(5))}); + 
//internal.print(distances); + old=0; + distances.forEach(d => { assertTrue( d >= old); old = d; }); + } + } //testSorted }; // test dictionary (return) } // optimizerRuleTestSuite From b8cc674219c7bc1670d62f11422768d96147a704 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 13 Dec 2016 10:59:02 +0100 Subject: [PATCH 37/53] prepare to fix filter-sort --- arangod/Aql/OptimizerRules.cpp | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index f926f5c26a..117a00b561 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4437,17 +4437,40 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; - std::vector infos; - checkNodesForGeoOptimization(EN::SORT, plan, infos); - checkNodesForGeoOptimization(EN::FILTER, plan, infos); + std::vector filter_info; + std::vector sort_info; + checkNodesForGeoOptimization(EN::FILTER, plan, filter_info); + checkNodesForGeoOptimization(EN::SORT, plan, sort_info); bool modified = false; - for(auto& info : infos){ + GeoIndexInfo filter_applyed; + for(auto& info : filter_info){ if (applyGeoOptimization(true, plan, info)){ modified = true; + filter_applyed = info; break; // break on first replacement - might be relaxed later } } + + if (!modified){ + for(auto& info : sort_info){ + if (applyGeoOptimization(true, plan, info)){ + modified = true; + filter_applyed = info; + break; // break on first replacement - might be relaxed later + } + } + } else { + // find sort that matches filter used and delete sort node if it has only one condition + // check - memory to unlinked collection node should still be valid - see distributeSortToClusterRule + // both nodes must have matching collection and access path to args there may be no additional sort + // between the sort filter and index(collection) + + // implement + + LOG_TOPIC(DEBUG, Logger::DEVEL) 
<< "GEO RULE - check for sort: " << modified; + } + opt->addPlan(plan, rule, modified); //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; From aac38c8a00c27ca5625f920db9c9043e69665850 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 13 Dec 2016 11:42:49 +0100 Subject: [PATCH 38/53] move collection acccesspath check to identify section so we can compare nodes --- arangod/Aql/OptimizerRules.cpp | 151 +++++++++++++++++---------------- 1 file changed, 79 insertions(+), 72 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 117a00b561..c9e382eed9 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3948,6 +3948,7 @@ struct GeoIndexInfo{ , within(false) , lessgreaterequal(false) , valid(true) + , constantPair{nullptr,nullptr} {} EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode ExecutionNode* executionNode; // start node hat is a sort or filter @@ -3962,6 +3963,7 @@ struct GeoIndexInfo{ bool valid; // contains this node a valid condition std::vector longitude; // access path to longitude std::vector latitude; // access path to latitude + std::pair constantPair; }; ////////////////////////////////////////////////////////////////////// @@ -4179,26 +4181,6 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec return rv; }; -//checks sort and filter nodes for conditions -void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, std::vector& infos){ - SmallVector::allocator_type::arena_type a; - SmallVector nodes{a}; - plan->findNodesOfType(nodes, type, true); - for (auto& n : nodes) { - auto geoIndexInfo = identifyGeoOptimizationCandidate(type, plan, n); - if(!geoIndexInfo){ - continue; - } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ADDING Candidate"; - infos.push_back(std::move(geoIndexInfo)); - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; - } -} - 
-////////////////////////////////////////////////////////////////////// -//modify plan - -// should go to candidate checking GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; //LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; @@ -4270,10 +4252,73 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai return info; } +bool checkDistanceArguments(GeoIndexInfo& info, ExecutionPlan* plan){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER applyGeoOptimization"; + + auto const& functionArguments = info.distanceNode->getMember(0); + if(functionArguments->numMembers() < 4){ + return false; + } + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "distance function has 4 arguments"; + + std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; + std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; + + GeoIndexInfo result1 = geoDistanceFunctionArgCheck(argPair1, plan, info /*copy*/); + GeoIndexInfo result2 = geoDistanceFunctionArgCheck(argPair2, plan, info /*copy*/); + //info now conatins access path to collection + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result1: " << result1 << "result2" << result2; + + // xor only one argument pair shall have a geoIndex + if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ + info.invalidate(); + return false; + } + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; + + GeoIndexInfo res; + if(result1){ + info = std::move(result1); + info.constantPair = std::move(argPair2); + } else { + info = std::move(result2); + info.constantPair = std::move(argPair1); + } + + return true; +} + +//checks sort and filter nodes for conditions +void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, std::vector& infos){ + SmallVector::allocator_type::arena_type a; + SmallVector nodes{a}; + plan->findNodesOfType(nodes, type, true); + for 
(auto& n : nodes) { + auto geoIndexInfo = identifyGeoOptimizationCandidate(type, plan, n); + if(!geoIndexInfo){ + continue; + } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ADDING Candidate"; + if(checkDistanceArguments(geoIndexInfo, plan)){ + infos.push_back(std::move(geoIndexInfo)); + } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; + } +} + +////////////////////////////////////////////////////////////////////// +//modify plan + // builds a condition that can be used with the index interface and // contains all parameters required by the GeoIndex std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, - AstNode* lat, AstNode* lon, bool lessEqual = false, AstNode const* withRange = nullptr){ + bool lessEqual = false, AstNode const* withRange = nullptr){ + + AstNode* lat = info.constantPair.first; + AstNode* lon = info.constantPair.second; auto ast = plan->getAst(); auto varAstNode = ast->createNodeReference(info.collectionNode->outVariable()); @@ -4288,13 +4333,13 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& auto lonEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lonKey, lon); nAryAnd->addMember(lonEq); - if(withRange){ + if(info.within){ auto withKey = ast->createNodeAttributeAccess(varAstNode, "within",6); - auto withEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, withKey, withRange); + auto withEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, withKey, info.range); nAryAnd->addMember(withEq); auto lessKey = ast->createNodeAttributeAccess(varAstNode, "lesseq",6); - auto lessValue = ast->createNodeValueBool(lessEqual); + auto lessValue = ast->createNodeValueBool(info.lessgreaterequal); auto lessEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lessKey, lessValue); nAryAnd->addMember(lessEq); } @@ -4326,49 +4371,15 @@ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ // applys the optimization for a candidate bool 
applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER applyGeoOptimization"; - - // FIXME -- technical debt -- this code should go to the candidate finding ///////////////////// - auto const& functionArguments = info.distanceNode->getMember(0); - if(functionArguments->numMembers() < 4){ - return false; - } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "distance function has 4 arguments"; - - std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; - std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; - - auto result1 = geoDistanceFunctionArgCheck(argPair1, plan, info); - auto result2 = geoDistanceFunctionArgCheck(argPair2, plan, info); - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result1: " << result1 << "result2" << result2; - - // xor only one argument pair shall have a geoIndex - if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ - return false; - } - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; - - std::pair* constantPair; - GeoIndexInfo res; - if(result1){ - res = std::move(result1); - constantPair = &argPair2; - } else { - res = std::move(result2); - constantPair = &argPair1; - } - // We are not allowed to be a inner loop - if(res.collectionNode->isInInnerLoop()){ + if(info.collectionNode->isInInnerLoop()){ return false; } //// this works only as long as we just use lists of ExecutionNodes // avoid other constructs between sort/filter and enumerate collection - ExecutionNode* current = res.executionNode->getFirstDependency(); - ExecutionNode* end = res.collectionNode; + ExecutionNode* current = info.executionNode->getFirstDependency(); + ExecutionNode* end = info.collectionNode; while(current != end && current != nullptr){ if( current->getType() == EN::SORT || current->getType() == EN::COLLECT @@ -4399,16 +4410,12 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, 
GeoIndexInfo& info){ // FIXME - END ////////////////////////////////////////////////////////////// std::unique_ptr condition; - if(info.within){ - condition = buildGeoCondition(plan,res, constantPair->first, constantPair->second, info.lessgreaterequal, info.range); - } else { - condition = buildGeoCondition(plan,res, constantPair->first, constantPair->second); - } + condition = buildGeoCondition(plan,info); auto inode = new IndexNode( - plan, plan->nextId(), res.collectionNode->vocbase(), - res.collectionNode->collection(), res.collectionNode->outVariable(), - std::vector{Transaction::IndexHandle{res.index}}, + plan, plan->nextId(), info.collectionNode->vocbase(), + info.collectionNode->collection(), info.collectionNode->outVariable(), + std::vector{Transaction::IndexHandle{info.index}}, condition.get(), false); plan->registerNode(inode); condition.release(); @@ -4425,15 +4432,15 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ static_cast(info.executionNode)->_reinsertInCluster = false; } } - plan->replaceNode(res.collectionNode,inode); + plan->replaceNode(info.collectionNode,inode); //signal that plan has been changed return true; }; void arangodb::aql::geoIndexRule(Optimizer* opt, - ExecutionPlan* plan, - Optimizer::Rule const* rule) { + ExecutionPlan* plan, + Optimizer::Rule const* rule) { //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; From 7d93fe0bddd4eff3f4fa233e029d83888a4d072b Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 13 Dec 2016 11:48:40 +0100 Subject: [PATCH 39/53] add link to geo-index doc to sourcefile to clarify goals --- arangod/Aql/OptimizerRules.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index c9e382eed9..927d51f4a2 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3924,14 +3924,12 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, 
/////////////////////////////////////////////////////////////////////////////// -// GEO RULES ////////////////////////////////////////////////////////////////// +// GEO RULE /////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // -// TODO +// Description of what this Rule tries to achieve: +// https://docs.google.com/document/d/1G57UP08ZFywUXKi5cLvEIKpZP-AUKGwG9oAnFOX8LLo // -// - SORT cond0, cond1, cond2 ( conditions are seperated bu , not &&) -// - Cluster: scatter gather(with merge) -// - When filter and sort match on condition it is ok to have them nested struct GeoIndexInfo{ operator bool() const { return distanceNode && valid; } @@ -3939,6 +3937,7 @@ struct GeoIndexInfo{ GeoIndexInfo() : collectionNode(nullptr) , executionNode(nullptr) + , indexNode(nullptr) , expressionParent(nullptr) , expressionNode(nullptr) , distanceNode(nullptr) @@ -3952,6 +3951,7 @@ struct GeoIndexInfo{ {} EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode ExecutionNode* executionNode; // start node hat is a sort or filter + IndexNode* indexNode; // AstNode that is the parent of the Node AstNode* expressionParent; // AstNode that is the parent of the Node AstNode* expressionNode; // AstNode that contains the sort/filter condition AstNode* distanceNode; // AstNode that contains the distance parameters @@ -4026,7 +4026,7 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ AstNode* first = node->getMember(0); - AstNode* second = node->getMember(1); //FIXME -- const node + AstNode* second = node->getMember(1); auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode* value_node){ //LOG_TOPIC(DEBUG, Logger::DEVEL) << "1: " << dist_first; @@ -4034,7 +4034,7 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ //LOG_TOPIC(DEBUG, Logger::DEVEL) << "3: " << (bool)value_node; if 
(dist_first && dist_fun && value_node){ dist_fun.within = true; - dist_fun.range = value_node; //FIXME + dist_fun.range = value_node; dist_fun.lessgreaterequal = lessEqual; //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; } else { @@ -4083,11 +4083,7 @@ GeoIndexInfo iterativePreorderWithCondition(EN::NodeType type, AstNode* root, Ge } } } else if (type == EN::SORT) { - //if (current.first->type == NODE_TYPE_OPERATOR_){ - // for (std::size_t i = 0; i < current.first->numMembers(); ++i){ - // nodestack.push_back({current.first->getMember(i),current.first}); - // } - //} + // must be the only sort condition } } return GeoIndexInfo{}; @@ -4472,7 +4468,7 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, // check - memory to unlinked collection node should still be valid - see distributeSortToClusterRule // both nodes must have matching collection and access path to args there may be no additional sort // between the sort filter and index(collection) - + // implement LOG_TOPIC(DEBUG, Logger::DEVEL) << "GEO RULE - check for sort: " << modified; From e5fcdb7386cfcf34a7df8c81bb2883e5b0244162 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 13 Dec 2016 15:53:44 +0100 Subject: [PATCH 40/53] partial rewrite - now we start at endnodes and work upward towards singleton node --- arangod/Aql/OptimizerRules.cpp | 417 +++++++++++++++++---------------- 1 file changed, 213 insertions(+), 204 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 927d51f4a2..e4c1e2613e 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3950,7 +3950,7 @@ struct GeoIndexInfo{ , constantPair{nullptr,nullptr} {} EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode - ExecutionNode* executionNode; // start node hat is a sort or filter + ExecutionNode* executionNode; // start node that is a sort or filter IndexNode* indexNode; // AstNode that is the parent of the Node AstNode* 
expressionParent; // AstNode that is the parent of the Node AstNode* expressionNode; // AstNode that contains the sort/filter condition @@ -4089,6 +4089,119 @@ GeoIndexInfo iterativePreorderWithCondition(EN::NodeType type, AstNode* root, Ge return GeoIndexInfo{}; } +GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ + using SV = std::vector; + // LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; + // first and second should be based on the same document - need to provide the document + // in order to see which collection is bound to it and if that collections supports geo-index + if( !pair.first->isAttributeAccessForVariable() || !pair.second->isAttributeAccessForVariable()){ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << " not both args are of type attribute access"; + info.invalidate(); + return info; + } + + // expect access of the for doc.attribute + // TODO: more complex access path have to be added: loop until REFERENCE TYPE IS FOUND + auto setter1 = plan->getVarSetBy(static_cast(pair.first->getMember(0)->getData())->id); + auto setter2 = plan->getVarSetBy(static_cast(pair.second->getMember(0)->getData())->id); + SV accessPath1{pair.first->getString()}; + SV accessPath2{pair.second->getString()}; + + // LOG_TOPIC(DEBUG, Logger::DEVEL) << " got setter"; + if(setter1 == setter2){ + if(setter1->getType() == EN::ENUMERATE_COLLECTION){ + auto collNode = reinterpret_cast(setter1); + + auto coll = collNode->collection(); //what kind of indexes does it have on what attributes + auto lcoll = coll->getCollection(); + // TODO - check collection for suitable geo-indexes + // LOG_TOPIC(DEBUG, Logger::DEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); + // LOG_TOPIC(DEBUG, Logger::DEVEL) << " COLLECTION - number of indexes: " << lcoll->getIndexes().size(); + for(auto indexShardPtr : lcoll->getIndexes()){ + // get real index + arangodb::Index& index = *indexShardPtr.get(); + + // check if current index 
is a geo-index + if( index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX + && index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX){ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "Index type not of Geo: " << (int) index.type(); + continue; + } + + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "Index is a GeoIndex" << coll->getName(); + + // ///////////////////////////////////////////////// + // //FIXME - REMOVE DEBUG CODE LATER + // auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; + // for(auto vec : vecs ){ + // for(auto path : vec){ + // std::cout << "AccessPath VECTOR: "; + // for(auto word : path){ + // std::cout << word << " "; + // } + // std::cout << std::endl; + // } + // } + // ///////////////////////////////////////////////// + + //check access paths of attributes in ast and those in index match + if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ + info.collectionNode = collNode; + info.index = indexShardPtr; + info.longitude = std::move(accessPath1); + info.latitude = std::move(accessPath2); + return info; + } + } + } + } + + info.invalidate(); + return info; +} + +bool checkDistanceArguments(GeoIndexInfo& info, ExecutionPlan* plan){ + if(!info){ + return false; + } + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER applyGeoOptimization"; + + auto const& functionArguments = info.distanceNode->getMember(0); + if(functionArguments->numMembers() < 4){ + return false; + } + + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "distance function has 4 arguments"; + + std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; + std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; + + GeoIndexInfo result1 = geoDistanceFunctionArgCheck(argPair1, plan, info /*copy*/); + GeoIndexInfo result2 = geoDistanceFunctionArgCheck(argPair2, plan, info /*copy*/); + //info now conatins access path to collection + + // LOG_TOPIC(DEBUG, 
Logger::DEVEL) << "result1: " << result1 << "result2" << result2; + + // xor only one argument pair shall have a geoIndex + if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ + info.invalidate(); + return false; + } + + // LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; + + GeoIndexInfo res; + if(result1){ + info = std::move(result1); + info.constantPair = std::move(argPair2); + } else { + info = std::move(result2); + info.constantPair = std::move(argPair1); + } + + return true; +} + //checks a single sort or filter node GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ ExecutionNode* setter = nullptr; @@ -4174,137 +4287,11 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec rv.executionNode = n; rv.executionNodeType = type; + checkDistanceArguments(rv, plan); + return rv; }; -GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ - using SV = std::vector; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; - // first and second should be based on the same document - need to provide the document - // in order to see which collection is bound to it and if that collections supports geo-index - if( !pair.first->isAttributeAccessForVariable() || !pair.second->isAttributeAccessForVariable()){ - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " not both args are of type attribute access"; - info.invalidate(); - return info; - } - - // expect access of the for doc.attribute - // TODO: more complex access path have to be added: loop until REFERENCE TYPE IS FOUND - auto setter1 = plan->getVarSetBy(static_cast(pair.first->getMember(0)->getData())->id); - auto setter2 = plan->getVarSetBy(static_cast(pair.second->getMember(0)->getData())->id); - SV accessPath1{pair.first->getString()}; - SV accessPath2{pair.second->getString()}; - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " got setter"; - 
if(setter1 == setter2){ - if(setter1->getType() == EN::ENUMERATE_COLLECTION){ - auto collNode = reinterpret_cast(setter1); - - auto coll = collNode->collection(); //what kind of indexes does it have on what attributes - auto lcoll = coll->getCollection(); - // TODO - check collection for suitable geo-indexes - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " COLLECTION - number of indexes: " << lcoll->getIndexes().size(); - for(auto indexShardPtr : lcoll->getIndexes()){ - // get real index - arangodb::Index& index = *indexShardPtr.get(); - - // check if current index is a geo-index - if( index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX - && index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX){ - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "Index type not of Geo: " << (int) index.type(); - continue; - } - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "Index is a GeoIndex" << coll->getName(); - - // ///////////////////////////////////////////////// - // //FIXME - REMOVE DEBUG CODE LATER - // auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; - // for(auto vec : vecs ){ - // for(auto path : vec){ - // std::cout << "AccessPath VECTOR: "; - // for(auto word : path){ - // std::cout << word << " "; - // } - // std::cout << std::endl; - // } - // } - // ///////////////////////////////////////////////// - - //check access paths of attributes in ast and those in index match - if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ - info.collectionNode = collNode; - info.index = indexShardPtr; - info.longitude = std::move(accessPath1); - info.latitude = std::move(accessPath2); - return info; - } - } - } - } - - info.invalidate(); - return info; -} - -bool checkDistanceArguments(GeoIndexInfo& info, ExecutionPlan* plan){ - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER applyGeoOptimization"; - - auto const& 
functionArguments = info.distanceNode->getMember(0); - if(functionArguments->numMembers() < 4){ - return false; - } - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "distance function has 4 arguments"; - - std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; - std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; - - GeoIndexInfo result1 = geoDistanceFunctionArgCheck(argPair1, plan, info /*copy*/); - GeoIndexInfo result2 = geoDistanceFunctionArgCheck(argPair2, plan, info /*copy*/); - //info now conatins access path to collection - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result1: " << result1 << "result2" << result2; - - // xor only one argument pair shall have a geoIndex - if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ - info.invalidate(); - return false; - } - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; - - GeoIndexInfo res; - if(result1){ - info = std::move(result1); - info.constantPair = std::move(argPair2); - } else { - info = std::move(result2); - info.constantPair = std::move(argPair1); - } - - return true; -} - -//checks sort and filter nodes for conditions -void checkNodesForGeoOptimization(ExecutionNode::NodeType type, ExecutionPlan* plan, std::vector& infos){ - SmallVector::allocator_type::arena_type a; - SmallVector nodes{a}; - plan->findNodesOfType(nodes, type, true); - for (auto& n : nodes) { - auto geoIndexInfo = identifyGeoOptimizationCandidate(type, plan, n); - if(!geoIndexInfo){ - continue; - } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ADDING Candidate"; - if(checkDistanceArguments(geoIndexInfo, plan)){ - infos.push_back(std::move(geoIndexInfo)); - } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND NEAR OR WITHIN"; - } -} - ////////////////////////////////////////////////////////////////////// //modify plan @@ -4348,11 +4335,9 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& return condition; } -//replaces the 
geoCondition with true. -//void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ - if( info.expressionParent ) { + if( info.expressionParent && info.executionNodeType == EN::FILTER) { auto ast = plan->getAst(); auto replacement = ast->createNodeValueBool(true); for(std::size_t i = 0; i < info.expressionParent->numMembers(); ++i){ @@ -4366,69 +4351,59 @@ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ } // applys the optimization for a candidate -bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& info){ - // We are not allowed to be a inner loop - if(info.collectionNode->isInInnerLoop()){ +bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, GeoIndexInfo& second){ + if(!first && !second){ return false; } - //// this works only as long as we just use lists of ExecutionNodes - // avoid other constructs between sort/filter and enumerate collection - ExecutionNode* current = info.executionNode->getFirstDependency(); - ExecutionNode* end = info.collectionNode; - while(current != end && current != nullptr){ - if( current->getType() == EN::SORT - || current->getType() == EN::COLLECT - || current->getType() == EN::FILTER - || current->getType() == EN::ENUMERATE_COLLECTION - || current->getType() == EN::INDEX - ){ - return false; - } - current = current->getFirstDependency(); + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "GEO INDEX APPLY "; + + if(!first){ + first = std::move(second); + second.invalidate(); } - // avoid sorts above index node - while(current != plan->root() && current != nullptr){ - if(current->getType() == EN::SORT){ - return false; - } - current = current->getFirstDependency(); + // We are not allowed to be a inner loop + if(first.collectionNode->isInInnerLoop()){ + return false; } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] - // << ", " << res.longitude - // << " of collection:" << 
res.collectionNode->collection()->getName() - // << " are geoindexed"; - - //break; //remove this to make use of the index - - // FIXME - END ////////////////////////////////////////////////////////////// + // //LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] + // // << ", " << res.longitude + // // << " of collection:" << res.collectionNode->collection()->getName() + // // << " are geoindexed"; std::unique_ptr condition; - condition = buildGeoCondition(plan,info); + condition = buildGeoCondition(plan,first); auto inode = new IndexNode( - plan, plan->nextId(), info.collectionNode->vocbase(), - info.collectionNode->collection(), info.collectionNode->outVariable(), - std::vector{Transaction::IndexHandle{info.index}}, + plan, plan->nextId(), first.collectionNode->vocbase(), + first.collectionNode->collection(), first.collectionNode->outVariable(), + std::vector{Transaction::IndexHandle{first.index}}, condition.get(), false); plan->registerNode(inode); condition.release(); - replaceGeoCondition(plan, info); + replaceGeoCondition(plan, first); + replaceGeoCondition(plan, second); // if executionNode is sort OR a filter without further sub conditions // the node can be unlinked - if(!info.expressionParent){ - if (!arangodb::ServerState::instance()->isCoordinator() || info.executionNodeType == EN::FILTER) { - plan->unlinkNode(info.executionNode); - } else if (info.executionNodeType == EN::SORT){ - //make sure sort is not reinserted in cluster - static_cast(info.executionNode)->_reinsertInCluster = false; + auto unlinkNode = [&](GeoIndexInfo& info){ + if(info && !info.expressionParent){ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "info vaid and not expressionParent"; + if (!arangodb::ServerState::instance()->isCoordinator() || info.executionNodeType == EN::FILTER) { + plan->unlinkNode(info.executionNode); + } else if (info.executionNodeType == EN::SORT){ + //make sure sort is not reinserted in cluster + static_cast(info.executionNode)->_reinsertInCluster 
= false; + } } - } - plan->replaceNode(info.collectionNode,inode); + }; + + unlinkNode(first); + plan->replaceNode(first.collectionNode,inode); + unlinkNode(second); //signal that plan has been changed return true; @@ -4440,40 +4415,74 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; - std::vector filter_info; - std::vector sort_info; - checkNodesForGeoOptimization(EN::FILTER, plan, filter_info); - checkNodesForGeoOptimization(EN::SORT, plan, sort_info); - + SmallVector::allocator_type::arena_type a; + SmallVector nodes{a}; bool modified = false; - GeoIndexInfo filter_applyed; - for(auto& info : filter_info){ - if (applyGeoOptimization(true, plan, info)){ - modified = true; - filter_applyed = info; - break; // break on first replacement - might be relaxed later - } - } + //inspect each return node and work upwards to SingletonNode + plan->findEndNodes(nodes, true); + for (auto& node : nodes) { + GeoIndexInfo sortInfo{}; + GeoIndexInfo filterInfo{}; + auto current = node; - if (!modified){ - for(auto& info : sort_info){ - if (applyGeoOptimization(true, plan, info)){ - modified = true; - filter_applyed = info; - break; // break on first replacement - might be relaxed later + while (current){ + switch(current->getType()) { + case EN::SORT:{ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "hit sort"; + sortInfo = identifyGeoOptimizationCandidate(EN::SORT, plan, current); + // if(sortInfo){ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "sort valid"; + // } + } + break ; + case EN::FILTER:{ + + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "hit filter"; + filterInfo = identifyGeoOptimizationCandidate(EN::FILTER, plan, current); + // if(filterInfo){ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "filter valid"; + //} + } + break; + case EN::ENUMERATE_COLLECTION:{ + EnumerateCollectionNode* collnode = static_cast(current); + // if(sortInfo){ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "sortInfo valid collection:" << sortInfo.collectionNode; + // } 
+ // if(filterInfo){ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "filterInfo valid collection:" << filterInfo.collectionNode; + // } + if( (sortInfo && sortInfo.collectionNode!= collnode) + ||(filterInfo && filterInfo.collectionNode != collnode) + ){ + // LOG_TOPIC(DEBUG, Logger::DEVEL) << "invalidating ..."; + filterInfo.invalidate(); + sortInfo.invalidate(); + break; + } + if (/* filter and sort match && */ applyGeoOptimization(true, plan, filterInfo, sortInfo)){ + modified = true; + filterInfo.invalidate(); + sortInfo.invalidate(); + } + } + break; + + case EN::INDEX: + case EN::COLLECT:{ + filterInfo.invalidate(); + sortInfo.invalidate(); + break; + } + + default:{} //skip - do nothing + break; } + + current = current->getFirstDependency(); //inspect next node } - } else { - // find sort that matches filter used and delete sort node if it has only one condition - // check - memory to unlinked collection node should still be valid - see distributeSortToClusterRule - // both nodes must have matching collection and access path to args there may be no additional sort - // between the sort filter and index(collection) - // implement - - LOG_TOPIC(DEBUG, Logger::DEVEL) << "GEO RULE - check for sort: " << modified; } - opt->addPlan(plan, rule, modified); //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; From 615b85e5f5b39dbfc95c00e91d50563aaa933647 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 13 Dec 2016 16:09:00 +0100 Subject: [PATCH 41/53] add tests - sort && filter and add check for IndexNodes --- js/server/tests/aql/aql-optimizer-geoindex.js | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index 8382bfb9ec..a9713e1f11 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -174,11 +174,13 @@ function optimizerRuleTestSuite() { 
geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); var queries = [ - //query clust sort filter - [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d", false, false, false ], - [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d", false, false, false ], - [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false ], - [ "FOR i in 1..2 FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 && i > 1 LIMIT 1 RETURN d", false, false, true ], + //query clust sort filter index + [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d", false, false, false, true ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d", false, false, false, true ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false, true ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false, true ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false, true ], + [ "FOR i in 1..2 FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 && i > 1 LIMIT 1 RETURN d", false, false, true, false ], ]; queries.forEach(function(query) { @@ -206,6 +208,12 @@ function optimizerRuleTestSuite() { hasNoFilterNode(result); } + if (query[4]) { + hasIndexNode(result); + } else { + hasNoIndexNode(result); + } + }); } }, // testRuleBasics From 3d17672feb505da4f0fd87ec48c70a3fa7def84d Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Tue, 13 Dec 2016 17:03:01 +0100 Subject: [PATCH 42/53] better tests --- arangod/Aql/OptimizerRules.cpp | 2 +- js/server/tests/aql/aql-optimizer-geoindex.js | 96 ++++++++++++------- 2 files changed, 64 insertions(+), 34 deletions(-) diff --git 
a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index e4c1e2613e..5c5686f404 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4384,6 +4384,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G plan->registerNode(inode); condition.release(); + plan->replaceNode(first.collectionNode,inode); replaceGeoCondition(plan, first); replaceGeoCondition(plan, second); @@ -4402,7 +4403,6 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G }; unlinkNode(first); - plan->replaceNode(first.collectionNode,inode); unlinkNode(second); //signal that plan has been changed diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index a9713e1f11..23edece0c6 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -81,35 +81,35 @@ function optimizerRuleTestSuite() { } return 0; }; - var hasSortNode = function (plan) { - assertEqual(findExecutionNodes(plan, "SortNode").length, 1, "Has SortNode"); + var hasSortNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "SortNode").length, 1, query.string + " Has SortNode "); }; - var hasNoSortNode = function (plan) { - assertEqual(findExecutionNodes(plan, "SortNode").length, 0, "Has no SortNode"); + var hasNoSortNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "SortNode").length, 0, query.string + " Has no SortNode"); }; - var hasFilterNode = function (plan) { - assertEqual(findExecutionNodes(plan, "FilterNode").length, 1, "Has FilterNode"); + var hasFilterNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "FilterNode").length, 1, query.string + " Has FilterNode"); }; - var hasNoFilterNode = function (plan) { - assertEqual(findExecutionNodes(plan, "FilterNode").length, 0, "Has no FilterNode"); + var hasNoFilterNode = function (plan,query) { + 
assertEqual(findExecutionNodes(plan, "FilterNode").length, 0, query.string + " Has no FilterNode"); }; - var hasNoIndexNode = function (plan) { - assertEqual(findExecutionNodes(plan, "IndexNode").length, 0, "Has no IndexNode"); + var hasNoIndexNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "IndexNode").length, 0, query.string + " Has no IndexNode"); }; - var hasNoResultsNode = function (plan) { - assertEqual(findExecutionNodes(plan, "NoResultsNode").length, 1, "Has NoResultsNode"); + var hasNoResultsNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "NoResultsNode").length, 1, query.string + " Has NoResultsNode"); }; - var hasCalculationNodes = function (plan, countXPect) { + var hasCalculationNodes = function (plan,query, countXPect) { assertEqual(findExecutionNodes(plan, "CalculationNode").length, countXPect, "Has " + countXPect + " CalculationNode"); }; - var hasIndexNode = function (plan) { - var rn = findExecutionNodes(plan, "IndexNode"); - assertEqual(rn.length, 1, "Has IndexNode"); + var hasIndexNode = function (plan,query) { + var rn = findExecutionNodes(plan,"IndexNode"); + assertEqual(rn.length, 1, query.string + "Has IndexNode"); return; }; var isNodeType = function(node, type) { - assertEqual(node.type, type, "check whether this node is of type "+type); + assertEqual(node.type, type, query.string + " check whether this node is of type "+type); }; var geodistance = function(latitude1, longitude1, latitude2, longitude2) { @@ -175,16 +175,46 @@ function optimizerRuleTestSuite() { var queries = [ //query clust sort filter index - [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d", false, false, false, true ], - [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d", false, false, false, true ], - [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false, true ], - [ "FOR d IN " + colName + " SORT 
distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false, true ], - [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d", false, false, false, true ], - [ "FOR i in 1..2 FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 && i > 1 LIMIT 1 RETURN d", false, false, true, false ], + { string : "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR i in 1..2 FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 && i > 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : true + , index : false + }, ]; queries.forEach(function(query) { - var result = AQL_EXPLAIN(query[0]); + var result = AQL_EXPLAIN(query.string); // //optimized on cluster // if (query[1]) { @@ -195,23 +225,23 @@ function optimizerRuleTestSuite() { // } //sort nodes - if (query[2]) { - hasSortNode(result); + if (query.sort) { + hasSortNode(result,query); } else { - hasNoSortNode(result); + hasNoSortNode(result,query); } //filter nodes - if (query[3]) { - hasFilterNode(result); 
+ if (query.filter) { + hasFilterNode(result,query); } else { - hasNoFilterNode(result); + hasNoFilterNode(result,query); } - if (query[4]) { - hasIndexNode(result); + if (query.index){ + hasIndexNode(result,query); } else { - hasNoIndexNode(result); + hasNoIndexNode(result,query); } }); From 0f8b496c570de6bfe814f06a97f23de9fc8696d9 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Wed, 14 Dec 2016 10:46:24 +0100 Subject: [PATCH 43/53] add debugging info for geoindex --- arangod/Aql/OptimizerRules.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 5c5686f404..2a5a140567 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4384,7 +4384,10 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G plan->registerNode(inode); condition.release(); + LOG_TOPIC(DEBUG, Logger::DEVEL) << "replacing node, type: " << first.collectionNode->getType() + << " with type: " << inode->getType(); plan->replaceNode(first.collectionNode,inode); + replaceGeoCondition(plan, first); replaceGeoCondition(plan, second); @@ -4406,6 +4409,7 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G unlinkNode(second); //signal that plan has been changed + LOG_TOPIC(DEBUG, Logger::DEVEL) << "plan modified"; return true; }; @@ -4460,7 +4464,7 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, sortInfo.invalidate(); break; } - if (/* filter and sort match && */ applyGeoOptimization(true, plan, filterInfo, sortInfo)){ + if (applyGeoOptimization(true, plan, filterInfo, sortInfo)){ modified = true; filterInfo.invalidate(); sortInfo.invalidate(); @@ -4485,6 +4489,6 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, } opt->addPlan(plan, rule, modified); - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; + LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; 
//LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; } From 2a404cfabe4b3fa2e608bed71bb0366c2bcc5e35 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Wed, 14 Dec 2016 12:21:21 +0100 Subject: [PATCH 44/53] add dump of ExectionPlan. Index Node seems to be created! --- arangod/Aql/OptimizerRules.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 2a5a140567..23e73832fa 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -51,6 +51,8 @@ #include #include +#include + using namespace arangodb; using namespace arangodb::aql; using EN = arangodb::aql::ExecutionNode; @@ -4384,9 +4386,18 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G plan->registerNode(inode); condition.release(); + arangodb::velocypack::Builder builder; + bool withFigures = false; + plan->root()->toVelocyPack(builder, withFigures); + std::cout << builder.toString(); + builder.clear(); + LOG_TOPIC(DEBUG, Logger::DEVEL) << "replacing node, type: " << first.collectionNode->getType() << " with type: " << inode->getType(); plan->replaceNode(first.collectionNode,inode); + + plan->root()->toVelocyPack(builder, withFigures); + std::cout << builder.toString(); replaceGeoCondition(plan, first); replaceGeoCondition(plan, second); From 75823fe879c6fbcec103871467a744a6fdd99839 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Fri, 16 Dec 2016 15:19:10 +0100 Subject: [PATCH 45/53] another try to fix the plan --- arangod/Aql/OptimizerRules.cpp | 42 +++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 23e73832fa..ff61fb0d2f 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3940,6 +3940,8 @@ struct GeoIndexInfo{ : collectionNode(nullptr) , executionNode(nullptr) , indexNode(nullptr) + , setter(nullptr) + , 
setterClone(nullptr) , expressionParent(nullptr) , expressionNode(nullptr) , distanceNode(nullptr) @@ -3954,6 +3956,8 @@ struct GeoIndexInfo{ EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode ExecutionNode* executionNode; // start node that is a sort or filter IndexNode* indexNode; // AstNode that is the parent of the Node + ExecutionNode* setter; // node that has contains the condition for filter or sort + ExecutionNode* setterClone; AstNode* expressionParent; // AstNode that is the parent of the Node AstNode* expressionNode; // AstNode that contains the sort/filter condition AstNode* distanceNode; // AstNode that contains the distance parameters @@ -4252,10 +4256,14 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec if (setter == nullptr || setter->getType() != EN::CALCULATION) { return rv; } + + //clone setter here + auto setterClone = setter->clone(plan,true,true); + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; // downcast to calculation node and get expression - auto cn = static_cast(setter); + auto cn = static_cast(setterClone); auto expression = cn->expression(); // the expression must exist and it must have an astNode @@ -4288,6 +4296,8 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec rv.executionNode = n; rv.executionNodeType = type; + rv.setter = setter; + rv.setterClone = setterClone; checkDistanceArguments(rv, plan); @@ -4338,7 +4348,6 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& } void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ - if( info.expressionParent && info.executionNodeType == EN::FILTER) { auto ast = plan->getAst(); auto replacement = ast->createNodeValueBool(true); @@ -4348,8 +4357,9 @@ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ info.expressionParent->addMember(replacement); } } - } - + info.setterClone->setId(1000); + 
plan->replaceNode(info.setter,info.setterClone); + } } // applys the optimization for a candidate @@ -4386,18 +4396,18 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G plan->registerNode(inode); condition.release(); - arangodb::velocypack::Builder builder; - bool withFigures = false; - plan->root()->toVelocyPack(builder, withFigures); - std::cout << builder.toString(); - builder.clear(); + //arangodb::velocypack::Builder builder; + //bool withFigures = false; + //plan->root()->toVelocyPack(builder, withFigures); + //std::cout << builder.toString(); + //builder.clear(); LOG_TOPIC(DEBUG, Logger::DEVEL) << "replacing node, type: " << first.collectionNode->getType() << " with type: " << inode->getType(); plan->replaceNode(first.collectionNode,inode); - plan->root()->toVelocyPack(builder, withFigures); - std::cout << builder.toString(); + //plan->root()->toVelocyPack(builder, withFigures); + //std::cout << builder.toString(); replaceGeoCondition(plan, first); replaceGeoCondition(plan, second); @@ -4435,6 +4445,7 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, bool modified = false; //inspect each return node and work upwards to SingletonNode plan->findEndNodes(nodes, true); + ExecutionPlan* newPlan = nullptr; for (auto& node : nodes) { GeoIndexInfo sortInfo{}; GeoIndexInfo filterInfo{}; @@ -4475,7 +4486,8 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, sortInfo.invalidate(); break; } - if (applyGeoOptimization(true, plan, filterInfo, sortInfo)){ + newPlan = plan->clone(); + if (applyGeoOptimization(true, newPlan, filterInfo, sortInfo)){ modified = true; filterInfo.invalidate(); sortInfo.invalidate(); @@ -4498,7 +4510,11 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, } } - opt->addPlan(plan, rule, modified); + if (modified){ + opt->addPlan(newPlan, rule, modified); + } else { + opt->addPlan(plan, rule, modified); + } LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; //LOG_TOPIC(DEBUG, 
Logger::DEVEL) << ""; From 32e1c7eac441d4d6f5fd8c6fa1f4988735df8473 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 19 Dec 2016 09:16:09 +0100 Subject: [PATCH 46/53] manually cloning fixes the issue --- arangod/Aql/OptimizerRules.cpp | 57 ++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index ff61fb0d2f..c65b512f40 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3941,7 +3941,6 @@ struct GeoIndexInfo{ , executionNode(nullptr) , indexNode(nullptr) , setter(nullptr) - , setterClone(nullptr) , expressionParent(nullptr) , expressionNode(nullptr) , distanceNode(nullptr) @@ -3956,8 +3955,7 @@ struct GeoIndexInfo{ EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode ExecutionNode* executionNode; // start node that is a sort or filter IndexNode* indexNode; // AstNode that is the parent of the Node - ExecutionNode* setter; // node that has contains the condition for filter or sort - ExecutionNode* setterClone; + CalculationNode* setter; // node that has contains the condition for filter or sort AstNode* expressionParent; // AstNode that is the parent of the Node AstNode* expressionNode; // AstNode that contains the sort/filter condition AstNode* distanceNode; // AstNode that contains the distance parameters @@ -4257,14 +4255,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec return rv; } - //clone setter here - auto setterClone = setter->clone(plan,true,true); - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "found setter node for calcuation"; - - // downcast to calculation node and get expression - auto cn = static_cast(setterClone); - auto expression = cn->expression(); + auto expression = static_cast(setter)->expression(); // the expression must exist and it must have an astNode if (expression == nullptr || expression->node() == nullptr){ @@ -4296,8 
+4287,7 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec rv.executionNode = n; rv.executionNodeType = type; - rv.setter = setter; - rv.setterClone = setterClone; + rv.setter = static_cast(setter); checkDistanceArguments(rv, plan); @@ -4349,16 +4339,31 @@ std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ if( info.expressionParent && info.executionNodeType == EN::FILTER) { + auto ast = plan->getAst(); + CalculationNode* newNode = nullptr; + Expression* expr = new Expression(ast, static_cast(info.setter)->expression()->nodeForModification()->clone(ast)); + + try { + newNode = new CalculationNode(plan, plan->nextId(), expr, static_cast(info.setter)->outVariable()); + } catch (...) { + delete expr; + throw; + } + + plan->registerNode(newNode); + plan->replaceNode(info.setter, newNode); + + auto replaceInfo = iterativePreorderWithCondition(EN::FILTER, newNode->expression()->nodeForModification(), &isGeoFilterExpression); + auto replacement = ast->createNodeValueBool(true); - for(std::size_t i = 0; i < info.expressionParent->numMembers(); ++i){ - if(info.expressionParent->getMember(i) == info.expressionNode){ - info.expressionParent->removeMemberUnchecked(i); - info.expressionParent->addMember(replacement); + for(std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); ++i){ + if(replaceInfo.expressionParent->getMember(i) == replaceInfo.expressionNode){ + replaceInfo.expressionParent->removeMemberUnchecked(i); + replaceInfo.expressionParent->addMember(replacement); } } - info.setterClone->setId(1000); - plan->replaceNode(info.setter,info.setterClone); + } } @@ -4445,7 +4450,7 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, bool modified = false; //inspect each return node and work upwards to SingletonNode plan->findEndNodes(nodes, true); - ExecutionPlan* newPlan = nullptr; + //ExecutionPlan* newPlan = nullptr; for (auto& node : nodes) { 
GeoIndexInfo sortInfo{}; GeoIndexInfo filterInfo{}; @@ -4486,8 +4491,8 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, sortInfo.invalidate(); break; } - newPlan = plan->clone(); - if (applyGeoOptimization(true, newPlan, filterInfo, sortInfo)){ + //newPlan = plan->clone(); + if (applyGeoOptimization(true, plan, filterInfo, sortInfo)){ modified = true; filterInfo.invalidate(); sortInfo.invalidate(); @@ -4510,11 +4515,11 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, } } - if (modified){ - opt->addPlan(newPlan, rule, modified); - } else { + //if (modified){ + // opt->addPlan(newPlan, rule, modified); + //} else { opt->addPlan(plan, rule, modified); - } + //} LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; //LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; From 004da0c614ee2e9f6e3442b41a363ec8dbe4c01c Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 19 Dec 2016 10:52:42 +0100 Subject: [PATCH 47/53] only do not use geoindex in inner loop if we want to sort --- arangod/Aql/OptimizerRules.cpp | 14 ++++++-------- js/server/tests/aql/aql-optimizer-geoindex.js | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index c65b512f40..ec2a53437d 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4343,7 +4343,7 @@ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ auto ast = plan->getAst(); CalculationNode* newNode = nullptr; Expression* expr = new Expression(ast, static_cast(info.setter)->expression()->nodeForModification()->clone(ast)); - + try { newNode = new CalculationNode(plan, plan->nextId(), expr, static_cast(info.setter)->outVariable()); } catch (...) 
{ @@ -4353,7 +4353,7 @@ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ plan->registerNode(newNode); plan->replaceNode(info.setter, newNode); - + auto replaceInfo = iterativePreorderWithCondition(EN::FILTER, newNode->expression()->nodeForModification(), &isGeoFilterExpression); auto replacement = ast->createNodeValueBool(true); @@ -4381,10 +4381,11 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G } // We are not allowed to be a inner loop - if(first.collectionNode->isInInnerLoop()){ + if(first.collectionNode->isInInnerLoop() && first.executionNodeType == EN::SORT){ return false; } + LOG_TOPIC(DEBUG, Logger::DEVEL) << "NO INNER LOOP"; // //LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] // // << ", " << res.longitude // // << " of collection:" << res.collectionNode->collection()->getName() @@ -4515,11 +4516,8 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, } } - //if (modified){ - // opt->addPlan(newPlan, rule, modified); - //} else { - opt->addPlan(plan, rule, modified); - //} + + opt->addPlan(plan, rule, modified); LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; //LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js index 23edece0c6..318bf39457 100644 --- a/js/server/tests/aql/aql-optimizer-geoindex.js +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -209,7 +209,7 @@ function optimizerRuleTestSuite() { , cluster : false , sort : false , filter : true - , index : false + , index : true }, ]; From 3952bf20096ba0a3ee3882e28fb32581a2a22792 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 19 Dec 2016 11:00:52 +0100 Subject: [PATCH 48/53] remove debug code --- arangod/Aql/OptimizerRules.cpp | 93 +--------------------------------- 1 file changed, 1 insertion(+), 92 deletions(-) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp 
index ec2a53437d..22f0704363 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -3993,7 +3993,6 @@ GeoIndexInfo isDistanceFunction(AstNode* distanceNode, AstNode* expressionParent if ( func->externalName != "DISTANCE" || distanceNode->numMembers() != 1 ) { return rv; } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND DISTANCE FUNCTION"; rv.distanceNode = distanceNode; rv.expressionNode = distanceNode; rv.expressionParent = expressionParent; @@ -4010,7 +4009,6 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ && node->type != NODE_TYPE_OPERATOR_BINARY_LE && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression does not contain <,<=,>=,>"; return rv; } else { if (node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT){ @@ -4021,26 +4019,18 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ lessEqual = false; } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "binary operator found"; - // binary expression has 2 members if(node->numMembers() != 2){ return rv; } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "operator has 2 members"; - AstNode* first = node->getMember(0); AstNode* second = node->getMember(1); auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode* value_node){ - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "1: " << dist_first; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "2: " << (bool)dist_fun; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "3: " << (bool)value_node; if (dist_first && dist_fun && value_node){ dist_fun.within = true; dist_fun.range = value_node; dist_fun.lessgreaterequal = lessEqual; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "FOUND WITHIN"; } else { dist_fun.invalidate(); } @@ -4048,13 +4038,10 @@ GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ }; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "frist check"; rv = eval_stuff(dist_first, lessEqual, 
isDistanceFunction(first, expressionParent), isValueOrRefNode(second)); if (!rv) { - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "second check"; rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, expressionParent), isValueOrRefNode(first)); } - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "result " << (bool) rv; if(rv){ //this must be set after checking if the node contains a distance node. @@ -4095,11 +4082,9 @@ GeoIndexInfo iterativePreorderWithCondition(EN::NodeType type, AstNode* root, Ge GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; - // LOG_TOPIC(DEBUG, Logger::DEVEL) << " enter argument check"; // first and second should be based on the same document - need to provide the document // in order to see which collection is bound to it and if that collections supports geo-index if( !pair.first->isAttributeAccessForVariable() || !pair.second->isAttributeAccessForVariable()){ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << " not both args are of type attribute access"; info.invalidate(); return info; } @@ -4111,7 +4096,6 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai SV accessPath1{pair.first->getString()}; SV accessPath2{pair.second->getString()}; - // LOG_TOPIC(DEBUG, Logger::DEVEL) << " got setter"; if(setter1 == setter2){ if(setter1->getType() == EN::ENUMERATE_COLLECTION){ auto collNode = reinterpret_cast(setter1); @@ -4119,8 +4103,6 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai auto coll = collNode->collection(); //what kind of indexes does it have on what attributes auto lcoll = coll->getCollection(); // TODO - check collection for suitable geo-indexes - // LOG_TOPIC(DEBUG, Logger::DEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); - // LOG_TOPIC(DEBUG, Logger::DEVEL) << " COLLECTION - number of indexes: " << lcoll->getIndexes().size(); for(auto indexShardPtr : lcoll->getIndexes()){ // get real index arangodb::Index& index = 
*indexShardPtr.get(); @@ -4128,26 +4110,9 @@ GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pai // check if current index is a geo-index if( index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX && index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX){ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "Index type not of Geo: " << (int) index.type(); continue; } - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "Index is a GeoIndex" << coll->getName(); - - // ///////////////////////////////////////////////// - // //FIXME - REMOVE DEBUG CODE LATER - // auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; - // for(auto vec : vecs ){ - // for(auto path : vec){ - // std::cout << "AccessPath VECTOR: "; - // for(auto word : path){ - // std::cout << word << " "; - // } - // std::cout << std::endl; - // } - // } - // ///////////////////////////////////////////////// - //check access paths of attributes in ast and those in index match if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ info.collectionNode = collNode; @@ -4168,15 +4133,12 @@ bool checkDistanceArguments(GeoIndexInfo& info, ExecutionPlan* plan){ if(!info){ return false; } - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER applyGeoOptimization"; auto const& functionArguments = info.distanceNode->getMember(0); if(functionArguments->numMembers() < 4){ return false; } - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "distance function has 4 arguments"; - std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; @@ -4184,16 +4146,12 @@ bool checkDistanceArguments(GeoIndexInfo& info, ExecutionPlan* plan){ GeoIndexInfo result2 = geoDistanceFunctionArgCheck(argPair2, plan, info /*copy*/); //info now conatins access path to collection - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "result1: " << result1 << "result2" << result2; - // 
xor only one argument pair shall have a geoIndex if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ info.invalidate(); return false; } - // LOG_TOPIC(DEBUG, Logger::DEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; - GeoIndexInfo res; if(result1){ info = std::move(result1); @@ -4212,7 +4170,6 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec auto rv = GeoIndexInfo{}; switch(type){ case EN::SORT: { - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "found sort node"; auto node = static_cast(n); auto& elements = node->getElements(); @@ -4233,7 +4190,6 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec break; case EN::FILTER: { - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "found filter node"; auto node = static_cast(n); // filter nodes always have one input variable @@ -4264,9 +4220,6 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec } AstNode* node = expression->nodeForModification(); - - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "checking expression of calcaulation"; - //FIXME -- technical debt -- code duplication / not all cases covered switch(type){ case EN::SORT: { @@ -4281,7 +4234,6 @@ GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, Exec break; default: - //LOG_TOPIC(DEBUG, Logger::DEVEL) << "expression is not valid for geoindex"; rv.invalidate(); // not required but make sure the result is invalid } @@ -4373,8 +4325,6 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G return false; } - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "GEO INDEX APPLY "; - if(!first){ first = std::move(second); second.invalidate(); @@ -4385,12 +4335,6 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G return false; } - LOG_TOPIC(DEBUG, Logger::DEVEL) << "NO INNER LOOP"; - // //LOG_TOPIC(DEBUG, Logger::DEVEL) << " attributes: " << res.longitude[0] - // // << ", " << res.longitude - // // << " of collection:" 
<< res.collectionNode->collection()->getName() - // // << " are geoindexed"; - std::unique_ptr condition; condition = buildGeoCondition(plan,first); @@ -4402,19 +4346,6 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G plan->registerNode(inode); condition.release(); - //arangodb::velocypack::Builder builder; - //bool withFigures = false; - //plan->root()->toVelocyPack(builder, withFigures); - //std::cout << builder.toString(); - //builder.clear(); - - LOG_TOPIC(DEBUG, Logger::DEVEL) << "replacing node, type: " << first.collectionNode->getType() - << " with type: " << inode->getType(); - plan->replaceNode(first.collectionNode,inode); - - //plan->root()->toVelocyPack(builder, withFigures); - //std::cout << builder.toString(); - replaceGeoCondition(plan, first); replaceGeoCondition(plan, second); @@ -4422,7 +4353,6 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G // the node can be unlinked auto unlinkNode = [&](GeoIndexInfo& info){ if(info && !info.expressionParent){ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "info vaid and not expressionParent"; if (!arangodb::ServerState::instance()->isCoordinator() || info.executionNodeType == EN::FILTER) { plan->unlinkNode(info.executionNode); } else if (info.executionNodeType == EN::SORT){ @@ -4436,7 +4366,6 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G unlinkNode(second); //signal that plan has been changed - LOG_TOPIC(DEBUG, Logger::DEVEL) << "plan modified"; return true; }; @@ -4460,39 +4389,22 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, while (current){ switch(current->getType()) { case EN::SORT:{ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "hit sort"; sortInfo = identifyGeoOptimizationCandidate(EN::SORT, plan, current); - // if(sortInfo){ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "sort valid"; - // } } break ; case EN::FILTER:{ - - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "hit filter"; filterInfo = 
identifyGeoOptimizationCandidate(EN::FILTER, plan, current); - // if(filterInfo){ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "filter valid"; - //} } break; case EN::ENUMERATE_COLLECTION:{ EnumerateCollectionNode* collnode = static_cast(current); - // if(sortInfo){ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "sortInfo valid collection:" << sortInfo.collectionNode; - // } - // if(filterInfo){ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "filterInfo valid collection:" << filterInfo.collectionNode; - // } if( (sortInfo && sortInfo.collectionNode!= collnode) ||(filterInfo && filterInfo.collectionNode != collnode) ){ - // LOG_TOPIC(DEBUG, Logger::DEVEL) << "invalidating ..."; filterInfo.invalidate(); sortInfo.invalidate(); break; } - //newPlan = plan->clone(); if (applyGeoOptimization(true, plan, filterInfo, sortInfo)){ modified = true; filterInfo.invalidate(); @@ -4514,11 +4426,8 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, current = current->getFirstDependency(); //inspect next node } - } opt->addPlan(plan, rule, modified); - - LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; - //LOG_TOPIC(DEBUG, Logger::DEVEL) << ""; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; } From 5234ac36217a133e583d9a6a6f8b36af37c26bc8 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 19 Dec 2016 11:06:32 +0100 Subject: [PATCH 49/53] Squashed commit of the following: commit 3952bf20096ba0a3ee3882e28fb32581a2a22792 Author: Jan Christoph Uhde Date: Mon Dec 19 11:00:52 2016 +0100 remove debug code commit 004da0c614ee2e9f6e3442b41a363ec8dbe4c01c Author: Jan Christoph Uhde Date: Mon Dec 19 10:52:42 2016 +0100 only do not use geoindex in inner loop if we want to sort commit 32e1c7eac441d4d6f5fd8c6fa1f4988735df8473 Author: Jan Christoph Uhde Date: Mon Dec 19 09:16:09 2016 +0100 manually cloning fixes the issue commit 75823fe879c6fbcec103871467a744a6fdd99839 Author: Jan Christoph Uhde Date: Fri Dec 16 15:19:10 2016 +0100 another try 
to fix the plan commit 2a404cfabe4b3fa2e608bed71bb0366c2bcc5e35 Author: Jan Christoph Uhde Date: Wed Dec 14 12:21:21 2016 +0100 add dump of ExectionPlan. Index Node seems to be created! commit 0f8b496c570de6bfe814f06a97f23de9fc8696d9 Author: Jan Christoph Uhde Date: Wed Dec 14 10:46:24 2016 +0100 add debugging info for geoindex commit 3d17672feb505da4f0fd87ec48c70a3fa7def84d Author: Jan Christoph Uhde Date: Tue Dec 13 17:03:01 2016 +0100 better tests commit 615b85e5f5b39dbfc95c00e91d50563aaa933647 Author: Jan Christoph Uhde Date: Tue Dec 13 16:09:00 2016 +0100 add tests - sort && filter and add check for IndexNodes commit e5fcdb7386cfcf34a7df8c81bb2883e5b0244162 Author: Jan Christoph Uhde Date: Tue Dec 13 15:53:44 2016 +0100 partial rewrite - now we start at endnodes and work upward towards singleton node commit 7d93fe0bddd4eff3f4fa233e029d83888a4d072b Author: Jan Christoph Uhde Date: Tue Dec 13 11:48:40 2016 +0100 add link to geo-index doc to sourcefile to clarify goals commit aac38c8a00c27ca5625f920db9c9043e69665850 Author: Jan Christoph Uhde Date: Tue Dec 13 11:42:49 2016 +0100 move collection acccesspath check to identify section so we can compare nodes commit b8cc674219c7bc1670d62f11422768d96147a704 Author: Jan Christoph Uhde Date: Tue Dec 13 10:59:02 2016 +0100 prepare to fix filter-sort commit 783a9aff8999410c720250c0f9c6a1e3c578a6cc Author: Jan Christoph Uhde Date: Mon Dec 12 16:24:52 2016 +0100 check if nodes are really sorted commit 1d5e28d969efe7521a4b3a1573c8da1235271b2f Author: Jan Christoph Uhde Date: Mon Dec 12 15:36:43 2016 +0100 fix geo-index for cluster case commit 634eeabc0bb500951c4aaa9e9b3b87405dfe32e1 Author: Jan Christoph Uhde Date: Mon Dec 12 13:46:20 2016 +0100 add cluster to .gitignore commit ae33a790fbc26e9fe00ec5efe5fe24d4657d4548 Merge: 1acfd65 27099a1 Author: Jan Christoph Uhde Date: Mon Dec 12 13:31:54 2016 +0100 Merge branch 'devel' into obi-geo-index * devel: Clarify the default value of req.body fix VS warning moveShard jobs 
running cppcheck cppcheck commit 1acfd654674dc5d5e141e3491aadecc1b97277c1 Author: Jan Christoph Uhde Date: Mon Dec 12 13:30:38 2016 +0100 work on geo-index in cluster commit 32d0bdc8469b404ef53ad897f8f3abb5ba8ccc4d Merge: b660672 ad4ba24 Author: Jan Christoph Uhde Date: Mon Dec 12 09:15:36 2016 +0100 Merge branch 'devel' of github.com:arangodb/arangodb into obi-geo-index * 'devel' of github.com:arangodb/arangodb: (132 commits) try to fix travis build Generate unique Swagger operationIds Add support for Swagger tags Add swagger route to Foxx API Require at least one arg in route def ported velocypack compatibility fix from arangodb/velocypack use bulk allocator for index elements fix memleak fix test in cluster remove unused code fixed resilience Update zwagger Readd satellite node Fix link Document OAuth1 Nicer OAuth2 docs properly start the tarball fix VS compile errors clones method in Jobs more useful we don't need the dpkg-shlibdeps workaround anymore. ... commit b6606727eef0dd816f8155bac169a8f5698367e5 Author: jsteemann Date: Thu Dec 8 11:13:10 2016 +0100 fixes commit 541675504964002a41d5ac11cf6dc68f93eab4b5 Author: Jan Christoph Uhde Date: Wed Dec 7 15:06:44 2016 +0100 avoid use of geo-index-rule in cases where it could yield an invalid result commit 3e24624c6e4576544eb5f9814f4cb508fe635059 Author: Jan Christoph Uhde Date: Tue Dec 6 15:56:22 2016 +0100 back to performance commit a941808ea8f902be2a5ea9e9b9637681ee1bbf79 Author: Jan Christoph Uhde Date: Tue Dec 6 10:29:38 2016 +0100 remove debug code for performance testing commit 2bae135b84e4843d4dc0e576ab427d8d3eb41121 Author: Jan Christoph Uhde Date: Tue Dec 6 10:12:12 2016 +0100 add test case commit 71be8c215a091bd6abcabd3ba8fafba751ce3b7c Author: Jan Christoph Uhde Date: Tue Dec 6 10:00:11 2016 +0100 node replacement is now working commit d1a3e4482d39d0b69446ff4c64e2b72aa55caa73 Author: Jan Christoph Uhde Date: Mon Dec 5 20:18:31 2016 +0100 condition replace should be almost working commit 
275e36b60349e60112ffa188dd0fc8f9d076b179 Author: Jan Christoph Uhde Date: Mon Dec 5 19:49:10 2016 +0100 add pointer that needs to be replaced with true in sort or filter condition commit 6b93b9d2eb4ac874c8583620fdf699cb93d31643 Author: Jan Christoph Uhde Date: Mon Dec 5 19:38:01 2016 +0100 bring functions in correct order commit 5aabbb0ac003edd4f2788c6d15db68b4e7e8ede6 Author: Jan Christoph Uhde Date: Mon Dec 5 16:21:21 2016 +0100 fix const-ness so the condition can be modiefied commit c84223d59890c05afc57e8988f8e89b4baf60298 Author: Jan Christoph Uhde Date: Mon Dec 5 15:31:50 2016 +0100 Add information if AstNode has been found in subexpression this information can be used in rewriting the condition and deciding if a sort node can be deleted or not commit b0b660eb851d41ac25f41c98843acfc49bc5cc59 Author: Jan Christoph Uhde Date: Mon Dec 5 15:19:06 2016 +0100 add iterative preorder walk for and conditions commit 5dcf61c9badf604f01e0c251638546e2773aafb0 Author: Jan Christoph Uhde Date: Mon Dec 5 14:00:14 2016 +0100 now work to a certain degree with binaray/naray-and in FILTER/SORT condition commit 9b4e01eb3bf32f5da88934702efb92e5eee21a38 Author: Jan Christoph Uhde Date: Mon Dec 5 13:17:01 2016 +0100 move geoIndexRule to a positon before inexes have been touched by optimizer commit 9d04b37e58661c15908505d92e4a4f222ca50a63 Author: Jan Christoph Uhde Date: Mon Dec 5 12:46:23 2016 +0100 within queries are now working commit d858b9aa71b93b856bd24664ce53632115ff56b4 Author: Jan Christoph Uhde Date: Fri Dec 2 16:15:23 2016 +0100 further implement within commit 72d4790c68087bdafc6cff653647e56b966e41cf Author: Jan Christoph Uhde Date: Fri Dec 2 12:22:07 2016 +0100 remove boost::optional and make use of nodetype commit e0220be12d11a23328410aa8cbad6bc6c867a860 Author: Jan Christoph Uhde Date: Fri Dec 2 11:53:31 2016 +0100 stronger split between identification of candidates and application of geo index rule commit f27a22db063de6a129bc185201f4939b12876804 Author: Jan Christoph 
Uhde Date: Fri Dec 2 11:20:09 2016 +0100 fix debug code commit fab9af483ec209f00b96355fc71d09749268d3c5 Author: Jan Christoph Uhde Date: Fri Dec 2 11:00:39 2016 +0100 add extra log topic for development commit 56b6be851cc4dfa2a41a62d17259daebc8d052a4 Author: Jan Christoph Uhde Date: Fri Dec 2 10:44:42 2016 +0100 add functions providing capability to check ast for parts of geoindex rules commit 08ef943c83c7cebb1d9c64da87f530aed5bd8ded Author: Jan Christoph Uhde Date: Thu Dec 1 23:01:50 2016 +0100 fix tests commit 56614ac8c811b79e55cde95c4b7b34e16a4ce239 Author: Jan Christoph Uhde Date: Thu Dec 1 16:30:09 2016 +0100 switch unittests to chai and add failing test for FILTER condition commit 8bb719c6151bfa3c52172f97292b1f755c7e925a Author: Jan Christoph Uhde Date: Thu Dec 1 15:59:10 2016 +0100 add first tests for geoindex commit e619ef3e4e5907f781d0a59133a4bb4222811f2e Author: Jan Christoph Uhde Date: Thu Dec 1 12:21:51 2016 +0100 now inspect sort and filter nodes commit 5dbf5e14e343b7fd1384e8d0f2373f55135c53f9 Author: Jan Christoph Uhde Date: Thu Dec 1 10:03:24 2016 +0100 refactor geoOptimization into smaller fucntions this prepares the creation of the within rules commit 2110736d3644260e94a3271b47e570548c8c6089 Author: Jan Christoph Uhde Date: Wed Nov 30 16:50:02 2016 +0100 fix logical error in nextBabies commit 972af3af4cbc98379c1e1bdf69d77d3177c77a4a Author: Jan Christoph Uhde Date: Wed Nov 30 15:44:46 2016 +0100 add within to geoindex iterator commit 80c89d5f970a41b16ad3a5ddd1972b2134858bbe Author: Jan Christoph Uhde Date: Wed Nov 30 15:25:11 2016 +0100 geo condition is now build with a extra parameter for within commit 8bafcdfe92d2cc524fdda635ddea4cb57ef20145 Author: Jan Christoph Uhde Date: Wed Nov 30 14:10:16 2016 +0100 GeoIndexIterator now takes parameters via ConditionNode commit 86c21eb733346b6825947d40b44cc732442ba24c Author: Jan Christoph Uhde Date: Tue Nov 29 14:13:35 2016 +0100 make use of geo index cursor api commit 
5b3be69e101428c0cfa15da0f57cf1a153f2b4e7 Author: Jan Christoph Uhde Date: Tue Nov 29 11:48:32 2016 +0100 WIP first working version of geoindex via aql --- .gitignore | 1 + arangod/Aql/Optimizer.cpp | 4 +- arangod/Aql/Optimizer.h | 5 +- arangod/Aql/OptimizerRules.cpp | 594 ++++++++++++++---- arangod/Aql/OptimizerRules.h | 2 +- arangod/Aql/SortNode.cpp | 2 +- arangod/Aql/SortNode.h | 5 +- arangod/Indexes/GeoIndex.cpp | 132 +++- arangod/Indexes/GeoIndex.h | 48 ++ arangod/Indexes/IndexIterator.h | 24 + js/server/tests/aql/aql-optimizer-geoindex.js | 300 +++++++++ js/server/tests/aql/aql-optimizer-indexes.js | 2 +- lib/Logger/LogTopic.cpp | 1 + lib/Logger/Logger.h | 9 +- 14 files changed, 978 insertions(+), 151 deletions(-) create mode 100644 js/server/tests/aql/aql-optimizer-geoindex.js diff --git a/.gitignore b/.gitignore index bc55957536..0f684a89e9 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ Debug32/ Release64/ Release32/ WindowsLibraries/ +cluster/ core TAGS diff --git a/arangod/Aql/Optimizer.cpp b/arangod/Aql/Optimizer.cpp index 428f98a498..995382588b 100644 --- a/arangod/Aql/Optimizer.cpp +++ b/arangod/Aql/Optimizer.cpp @@ -486,8 +486,8 @@ void Optimizer::setupRules() { patchUpdateStatementsRule_pass9, DoesNotCreateAdditionalPlans, true); // patch update statements - registerRule("geo-index-optimizer", optimizeGeoIndexRule, - geoDistanceRule, DoesNotCreateAdditionalPlans, true); + registerRule("geo-index-optimizer", geoIndexRule, + applyGeoIndexRule, DoesNotCreateAdditionalPlans, true); if (arangodb::ServerState::instance()->isCoordinator()) { // distribute operations in cluster diff --git a/arangod/Aql/Optimizer.h b/arangod/Aql/Optimizer.h index 9a98ce0f1b..44bb2ed5b0 100644 --- a/arangod/Aql/Optimizer.h +++ b/arangod/Aql/Optimizer.h @@ -145,6 +145,8 @@ class Optimizer { // remove redundant OR conditions removeRedundantOrRule_pass6 = 820, + applyGeoIndexRule = 825, + useIndexesRule_pass6 = 830, // try to remove filters covered by index ranges @@ 
-197,9 +199,8 @@ class Optimizer { removeSatelliteJoinsRule_pass10 = 1045, // recognize that a RemoveNode can be moved to the shards - undistributeRemoveAfterEnumCollRule_pass10 = 1050, + undistributeRemoveAfterEnumCollRule_pass10 = 1050 - geoDistanceRule = 1060 }; public: diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 56ac6be5c5..22f0704363 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -51,6 +51,8 @@ #include #include +#include + using namespace arangodb; using namespace arangodb::aql; using EN = arangodb::aql::ExecutionNode; @@ -2727,6 +2729,7 @@ void arangodb::aql::distributeFilternCalcToClusterRule( void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) { + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER DISTRIBUTE SORT RULE"; SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::GATHER, true); @@ -2782,6 +2785,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, stopSearching = true; break; case EN::SORT: + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "APPLY DISTRIBUTE SORT"; auto thisSortNode = static_cast(inspectNode); // remember our cursor... @@ -2789,7 +2793,9 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt, // then unlink the filter/calculator from the plan plan->unlinkNode(inspectNode); // and re-insert into plan in front of the remoteNode - plan->insertDependency(rn, inspectNode); + if(thisSortNode->_reinsertInCluster){ + plan->insertDependency(rn, inspectNode); + } gatherNode->setElements(thisSortNode->getElements()); modified = true; // ready to rumble! 
@@ -3919,34 +3925,168 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, } +/////////////////////////////////////////////////////////////////////////////// +// GEO RULE /////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// +// Description of what this Rule tries to achieve: +// https://docs.google.com/document/d/1G57UP08ZFywUXKi5cLvEIKpZP-AUKGwG9oAnFOX8LLo +// - -struct GeoIndexInfo { - EnumerateCollectionNode* _collectionNode; - Collection const* _collection; - std::shared_ptr _index; - std::vector _longitude; - std::vector _latitude; +struct GeoIndexInfo{ + operator bool() const { return distanceNode && valid; } + void invalidate() { valid = false; } + GeoIndexInfo() + : collectionNode(nullptr) + , executionNode(nullptr) + , indexNode(nullptr) + , setter(nullptr) + , expressionParent(nullptr) + , expressionNode(nullptr) + , distanceNode(nullptr) + , index(nullptr) + , range(nullptr) + , executionNodeType(EN::ILLEGAL) + , within(false) + , lessgreaterequal(false) + , valid(true) + , constantPair{nullptr,nullptr} + {} + EnumerateCollectionNode* collectionNode; // node that will be replaced by (geo) IndexNode + ExecutionNode* executionNode; // start node that is a sort or filter + IndexNode* indexNode; // AstNode that is the parent of the Node + CalculationNode* setter; // node that has contains the condition for filter or sort + AstNode* expressionParent; // AstNode that is the parent of the Node + AstNode* expressionNode; // AstNode that contains the sort/filter condition + AstNode* distanceNode; // AstNode that contains the distance parameters + std::shared_ptr index; //pointer to geoindex + AstNode const* range; // range for within + ExecutionNode::NodeType executionNodeType; // type of execution node sort or filter + bool within; // is this a within lookup + bool lessgreaterequal; // is this a check for le/ge (true) or lt/gt (false) + bool valid; // 
contains this node a valid condition + std::vector longitude; // access path to longitude + std::vector latitude; // access path to latitude + std::pair constantPair; }; +////////////////////////////////////////////////////////////////////// +//candidate checking + +AstNode* isValueOrRefNode(AstNode* node){ + //TODO - implement me + return node; +} + +GeoIndexInfo isDistanceFunction(AstNode* distanceNode, AstNode* expressionParent){ + // the expression must exist and it must be a function call + auto rv = GeoIndexInfo{}; + if(distanceNode->type != NODE_TYPE_FCALL) { + return rv; + } + + //get the ast node of the expression + auto func = static_cast(distanceNode->getData()); + + // we're looking for "DISTANCE()", which is a function call + // with an empty parameters array + if ( func->externalName != "DISTANCE" || distanceNode->numMembers() != 1 ) { + return rv; + } + rv.distanceNode = distanceNode; + rv.expressionNode = distanceNode; + rv.expressionParent = expressionParent; + return rv; +} + +GeoIndexInfo isGeoFilterExpression(AstNode* node, AstNode* expressionParent){ + // binary compare must be on top + bool dist_first = true; + bool lessEqual = true; + auto rv = GeoIndexInfo{}; + if( node->type != NODE_TYPE_OPERATOR_BINARY_GE + && node->type != NODE_TYPE_OPERATOR_BINARY_GT + && node->type != NODE_TYPE_OPERATOR_BINARY_LE + && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { + + return rv; + } else { + if (node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT){ + dist_first = false; + } + } + if (node->type == NODE_TYPE_OPERATOR_BINARY_GT || node->type == NODE_TYPE_OPERATOR_BINARY_LT){ + lessEqual = false; + } + + if(node->numMembers() != 2){ + return rv; + } + + AstNode* first = node->getMember(0); + AstNode* second = node->getMember(1); + + auto eval_stuff = [](bool dist_first, bool lessEqual, GeoIndexInfo&& dist_fun, AstNode* value_node){ + if (dist_first && dist_fun && value_node){ + dist_fun.within = true; + dist_fun.range = 
value_node; + dist_fun.lessgreaterequal = lessEqual; + } else { + dist_fun.invalidate(); + } + return dist_fun; + }; + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(first, expressionParent), isValueOrRefNode(second)); + if (!rv) { + rv = eval_stuff(dist_first, lessEqual, isDistanceFunction(second, expressionParent), isValueOrRefNode(first)); + } -// TODO - remove debug code -#ifdef OBIDEBUG - #define OBILEVEL ERR -#else - #define OBILEVEL TRACE -#endif -static boost::optional -geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionNode* ex, ExecutionPlan* plan){ + if(rv){ + //this must be set after checking if the node contains a distance node. + rv.expressionNode = node; + } + + return rv; +} + +GeoIndexInfo iterativePreorderWithCondition(EN::NodeType type, AstNode* root, GeoIndexInfo(*condition)(AstNode*, AstNode*)){ + // returns on first hit + if (!root){ + return GeoIndexInfo{}; + } + std::vector> nodestack; + nodestack.push_back({root,nullptr}); + + while(nodestack.size()){ + auto current = nodestack.back(); + nodestack.pop_back(); + GeoIndexInfo rv = condition(current.first,current.second); + if (rv) { + return rv; + } + + if (type == EN::FILTER){ + if (current.first->type == NODE_TYPE_OPERATOR_BINARY_AND || current.first->type == NODE_TYPE_OPERATOR_NARY_AND ){ + for (std::size_t i = 0; i < current.first->numMembers(); ++i){ + nodestack.push_back({current.first->getMember(i),current.first}); + } + } + } else if (type == EN::SORT) { + // must be the only sort condition + } + } + return GeoIndexInfo{}; +} + +GeoIndexInfo geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionPlan* plan, GeoIndexInfo info){ using SV = std::vector; - LOG(OBILEVEL) << " enter argument check"; // first and second should be based on the same document - need to provide the document // in order to see which collection is bound to it and if that collections supports geo-index if( !pair.first->isAttributeAccessForVariable() || 
!pair.second->isAttributeAccessForVariable()){ - LOG(OBILEVEL) << " not both args are of type attribute access"; - return boost::none; + info.invalidate(); + return info; } // expect access of the for doc.attribute @@ -3956,14 +4096,13 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN SV accessPath1{pair.first->getString()}; SV accessPath2{pair.second->getString()}; - LOG(OBILEVEL) << " got setter"; if(setter1 == setter2){ if(setter1->getType() == EN::ENUMERATE_COLLECTION){ auto collNode = reinterpret_cast(setter1); + auto coll = collNode->collection(); //what kind of indexes does it have on what attributes auto lcoll = coll->getCollection(); // TODO - check collection for suitable geo-indexes - LOG(OBILEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName(); for(auto indexShardPtr : lcoll->getIndexes()){ // get real index arangodb::Index& index = *indexShardPtr.get(); @@ -3974,140 +4113,321 @@ geoDistanceFunctionArgCheck(std::pair const& pair, ExecutionN continue; } -#ifdef OBIDEBUG - //FIXME - REMOVE DEBUG CODE LATER - auto vecs = std::vector>{index.fieldNames(), std::vector{accessPath1, accessPath2}}; - for(auto vec : vecs ){ - for(auto path : vec){ - std::cout << "AccessPath VECTOR: "; - for(auto word : path){ - std::cout << word << " "; - } - std::cout << std::endl; - } - } -#endif - - //check access paths of attribues in ast and those in index match + //check access paths of attributes in ast and those in index match if( index.fieldNames()[0] == accessPath1 && index.fieldNames()[1] == accessPath2 ){ - return GeoIndexInfo{collNode, coll, indexShardPtr, std::move(accessPath1), std::move(accessPath2) }; + info.collectionNode = collNode; + info.index = indexShardPtr; + info.longitude = std::move(accessPath1); + info.latitude = std::move(accessPath2); + return info; } } } } - return boost::none; + info.invalidate(); + return info; } -void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt, - ExecutionPlan* plan, - Optimizer::Rule const* 
rule) { +bool checkDistanceArguments(GeoIndexInfo& info, ExecutionPlan* plan){ + if(!info){ + return false; + } - LOG(OBILEVEL) << "ENTER GEO RULE"; + auto const& functionArguments = info.distanceNode->getMember(0); + if(functionArguments->numMembers() < 4){ + return false; + } + + std::pair argPair1 = { functionArguments->getMember(0), functionArguments->getMember(1) }; + std::pair argPair2 = { functionArguments->getMember(2), functionArguments->getMember(3) }; + + GeoIndexInfo result1 = geoDistanceFunctionArgCheck(argPair1, plan, info /*copy*/); + GeoIndexInfo result2 = geoDistanceFunctionArgCheck(argPair2, plan, info /*copy*/); + //info now conatins access path to collection + + // xor only one argument pair shall have a geoIndex + if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ + info.invalidate(); + return false; + } + + GeoIndexInfo res; + if(result1){ + info = std::move(result1); + info.constantPair = std::move(argPair2); + } else { + info = std::move(result2); + info.constantPair = std::move(argPair1); + } + + return true; +} + +//checks a single sort or filter node +GeoIndexInfo identifyGeoOptimizationCandidate(ExecutionNode::NodeType type, ExecutionPlan* plan, ExecutionNode* n){ + ExecutionNode* setter = nullptr; + auto rv = GeoIndexInfo{}; + switch(type){ + case EN::SORT: { + auto node = static_cast(n); + auto& elements = node->getElements(); + + // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion + if ( !(elements.size() == 1 && elements[0].second)) { + //test on second makes sure the SORT is ascending + return rv; + } + + //variable of sort expression + auto variable = elements[0].first; + TRI_ASSERT(variable != nullptr); + + //// find the expression that is bound to the variable + // get the expression node that holds the calculation + setter = plan->getVarSetBy(variable->id); + } + break; + + case EN::FILTER: { + auto node = static_cast(n); + + // filter nodes always have one input variable + auto 
varsUsedHere = node->getVariablesUsedHere(); + TRI_ASSERT(varsUsedHere.size() == 1); + + // now check who introduced our variable + auto variable = varsUsedHere[0]; + setter = plan->getVarSetBy(variable->id); + } + break; + + default: + return rv; + } + + // common part - extract astNode from setter witch is a calculation node + if (setter == nullptr || setter->getType() != EN::CALCULATION) { + return rv; + } + + auto expression = static_cast(setter)->expression(); + + // the expression must exist and it must have an astNode + if (expression == nullptr || expression->node() == nullptr){ + // not the right type of node + return rv; + } + AstNode* node = expression->nodeForModification(); + + //FIXME -- technical debt -- code duplication / not all cases covered + switch(type){ + case EN::SORT: { + // check comma separated parts of condition cond0, cond1, cond2 + rv = isDistanceFunction(node,nullptr); + } + break; + + case EN::FILTER: { + rv = iterativePreorderWithCondition(type, node, &isGeoFilterExpression); + } + break; + + default: + rv.invalidate(); // not required but make sure the result is invalid + } + + rv.executionNode = n; + rv.executionNodeType = type; + rv.setter = static_cast(setter); + + checkDistanceArguments(rv, plan); + + return rv; +}; + +////////////////////////////////////////////////////////////////////// +//modify plan + +// builds a condition that can be used with the index interface and +// contains all parameters required by the GeoIndex +std::unique_ptr buildGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info, + bool lessEqual = false, AstNode const* withRange = nullptr){ + + AstNode* lat = info.constantPair.first; + AstNode* lon = info.constantPair.second; + auto ast = plan->getAst(); + auto varAstNode = ast->createNodeReference(info.collectionNode->outVariable()); + + auto nAryAnd = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND); + nAryAnd->reserve(withRange ? 
4 : 2); + + auto latKey = ast->createNodeAttributeAccess(varAstNode, "latitude",8); + auto latEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, latKey, lat); + nAryAnd->addMember(latEq); + + auto lonKey = ast->createNodeAttributeAccess(varAstNode, "longitude",9); + auto lonEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lonKey, lon); + nAryAnd->addMember(lonEq); + + if(info.within){ + auto withKey = ast->createNodeAttributeAccess(varAstNode, "within",6); + auto withEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, withKey, info.range); + nAryAnd->addMember(withEq); + + auto lessKey = ast->createNodeAttributeAccess(varAstNode, "lesseq",6); + auto lessValue = ast->createNodeValueBool(info.lessgreaterequal); + auto lessEq = ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_EQ, lessKey, lessValue); + nAryAnd->addMember(lessEq); + } + + auto unAryOr = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_OR, nAryAnd); + + auto condition = std::make_unique(ast); + condition->andCombine(unAryOr); + condition->normalize(plan); + return condition; +} + +void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ + if( info.expressionParent && info.executionNodeType == EN::FILTER) { + + auto ast = plan->getAst(); + CalculationNode* newNode = nullptr; + Expression* expr = new Expression(ast, static_cast(info.setter)->expression()->nodeForModification()->clone(ast)); + + try { + newNode = new CalculationNode(plan, plan->nextId(), expr, static_cast(info.setter)->outVariable()); + } catch (...) 
{ + delete expr; + throw; + } + + plan->registerNode(newNode); + plan->replaceNode(info.setter, newNode); + + auto replaceInfo = iterativePreorderWithCondition(EN::FILTER, newNode->expression()->nodeForModification(), &isGeoFilterExpression); + + auto replacement = ast->createNodeValueBool(true); + for(std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); ++i){ + if(replaceInfo.expressionParent->getMember(i) == replaceInfo.expressionNode){ + replaceInfo.expressionParent->removeMemberUnchecked(i); + replaceInfo.expressionParent->addMember(replacement); + } + } + + } +} + +// applys the optimization for a candidate +bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, GeoIndexInfo& second){ + if(!first && !second){ + return false; + } + + if(!first){ + first = std::move(second); + second.invalidate(); + } + + // We are not allowed to be a inner loop + if(first.collectionNode->isInInnerLoop() && first.executionNodeType == EN::SORT){ + return false; + } + + std::unique_ptr condition; + condition = buildGeoCondition(plan,first); + + auto inode = new IndexNode( + plan, plan->nextId(), first.collectionNode->vocbase(), + first.collectionNode->collection(), first.collectionNode->outVariable(), + std::vector{Transaction::IndexHandle{first.index}}, + condition.get(), false); + plan->registerNode(inode); + condition.release(); + + replaceGeoCondition(plan, first); + replaceGeoCondition(plan, second); + + // if executionNode is sort OR a filter without further sub conditions + // the node can be unlinked + auto unlinkNode = [&](GeoIndexInfo& info){ + if(info && !info.expressionParent){ + if (!arangodb::ServerState::instance()->isCoordinator() || info.executionNodeType == EN::FILTER) { + plan->unlinkNode(info.executionNode); + } else if (info.executionNodeType == EN::SORT){ + //make sure sort is not reinserted in cluster + static_cast(info.executionNode)->_reinsertInCluster = false; + } + } + }; + + unlinkNode(first); + unlinkNode(second); 
+ + //signal that plan has been changed + return true; +}; + +void arangodb::aql::geoIndexRule(Optimizer* opt, + ExecutionPlan* plan, + Optimizer::Rule const* rule) { + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER GEO RULE"; SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; bool modified = false; + //inspect each return node and work upwards to SingletonNode + plan->findEndNodes(nodes, true); + //ExecutionPlan* newPlan = nullptr; + for (auto& node : nodes) { + GeoIndexInfo sortInfo{}; + GeoIndexInfo filterInfo{}; + auto current = node; - plan->findNodesOfType(nodes, EN::SORT, true); + while (current){ + switch(current->getType()) { + case EN::SORT:{ + sortInfo = identifyGeoOptimizationCandidate(EN::SORT, plan, current); + } + break ; + case EN::FILTER:{ + filterInfo = identifyGeoOptimizationCandidate(EN::FILTER, plan, current); + } + break; + case EN::ENUMERATE_COLLECTION:{ + EnumerateCollectionNode* collnode = static_cast(current); + if( (sortInfo && sortInfo.collectionNode!= collnode) + ||(filterInfo && filterInfo.collectionNode != collnode) + ){ + filterInfo.invalidate(); + sortInfo.invalidate(); + break; + } + if (applyGeoOptimization(true, plan, filterInfo, sortInfo)){ + modified = true; + filterInfo.invalidate(); + sortInfo.invalidate(); + } + } + break; - for (auto const& n : nodes) { - auto node = static_cast(n); - auto const& elements = node->getElements(); + case EN::INDEX: + case EN::COLLECT:{ + filterInfo.invalidate(); + sortInfo.invalidate(); + break; + } - // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one sort criterion - if ( !(elements.size() == 1 && elements[0].second)) { - continue; + default:{} //skip - do nothing + break; + } + + current = current->getFirstDependency(); //inspect next node } - - //variable of sort expression - auto const variable = elements[0].first; - TRI_ASSERT(variable != nullptr); - - //// find the expression that is bound to the variable - // get the expression node that holds the 
cacluation - auto setter = plan->getVarSetBy(variable->id); - if (setter == nullptr || setter->getType() != EN::CALCULATION) { - continue; - } - - // downcast to calculation node and get expression - auto cn = static_cast(setter); - auto const expression = cn->expression(); - - // the expression must exist and it must be a function call - if (expression == nullptr || expression->node() == nullptr || - expression->node()->type != NODE_TYPE_FCALL) { - // not the right type of node - continue; - } - - //get the ast node of the expression - AstNode const* funcNode = expression->node(); - auto func = static_cast(funcNode->getData()); - - // we're looking for "DISTANCE()", which is a function call - // with an empty parameters array - if ( func->externalName != "DISTANCE" || funcNode->numMembers() != 1 ) { - continue; - } - - LOG(OBILEVEL) << " FOUND DISTANCE RULE"; - - auto const& distanceArgs = funcNode->getMember(0); - if(distanceArgs->numMembers() != 4){ - continue; - } - - std::pair argPair1 = { distanceArgs->getMember(0), distanceArgs->getMember(1) }; - std::pair argPair2 = { distanceArgs->getMember(2), distanceArgs->getMember(3) }; - - auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan); - auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan); - - // xor only one argument pair shall have a geoIndex - if ( ( !result1 && !result2 ) || ( result1 && result2 ) ){ - continue; - } - - LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS"; - - if(!result1){ - result1 = std::move(result2); - } - - LOG(OBILEVEL) << " attributes: " << result1.get()._longitude[0] - << ", " << result1.get()._longitude - << " of collection:" << result1.get()._collection->getName() - << " are geoindexed"; - - break; //remove this to make use of the index - - auto cnode = result1.get()._collectionNode; - auto& idxPtr = result1.get()._index; - - //create new index node and register it - auto condition = std::make_unique(plan->getAst()); //What is this condition 
exactly about - condition->normalize(plan); - auto inode = new IndexNode( - plan, plan->nextId(), cnode->vocbase(), - cnode->collection(), cnode->outVariable(), - std::vector{Transaction::IndexHandle{idxPtr}}, - condition.get(), !elements[0].second); - plan->registerNode(inode); - condition.release(); - - plan->unlinkNode(n); - plan->replaceNode(cnode,inode); - - //signal that plan has been changed - modified=true; - } opt->addPlan(plan, rule, modified); - - LOG(OBILEVEL) << "EXIT GEO RULE"; - LOG(OBILEVEL) << ""; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT GEO RULE - modified: " << modified; } - diff --git a/arangod/Aql/OptimizerRules.h b/arangod/Aql/OptimizerRules.h index cedcab94fa..2f848fef30 100644 --- a/arangod/Aql/OptimizerRules.h +++ b/arangod/Aql/OptimizerRules.h @@ -201,7 +201,7 @@ void prepareTraversalsRule(Optimizer* opt, ExecutionPlan* plan, /// @brief moves simple subqueries one level higher void inlineSubqueriesRule(Optimizer*, ExecutionPlan*, Optimizer::Rule const*); -void optimizeGeoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule); +void geoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule); } // namespace aql } // namespace arangodb diff --git a/arangod/Aql/SortNode.cpp b/arangod/Aql/SortNode.cpp index cc260878ee..8fa0facb1d 100644 --- a/arangod/Aql/SortNode.cpp +++ b/arangod/Aql/SortNode.cpp @@ -32,7 +32,7 @@ using namespace arangodb::aql; SortNode::SortNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base, SortElementVector const& elements, bool stable) - : ExecutionNode(plan, base), _elements(elements), _stable(stable) {} + : ExecutionNode(plan, base), _reinsertInCluster(true), _elements(elements), _stable(stable){} /// @brief toVelocyPack, for SortNode void SortNode::toVelocyPackHelper(VPackBuilder& nodes, bool verbose) const { diff --git a/arangod/Aql/SortNode.h b/arangod/Aql/SortNode.h index 6f30c99316..6338ec5acf 100644 --- a/arangod/Aql/SortNode.h +++ b/arangod/Aql/SortNode.h 
@@ -53,7 +53,7 @@ class SortNode : public ExecutionNode { public: SortNode(ExecutionPlan* plan, size_t id, SortElementVector const& elements, bool stable) - : ExecutionNode(plan, id), _elements(elements), _stable(stable) {} + : ExecutionNode(plan, id), _reinsertInCluster(true), _elements(elements), _stable(stable) {} SortNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base, SortElementVector const& elements, bool stable); @@ -120,6 +120,9 @@ class SortNode : public ExecutionNode { /// values (e.g. when a FILTER condition exists that guarantees this) void removeConditions(size_t count); + // reinsert node when building gather node - this is used e.g for the geo-index + bool _reinsertInCluster; + private: /// @brief pairs, consisting of variable and sort direction /// (true = ascending | false = descending) diff --git a/arangod/Indexes/GeoIndex.cpp b/arangod/Indexes/GeoIndex.cpp index 64e526824a..0d605f9a02 100644 --- a/arangod/Indexes/GeoIndex.cpp +++ b/arangod/Indexes/GeoIndex.cpp @@ -21,13 +21,141 @@ /// @author Dr. 
Frank Celler //////////////////////////////////////////////////////////////////////////////// -#include "GeoIndex.h" -#include "Logger/Logger.h" +#include "Aql/Ast.h" +#include "Aql/AstNode.h" +#include "Aql/SortCondition.h" #include "Basics/StringRef.h" #include "Basics/VelocyPackHelper.h" +#include "GeoIndex.h" +#include "Indexes/GeoIndex.h" +#include "Logger/Logger.h" #include "VocBase/transaction.h" using namespace arangodb; +GeoIndexIterator::GeoIndexIterator(LogicalCollection* collection, + arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + GeoIndex const* index, + arangodb::aql::AstNode const* cond, + arangodb::aql::Variable const* var) + : IndexIterator(collection, trx, mmdr, index), + _index(index), + _cursor(nullptr), + _coor(), + _condition(cond), + _variable(var), + _lat(0), + _lon(0), + _near(true), + _withinRange(0), + _withinLessEq(false) + // lookup will hold the inforamtion if this is a cursor for + // near/within and the reference point + //_lookups(trx, node, reference, index->fields()), + { + evaluateCondition(); + } + +void GeoIndexIterator::evaluateCondition() { + if (_condition) { + auto numMembers = _condition->numMembers(); + + if(numMembers >= 2){ + _lat = _condition->getMember(0)->getMember(1)->getDoubleValue(); + _lon = _condition->getMember(1)->getMember(1)->getDoubleValue(); + } + + if (numMembers == 2){ //near + _near = true; + } else { //within + _near = false; + _withinRange = _condition->getMember(2)->getMember(1)->getDoubleValue(); + _withinLessEq = _condition->getMember(3)->getMember(1)->getDoubleValue(); + } + + } else { + LOG(ERR) << "No Condition passed to GeoIndexIterator constructor"; + } + + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT evaluate Condition"; +} + +IndexLookupResult GeoIndexIterator::next() { + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER next"; + if (!_cursor){ + createCursor(_lat,_lon); + } + + auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,1)); + if(coords && coords->length){ + if(_near 
|| GeoIndex_distance(&_coor, &coords->coordinates[0]) <= _withinRange ){ + auto revision = ::GeoIndex::toRevision(coords->coordinates[0].data); + return IndexLookupResult{revision}; + } + } + // if there are no more results we return the default constructed IndexLookupResult + return IndexLookupResult{}; +} + +void GeoIndexIterator::nextBabies(std::vector& result, size_t batchSize) { + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "ENTER nextBabies " << batchSize; + if (!_cursor){ + createCursor(_lat,_lon); + } + + result.clear(); + if (batchSize > 0) { + auto coords = std::unique_ptr(::GeoIndex_ReadCursor(_cursor,batchSize)); + size_t length = coords ? coords->length : 0; + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "length " << length; + if (!length){ + return; + } + + + for(std::size_t index = 0; index < length; ++index){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "near " << _near << " max allowed range: " << _withinRange + // << " actual range: " << GeoIndex_distance(&_coor, &coords->coordinates[index]) ; + if (_near || GeoIndex_distance(&_coor, &coords->coordinates[index]) <= _withinRange ){ + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "add above to result" ; + result.emplace_back(IndexLookupResult(::GeoIndex::toRevision(coords->coordinates[index].data))); + } else { + break; + } + } + } + //LOG_TOPIC(DEBUG, Logger::DEVEL) << "EXIT nextBabies " << result.size(); +} + +::GeoCursor* GeoIndexIterator::replaceCursor(::GeoCursor* c){ + if(_cursor){ + ::GeoIndex_CursorFree(_cursor); + } + _cursor = c; + return _cursor; +} + +::GeoCursor* GeoIndexIterator::createCursor(double lat, double lon){ + _coor = GeoCoordinate{lat, lon, 0}; + return replaceCursor(::GeoIndex_NewCursor(_index->_geoIndex, &_coor)); +} + +/// @brief creates an IndexIterator for the given Condition +IndexIterator* GeoIndex::iteratorForCondition( + arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + arangodb::aql::AstNode const* node, + arangodb::aql::Variable const* reference, bool) const { + 
TRI_IF_FAILURE("HashIndex::noIterator") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + return new GeoIndexIterator(_collection, trx, mmdr, this, node, reference); +} + + +void GeoIndexIterator::reset() { + replaceCursor(nullptr); +} GeoIndex::GeoIndex(TRI_idx_iid_t iid, arangodb::LogicalCollection* collection, VPackSlice const& info) diff --git a/arangod/Indexes/GeoIndex.h b/arangod/Indexes/GeoIndex.h index 1d62a954f6..ca0195d7cd 100644 --- a/arangod/Indexes/GeoIndex.h +++ b/arangod/Indexes/GeoIndex.h @@ -27,6 +27,7 @@ #include "Basics/Common.h" #include "GeoIndex/GeoIndex.h" #include "Indexes/Index.h" +#include "Indexes/IndexIterator.h" #include "VocBase/vocbase.h" #include "VocBase/voc-types.h" @@ -37,8 +38,49 @@ static_assert(sizeof(GeoCoordinate::data) >= sizeof(TRI_voc_rid_t), "invalid size of GeoCoordinate.data"); namespace arangodb { +class GeoIndex; + +class GeoIndexIterator final : public IndexIterator { + public: + +/// @brief Construct an GeoIndexIterator based on Ast Conditions + GeoIndexIterator(LogicalCollection* collection, arangodb::Transaction* trx, + ManagedDocumentResult* mmdr, + GeoIndex const* index, + arangodb::aql::AstNode const*, + arangodb::aql::Variable const*); + + ~GeoIndexIterator() { + replaceCursor(nullptr); + }; + + char const* typeName() const override { return "geo-index-iterator"; } + + IndexLookupResult next() override; + + void nextBabies(std::vector&, size_t) override; + + void reset() override; + + private: + ::GeoCursor* replaceCursor(::GeoCursor* c); + ::GeoCursor* createCursor(double lat, double lon); + void evaluateCondition(); //called in constructor + + GeoIndex const* _index; + ::GeoCursor* _cursor; + ::GeoCoordinate _coor; + arangodb::aql::AstNode const* _condition; + arangodb::aql::Variable const* _variable; + double _lat; + double _lon; + bool _near; + double _withinRange; + double _withinLessEq; +}; class GeoIndex final : public Index { +friend class GeoIndexIterator; public: GeoIndex() = delete; @@ -66,6 
+108,12 @@ class GeoIndex final : public Index { return TRI_IDX_TYPE_GEO2_INDEX; } + IndexIterator* iteratorForCondition(arangodb::Transaction*, + ManagedDocumentResult*, + arangodb::aql::AstNode const*, + arangodb::aql::Variable const*, + bool) const override; + bool allowExpansion() const override { return false; } bool canBeDropped() const override { return true; } diff --git a/arangod/Indexes/IndexIterator.h b/arangod/Indexes/IndexIterator.h index 84614a0120..d69b1d4724 100644 --- a/arangod/Indexes/IndexIterator.h +++ b/arangod/Indexes/IndexIterator.h @@ -21,6 +21,30 @@ /// @author Michael Hackstein //////////////////////////////////////////////////////////////////////////////// +// In order to implement a new IndexIterator the following functions need to be +// implemented. +// +// typeName() returns a string describing the type of the indexIterator +// +// The next() function of the IndexIterator returns IndexLookupResults that are +// created from RevisionIds. If there is nothing more to return, a default +// constructed IndexLookupResult is returned. +// +// reset() resets the iterator +// +// optional - default implementation provided: +// +// nextBabies() gets more than one result, the function is meant to increase +// performance when receiving a single result from the index is more expensive +// per item than the item costs when receiving multiple results. +// +// skip(trySkip, skipped) tries to skip the next trySkip elements +// +// When finished you need to implement the function: +// virtual IndexIterator* iteratorForCondition(...) 
+// so that there is a way to create an iterator for the index + + #ifndef ARANGOD_INDEXES_INDEX_ITERATOR_H #define ARANGOD_INDEXES_INDEX_ITERATOR_H 1 diff --git a/js/server/tests/aql/aql-optimizer-geoindex.js b/js/server/tests/aql/aql-optimizer-geoindex.js new file mode 100644 index 0000000000..318bf39457 --- /dev/null +++ b/js/server/tests/aql/aql-optimizer-geoindex.js @@ -0,0 +1,300 @@ +/*jshint globalstrict:false, strict:false, maxlen: 500 */ +/*global assertEqual, assertFalse, assertTrue, assertNotEqual, AQL_EXPLAIN, AQL_EXECUTE */ + +// execute with: +// ./scripts/unittest shell_server_aql --test js/server/tests/aql/aql-optimizer-geoindex.js + +//////////////////////////////////////////////////////////////////////////////// +/// @brief tests for optimizer rules +/// +/// @file +/// +/// DISCLAIMER +/// +/// Copyright 2010-2012 triagens GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
+/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Christoph Uhde +/// @author Copyright 2016, ArangoDB GmbH, Cologne, Germany +//////////////////////////////////////////////////////////////////////////////// + +const expect = require('chai').expect; +var internal = require("internal"); +var jsunity = require("jsunity"); +var helper = require("@arangodb/aql-helper"); +var isEqual = helper.isEqual; +var findExecutionNodes = helper.findExecutionNodes; +var findReferencedNodes = helper.findReferencedNodes; +var getQueryMultiplePlansAndExecutions = helper.getQueryMultiplePlansAndExecutions; +var removeAlwaysOnClusterRules = helper.removeAlwaysOnClusterRules; + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test suite +//////////////////////////////////////////////////////////////////////////////// + +function optimizerRuleTestSuite() { + // quickly disable tests here + var enabled = { + basics : true, + removeNodes : true, + sorted : true + } + + var ruleName = "use-geoindex"; + var secondRuleName = "use-geoindexes"; + var removeCalculationNodes = "remove-unnecessary-calculations-2"; + var colName = "UnitTestsAqlOptimizer" + ruleName.replace(/-/g, "_"); + var colNameOther = colName + "_XX"; + + // various choices to control the optimizer: + var paramNone = { optimizer: { rules: [ "-all" ] } }; + var paramIndexFromSort = { optimizer: { rules: [ "-all", "+" + ruleName ] } }; + var paramIndexRange = { optimizer: { rules: [ "-all", "+" + secondRuleName ] } }; + var paramIndexFromSort_IndexRange = { optimizer: { rules: [ "-all", "+" + ruleName, "+" + secondRuleName ] } }; + var paramIndexFromSort_IndexRange_RemoveCalculations = { + optimizer: { rules: [ "-all", "+" + ruleName, "+" + secondRuleName, "+" + removeCalculationNodes ] } + }; + var paramIndexFromSort_RemoveCalculations = { + optimizer: { rules: [ "-all", "+" + ruleName, "+" + removeCalculationNodes ] } + }; + + var geocol; + var 
sortArray = function (l, r) { + if (l[0] !== r[0]) { + return l[0] < r[0] ? -1 : 1; + } + if (l[1] !== r[1]) { + return l[1] < r[1] ? -1 : 1; + } + return 0; + }; + var hasSortNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "SortNode").length, 1, query.string + " Has SortNode "); + }; + var hasNoSortNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "SortNode").length, 0, query.string + " Has no SortNode"); + }; + var hasFilterNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "FilterNode").length, 1, query.string + " Has FilterNode"); + }; + var hasNoFilterNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "FilterNode").length, 0, query.string + " Has no FilterNode"); + }; + var hasNoIndexNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "IndexNode").length, 0, query.string + " Has no IndexNode"); + }; + var hasNoResultsNode = function (plan,query) { + assertEqual(findExecutionNodes(plan, "NoResultsNode").length, 1, query.string + " Has NoResultsNode"); + }; + var hasCalculationNodes = function (plan,query, countXPect) { + assertEqual(findExecutionNodes(plan, "CalculationNode").length, + countXPect, "Has " + countXPect + " CalculationNode"); + }; + var hasIndexNode = function (plan,query) { + var rn = findExecutionNodes(plan,"IndexNode"); + assertEqual(rn.length, 1, query.string + "Has IndexNode"); + return; + }; + var isNodeType = function(node, type) { + assertEqual(node.type, type, query.string + " check whether this node is of type "+type); + }; + + var geodistance = function(latitude1, longitude1, latitude2, longitude2) { + //if (TYPEWEIGHT(latitude1) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(longitude1) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(latitude2) !== TYPEWEIGHT_NUMBER || + // TYPEWEIGHT(longitude2) !== TYPEWEIGHT_NUMBER) { + // WARN('DISTANCE', INTERNAL.errors.ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH); + // return null; + //} + + //var p1 = 
AQL_TO_NUMBER(latitude1) * (Math.PI / 180.0); + //var p2 = AQL_TO_NUMBER(latitude2) * (Math.PI / 180.0); + //var d1 = AQL_TO_NUMBER(latitude2 - latitude1) * (Math.PI / 180.0); + //var d2 = AQL_TO_NUMBER(longitude2 - longitude1) * (Math.PI / 180.0); + + var p1 = (latitude1) * (Math.PI / 180.0); + var p2 = (latitude2) * (Math.PI / 180.0); + var d1 = (latitude2 - latitude1) * (Math.PI / 180.0); + var d2 = (longitude2 - longitude1) * (Math.PI / 180.0); + + var a = Math.sin(d1 / 2.0) * Math.sin(d1 / 2.0) + + Math.cos(p1) * Math.cos(p2) * + Math.sin(d2 / 2.0) * Math.sin(d2 / 2.0); + var c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1.0 - a)); + + return (6371e3 * c); + } + + + return { + + //////////////////////////////////////////////////////////////////////////////// + /// @brief set up + //////////////////////////////////////////////////////////////////////////////// + + setUp : function () { + var loopto = 10; + + internal.db._drop(colName); + geocol = internal.db._create(colName); + geocol.ensureIndex({type:"geo", fields:["lat","lon"]}) + for (lat=-40; lat <=40 ; ++lat){ + for (lon=-40; lon <= 40; ++lon){ + geocol.insert({lat,lon}); + } + } + }, + + //////////////////////////////////////////////////////////////////////////////// + /// @brief tear down + //////////////////////////////////////////////////////////////////////////////// + + tearDown : function () { + internal.db._drop(colName); + internal.db._drop(colNameOther); + geocol = null; + }, + + testRuleBasics : function () { + if(enabled.basics){ + geocol.ensureIndex({ type: "hash", fields: [ "y", "z" ], unique: false }); + + var queries = [ + //query clust sort filter index + { string : "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + 
}, + { string : "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR d IN " + colName + " SORT distance(0, 0, d.lat, d.lon) FILTER distance(0, 0, d.lat,d.lon ) < 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : false + , index : true + }, + { string : "FOR i in 1..2 FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 1 && i > 1 LIMIT 1 RETURN d" + , cluster : false + , sort : false + , filter : true + , index : true + }, + ]; + + queries.forEach(function(query) { + var result = AQL_EXPLAIN(query.string); + + // //optimized on cluster + // if (query[1]) { + // assertNotEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); + // } + // else { + // assertEqual(-1, removeAlwaysOnClusterRules(result.plan.rules).indexOf(ruleName), query[0]); + // } + + //sort nodes + if (query.sort) { + hasSortNode(result,query); + } else { + hasNoSortNode(result,query); + } + + //filter nodes + if (query.filter) { + hasFilterNode(result,query); + } else { + hasNoFilterNode(result,query); + } + + if (query.index){ + hasIndexNode(result,query); + } else { + hasNoIndexNode(result,query); + } + + }); + } + }, // testRuleBasics + + testRuleRemoveNodes : function () { + if(enabled.removeNodes){ + var queries = [ + [ "FOR d IN " + colName + " SORT distance(d.lat,d.lon, 0 ,0 ) ASC LIMIT 5 RETURN d", false, false, false ], + [ "FOR d IN " + colName + " SORT distance(0, 0, d.lat,d.lon ) ASC LIMIT 5 RETURN d", false, false, false ], + [ "FOR d IN " + colName + " FILTER distance(0, 0, d.lat,d.lon ) < 111200 RETURN d", false, false, false ], +// [ "FOR i IN 1..2 FOR d IN geocol SORT distance(i,2,d.lat,d.lon) 
ASC LIMIT 5 RETURN d", false, false, false ], + ]; + + var expected = [ + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + [[0,0], [-1,0], [0,1], [1,0], [0,-1]], + ] + + queries.forEach(function(query, qindex) { + var result = AQL_EXECUTE(query[0]); + expect(expected[qindex].length).to.be.equal(result.json.length) + pairs = result.json.map(function(res){ + return [res.lat,res.lon]; + }); + //internal.print(pairs) + assertEqual(expected[qindex].sort(),pairs.sort()) + //expect(expected[qindex].sort()).to.be.equal(result.json.sort()) + }); + } + }, // testRuleSort + + testRuleSorted : function(){ + if(enabled.sorted){ + var old=0; + var query = "FOR d IN " + colName + " SORT distance(d.lat, d.lon, 0, 0) RETURN distance(d.lat, d.lon, 0, 0)"; + var result = AQL_EXECUTE(query); + distances = result.json.map(d => { return parseFloat(d.toFixed(5))}); + //internal.print(distances); + old=0; + distances.forEach(d => { assertTrue( d >= old); old = d; }); + } + } //testSorted + + }; // test dictionary (return) +} // optimizerRuleTestSuite + +//////////////////////////////////////////////////////////////////////////////// +/// @brief executes the test suite +//////////////////////////////////////////////////////////////////////////////// + +jsunity.run(optimizerRuleTestSuite); + +return jsunity.done(); diff --git a/js/server/tests/aql/aql-optimizer-indexes.js b/js/server/tests/aql/aql-optimizer-indexes.js index 344cd23beb..7c5889bf62 100644 --- a/js/server/tests/aql/aql-optimizer-indexes.js +++ b/js/server/tests/aql/aql-optimizer-indexes.js @@ -431,7 +431,7 @@ function optimizerIndexesTestSuite () { assertEqual("SingletonNode", nodeTypes[0], query); assertNotEqual(-1, nodeTypes.indexOf("IndexNode"), query); - + var results = AQL_EXECUTE(query); assertEqual([ 12 ], results.json, query); assertEqual(0, results.stats.scannedFull); diff --git a/lib/Logger/LogTopic.cpp b/lib/Logger/LogTopic.cpp index 91a294354f..6e52bd728f 100644 --- 
a/lib/Logger/LogTopic.cpp +++ b/lib/Logger/LogTopic.cpp @@ -47,6 +47,7 @@ LogTopic Logger::COMMUNICATION("communication", LogLevel::INFO); LogTopic Logger::COMPACTOR("compactor"); LogTopic Logger::CONFIG("config"); LogTopic Logger::DATAFILES("datafiles", LogLevel::INFO); +LogTopic Logger::DEVEL("development", LogLevel::DEBUG); LogTopic Logger::GRAPHS("graphs", LogLevel::INFO); LogTopic Logger::HEARTBEAT("heartbeat", LogLevel::INFO); LogTopic Logger::MEMORY("memory", LogLevel::FATAL); // suppress diff --git a/lib/Logger/Logger.h b/lib/Logger/Logger.h index 3529e9b9f4..f635577976 100644 --- a/lib/Logger/Logger.h +++ b/lib/Logger/Logger.h @@ -129,12 +129,13 @@ class Logger { public: static LogTopic AGENCY; static LogTopic AGENCYCOMM; - static LogTopic COLLECTOR; - static LogTopic COMPACTOR; - static LogTopic COMMUNICATION; - static LogTopic CONFIG; static LogTopic CLUSTER; + static LogTopic COLLECTOR; + static LogTopic COMMUNICATION; + static LogTopic COMPACTOR; + static LogTopic CONFIG; static LogTopic DATAFILES; + static LogTopic DEVEL; static LogTopic GRAPHS; static LogTopic HEARTBEAT; static LogTopic MEMORY; From 49fba8848701148681149984005c62402fa58b93 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 19 Dec 2016 11:33:45 +0100 Subject: [PATCH 50/53] add missing line --- arangod/Aql/OptimizerRules.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 22f0704363..0105124881 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4346,6 +4346,8 @@ bool applyGeoOptimization(bool near, ExecutionPlan* plan, GeoIndexInfo& first, G plan->registerNode(inode); condition.release(); + plan->replaceNode(first.collectionNode,inode); + replaceGeoCondition(plan, first); replaceGeoCondition(plan, second); From e83ac6e8b9ca7ab65c2316f0f94d3f317b3c4745 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 19 Dec 2016 12:30:47 +0100 Subject: [PATCH 51/53] delete 
ast node instead of replacing it with true in some cases. --- arangod/Aql/OptimizerRules.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 0105124881..b0f935515d 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4307,7 +4307,26 @@ void replaceGeoCondition(ExecutionPlan* plan, GeoIndexInfo& info){ plan->replaceNode(info.setter, newNode); auto replaceInfo = iterativePreorderWithCondition(EN::FILTER, newNode->expression()->nodeForModification(), &isGeoFilterExpression); + if(newNode->expression()->nodeForModification() == replaceInfo.expressionParent){ + if(replaceInfo.expressionParent->type == NODE_TYPE_OPERATOR_BINARY_AND){ + for(std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); ++i){ + if(replaceInfo.expressionParent->getMember(i) != replaceInfo.expressionNode){ + newNode->expression()->replaceNode(replaceInfo.expressionParent->getMember(i)); + return; + } + } + } + } + //else { + // // COULD BE IMPROVED + // if(replaceInfo.expressionParent->type == NODE_TYPE_OPERATOR_BINARY_AND){ + // // delete ast node - we would need the parent of expression parent to delete the node + // // we do not have it available here so we just replace the the node with true + // return; + // } + //} + //fallback auto replacement = ast->createNodeValueBool(true); for(std::size_t i = 0; i < replaceInfo.expressionParent->numMembers(); ++i){ if(replaceInfo.expressionParent->getMember(i) == replaceInfo.expressionNode){ From 271daafdbea341e8afb71d337742e1f89c3abc9f Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Mon, 19 Dec 2016 13:04:27 +0100 Subject: [PATCH 52/53] Add two more functions to perf analysis. 
--- scripts/setupPerfEvents.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/setupPerfEvents.sh b/scripts/setupPerfEvents.sh index 36af7a012f..f85971adc4 100755 --- a/scripts/setupPerfEvents.sh +++ b/scripts/setupPerfEvents.sh @@ -65,6 +65,10 @@ main(){ addEvent handleRequest handleRequest@HttpServer.cpp addEvent handleWrite handleWrite@SocketTask.cpp + echo "work in LogicalCollection" + addEvent logicalInsertDocument insertDocument@LogicalCollection.cpp + addEvent logicalInsert insert@LogicalCollection.cpp + addEvent tcp_sendmsg addEvent tcp_recvmsg From c4a99800cd874534761313db7923f23bf610b806 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Mon, 19 Dec 2016 13:07:21 +0100 Subject: [PATCH 53/53] Upgrade instructions for perf tool. --- scripts/perfanalysis.cpp | 2 +- scripts/setupPerfEvents.sh | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/perfanalysis.cpp b/scripts/perfanalysis.cpp index 01a635b561..535a43dcb2 100644 --- a/scripts/perfanalysis.cpp +++ b/scripts/perfanalysis.cpp @@ -1,5 +1,5 @@ // Compile with -// g++ perfanalysis.cpp -o perfanalyis -std=c++11 -Wall -O3 +// g++ perfanalysis.cpp -o perfanalyis -std=c++14 -Wall -O3 #include #include diff --git a/scripts/setupPerfEvents.sh b/scripts/setupPerfEvents.sh index f85971adc4..75eb924f8d 100755 --- a/scripts/setupPerfEvents.sh +++ b/scripts/setupPerfEvents.sh @@ -8,24 +8,26 @@ # document operations. Run this script with sudo when the ArangoDB # process is already running: # -# ./setupPerfEvents.sh +# sudo ./setupPerfEvents.sh # # Now you are able to recrod the event with: # -# sudo perf record -e "probe_arangod:*" -aR sleep 60 +# sudo perf record -e "probe_arangod:*" -aR # -# The above command will get sample data for 60 seconds. A file "perf.data" is -# written to the current directory. Dump the events in this file with: +# The above command will get sample data indefinitely, hit Ctrl-C when +# the measurement is finished. 
A file "perf.data" is written to the +# current directory. Dump the events in this file with: # # sudo perf script > perf.history # # This logs the times when individual threads hit the events. # Use the program perfanalyis.cpp in this directory in the following way: +# (for compilation instructions see at the top of perfanalysis.cpp) # -# sudo ./perfanalyis < perf.history > perf.statistics +# ./scripts/perfanalyis < perf.history > perf.statistics # # This will group enter and exit events of functions together, compute the time -# spent and sort by function. When finised remove all events with: +# spent and sort by function. When finished remove all events with: # # sudo perf probe -d "probe_arangod:*" #