From f995c46b71dabc68ba53f2e406c5e64c1c5cf234 Mon Sep 17 00:00:00 2001 From: Jan Christoph Uhde Date: Mon, 29 Jul 2019 15:57:29 +0200 Subject: [PATCH] nested collect backport 3.5 (#9587) * stop optimization for nested collects (#9484) * Update CHANGELOG --- CHANGELOG | 4 +- arangod/Aql/Ast.cpp | 18 +- arangod/Aql/AstHelper.h | 231 +++++++++++++++++ arangod/Aql/ExecutionNode.h | 17 +- arangod/Aql/OptimizerRules.cpp | 124 +++++---- ...optimizer-rule-remove-collect-variables.js | 245 +++++++++++++++++- 6 files changed, 565 insertions(+), 74 deletions(-) create mode 100644 arangod/Aql/AstHelper.h diff --git a/CHANGELOG b/CHANGELOG index 6a6343b2c6..8f0eb2b024 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ -v3.5.0-rc.6 (2019-07-27) +v3.5.0-rc.6 (2019-XX-XX) ------------------------ +* Fixed issue #9459: Optimization rule remove-collect-variables does not KEEP all necessary data. + * Added gzip and encryption options to arangoimport and arangoexport. * Added missing REST API route GET /_api/transaction for retrieving the list of diff --git a/arangod/Aql/Ast.cpp b/arangod/Aql/Ast.cpp index 72f9753503..eb1cd4d3c6 100644 --- a/arangod/Aql/Ast.cpp +++ b/arangod/Aql/Ast.cpp @@ -826,12 +826,12 @@ AstNode* Ast::createNodeBinaryOperator(AstNodeType type, AstNode const* lhs, // contain an attribute access, e.g. doc.value1 == doc.value2 bool swap = false; if (type == NODE_TYPE_OPERATOR_BINARY_EQ && - rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && + rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) { // value == doc.value => doc.value == value swap = true; } else if (type == NODE_TYPE_OPERATOR_BINARY_NE && - rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && + rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) { // value != doc.value => doc.value != value swap = true; @@ -1926,9 +1926,9 @@ void Ast::validateAndOptimize() { } } else if (node->type == NODE_TYPE_AGGREGATIONS) { --ctx->stopOptimizationRequests; - } else if (node->type == NODE_TYPE_ARRAY && - node->hasFlag(DETERMINED_CONSTANT) && - !node->hasFlag(VALUE_CONSTANT) && + } else if (node->type == NODE_TYPE_ARRAY && + node->hasFlag(DETERMINED_CONSTANT) && + !node->hasFlag(VALUE_CONSTANT) && node->numMembers() < 10) { // optimization attempt: we are speculating that this array contains function // call parameters, which may have been optimized somehow. @@ -3206,12 +3206,12 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) { } #if 0 } else if (func->name == "LIKE") { - // optimize a LIKE(x, y) into a plain x == y or a range scan in case the + // optimize a LIKE(x, y) into a plain x == y or a range scan in case the // search is case-sensitive and the pattern is either a full match or a // left-most prefix // this is desirable in 99.999% of all cases, but would cause the following incompatibilities: - // - the AQL LIKE function will implicitly cast its operands to strings, whereas + // - the AQL LIKE function will implicitly cast its operands to strings, whereas // operator == in AQL will not do this. So LIKE(1, '1') would behave differently // when executed via the AQL LIKE function or via 1 == '1' // - for left-most prefix searches (e.g. LIKE(text, 'abc%')) we need to determine @@ -3232,7 +3232,7 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) { caseInsensitive = caseArg->isTrue(); } } - + auto patternArg = args->getMember(1); if (!caseInsensitive && patternArg->isStringValue()) { @@ -3265,7 +3265,7 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) { AstNode* op = createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, lhs, rhs); if (wildcardIsLastChar) { - // replace LIKE with >= && <= + // replace LIKE with >= && <= return op; } // add >= && <=, but keep LIKE in place diff --git a/arangod/Aql/AstHelper.h b/arangod/Aql/AstHelper.h new file mode 100644 index 0000000000..2f33d2371a --- /dev/null +++ b/arangod/Aql/AstHelper.h @@ -0,0 +1,231 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2019 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Christoph Uhde +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_AQL_AST_HELPER_H +#define ARANGOD_AQL_AST_HELPER_H 1 + +#include "Aql/Ast.h" +#include "Aql/AstNode.h" +#include "Aql/Variable.h" +#include "Basics/SmallVector.h" +#include "Logger/Logger.h" + +#include + +namespace { + auto doNothingVisitor = [](arangodb::aql::AstNode const*) {}; +} + +namespace arangodb { +namespace aql { +namespace ast { + +namespace { + +template < typename T> +T& operator<<(T& os, SmallVector const& vec) { + os << "[ "; + for(auto const& var: vec) { + os << var->name << " "; + } + return os << "]"; +} + +bool accessesSearchVariableViaReference(AstNode const* current, Variable const* searchVariable) { + if (current->type == NODE_TYPE_INDEXED_ACCESS) { + auto sub = current->getMemberUnchecked(0); + if (sub->type == NODE_TYPE_REFERENCE) { + Variable const* v = static_cast(sub->getData()); + if (v->id == searchVariable->id) { + return true; + } + } + } else if (current->type == NODE_TYPE_EXPANSION) { + if (current->numMembers() < 2) { + return false; + } + auto it = current->getMemberUnchecked(0); + if (it->type != NODE_TYPE_ITERATOR || it->numMembers() != 2) { + return false; + } + + auto sub1 = it->getMember(0); + auto sub2 = it->getMember(1); + if (sub1->type != NODE_TYPE_VARIABLE || sub2->type != NODE_TYPE_REFERENCE) { + return false; + } + Variable const* v = static_cast(sub2->getData()); + if (v->id == searchVariable->id) { + return true; + } + } + + return false; +}; + +bool isTargetVariable(AstNode const* node, SmallVector& searchVariables, bool& isSafeForOptimization) { + TRI_ASSERT(!searchVariables.empty()); + TRI_ASSERT(node->type == NODE_TYPE_INDEXED_ACCESS || node->type == NODE_TYPE_EXPANSION); + + // given and expression like g3[0].`g2`[0].`g1`[0].`item1`.`_id` + // this loop resolves subtrees of the form: .`g2`[0].`g1`[0] + // search variables should equal [g1, g2, g3]. Therefor we need not match the + // variable names from the vector with those in the expression while going + // forward in the vector the we go backward in the expression + + auto current = node; + for(auto varIt = searchVariables.begin(); varIt != std::prev(searchVariables.end()); ++varIt) { + AstNode* next = nullptr; + if (current->type == NODE_TYPE_INDEXED_ACCESS) { + next = current->getMemberUnchecked(0); + } else if (current->type == NODE_TYPE_EXPANSION) { + + if (current->numMembers() < 2) { + return false; + } + + auto it = current->getMemberUnchecked(0); + TRI_ASSERT(it); + + //The expansion is at the very end + if (it->type == NODE_TYPE_ITERATOR && it->numMembers() == 2) { + if (it->getMember(0)->type != NODE_TYPE_VARIABLE ) { + return false; + } + + auto attributeAccess = it->getMember(1); + TRI_ASSERT(attributeAccess); + + if (std::next(varIt) != searchVariables.end() && attributeAccess->type == NODE_TYPE_ATTRIBUTE_ACCESS){ + next = attributeAccess; + } else { + return false; + } + + } else { + // The expansion is not at the very end! we are unable to check if the + // variable will be accessed checking nested expansions is really crazy + // and would probably be best done in a recursive way. If the expansion + // is in the middle, then it would be still the very first node in the + // AST, having 2 subtrees that contain the other search variables + isSafeForOptimization = false; // could be an access - we can not tell + return false; + } + } else { + return false; + } + + if(varIt == searchVariables.end()) { + return false; + } + + if (next && next->type == NODE_TYPE_ATTRIBUTE_ACCESS && + next->getString() == (*varIt)->name ) { + current = next->getMemberUnchecked(0); + } else if (next && next->type == NODE_TYPE_EXPANSION) { + isSafeForOptimization = false; + return false; + } else { + return false; + } + } // for nodes but last + + if(!current) { + return false; + } + + return accessesSearchVariableViaReference(current, searchVariables.back()); +} +} // end - namespace unnamed + +/// @brief determines the to-be-kept attribute of an INTO expression +inline std::unordered_set getReferencedAttributesForKeep( + AstNode const* node, SmallVector searchVariables, bool& isSafeForOptimization){ + + std::unordered_set result; + isSafeForOptimization = true; + + std::function visitor = [&isSafeForOptimization, + &result, + &searchVariables](AstNode const* node) { + if (!isSafeForOptimization) { + return false; + } + + if (node->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + while (node->getMemberUnchecked(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + node = node->getMemberUnchecked(0); + } + if (isTargetVariable(node->getMemberUnchecked(0), searchVariables, isSafeForOptimization)) { + result.emplace(node->getString()); + // do not descend further + return false; + } + } else if (node->type == NODE_TYPE_REFERENCE) { + Variable const* v = static_cast(node->getData()); + if (v->id == searchVariables.front()->id) { + isSafeForOptimization = false; // the expression references the searched variable + return false; + } + } else if (node->type == NODE_TYPE_EXPANSION) { + if (isTargetVariable(node, searchVariables, isSafeForOptimization)) { + auto sub = node->getMemberUnchecked(1); + if (sub->type == NODE_TYPE_EXPANSION) { + sub = sub->getMemberUnchecked(0)->getMemberUnchecked(1); + } + if (sub->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + while (sub->getMemberUnchecked(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + sub = sub->getMemberUnchecked(0); + } + result.emplace(sub->getString()); + // do not descend further + return false; + } + } + } + else if (node->type == NODE_TYPE_INDEXED_ACCESS) { + auto sub = node->getMemberUnchecked(0); + if (sub->type == NODE_TYPE_REFERENCE) { + Variable const* v = static_cast(sub->getData()); + if (v->id == searchVariables.back()->id) { + isSafeForOptimization = false; + return false; + } + } + } + return true; + }; + + // Traverse AST and call visitor before recursing on each node + // as long as visitor returns true the traversal continues. In + // that branch + Ast::traverseReadOnly(node, visitor, ::doNothingVisitor); + + return result; +} + + +} // end - namsepace ast +} // end - namespace aql +} // end - namespace arangodb + +#endif diff --git a/arangod/Aql/ExecutionNode.h b/arangod/Aql/ExecutionNode.h index 1552765084..40330f3b90 100644 --- a/arangod/Aql/ExecutionNode.h +++ b/arangod/Aql/ExecutionNode.h @@ -24,21 +24,24 @@ // Execution plans like the one below are made of Nodes that inherit the // ExecutionNode class as a base class. // +// clang-format off +// // Execution plan: // Id NodeType Est. Comment // 1 SingletonNode 1 * ROOT -// 2 EnumerateCollectionNode 6400 - FOR d IN coll /* full collection -// scan */ 3 CalculationNode 6400 - LET #1 = -// DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections -// used: d : coll */ 4 SortNode 6400 - SORT #1 ASC 5 -// LimitNode 5 - LIMIT 0, 5 6 ReturnNode 5 - -// RETURN d +// 2 EnumerateCollectionNode 6400 - FOR d IN coll /* full collection scan */ +// 3 CalculationNode 6400 - LET #1 = DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections used: d : coll */ +// 4 SortNode 6400 - SORT #1 ASC +// 5 LimitNode 5 - LIMIT 0, 5 +// 6 ReturnNode 5 - RETURN d +// +// clang-format on // // Even though the Singleton Node has a label saying it is the "ROOT" node it // is not in our definiton. Root Nodes are leaf nodes (at the bottom of the // list). // -// To get down (direction to root) from 4 to 5 you need to call getFirst Parent +// To get down (direction to root) from 4 to 5 you need to call getFirstParent // on the SortNode(4) to receive a pointer to the LimitNode(5). If you want to // go up from 5 to 4 (away from root) you need to call getFirstDependency at // the LimitNode (5) to get a pointer to the SortNode(4). diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index cb2318cad9..b8c15f8c6c 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -24,6 +24,7 @@ #include "OptimizerRules.h" #include "Aql/AqlItemBlock.h" +#include "Aql/AstHelper.h" #include "Aql/ClusterNodes.h" #include "Aql/CollectNode.h" #include "Aql/CollectOptions.h" @@ -180,7 +181,7 @@ void replaceGatherNodeVariables(arangodb::aql::ExecutionPlan* plan, // stringifying an expression may fail with "too long" error buffer.clear(); expr->stringify(&buffer); - if (cmp.size() == buffer.size() && + if (cmp.size() == buffer.size() && cmp.compare(0, cmp.size(), buffer.c_str(), buffer.size()) == 0) { // finally a match! it.var = it3.second; @@ -1228,6 +1229,7 @@ void arangodb::aql::removeUnnecessaryFiltersRule(Optimizer* opt, void arangodb::aql::removeCollectVariablesRule(Optimizer* opt, std::unique_ptr plan, OptimizerRule const& rule) { + SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::COLLECT, true); @@ -1250,38 +1252,66 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt, modified = true; } else if (outVariable != nullptr && !collectNode->count() && !collectNode->hasExpressionVariable() && !collectNode->hasKeepVariables()) { + // outVariable used later, no count, no INTO expression, no KEEP // e.g. COLLECT something INTO g - // we will now check how many part of "g" are used later + // we will now check how many parts of "g" are used later + std::unordered_set keepAttributes; - bool stop = false; - auto p = collectNode->getFirstParent(); - while (p != nullptr) { - if (p->getType() == EN::CALCULATION) { - auto cc = ExecutionNode::castTo(p); + SmallVector::allocator_type::arena_type a; + SmallVector searchVariables{a}; + searchVariables.push_back(outVariable); + + bool doOptimize = true; + auto planNode = collectNode->getFirstParent(); + while (planNode != nullptr && doOptimize) { + if (planNode->getType() == EN::CALCULATION) { + auto cc = ExecutionNode::castTo(planNode); Expression const* exp = cc->expression(); - if (exp != nullptr && exp->node() != nullptr) { + if (exp != nullptr && exp->node() != nullptr && !searchVariables.empty()) { bool isSafeForOptimization; auto usedThere = - Ast::getReferencedAttributesForKeep(exp->node(), outVariable, + ast::getReferencedAttributesForKeep(exp->node(), searchVariables, isSafeForOptimization); if (isSafeForOptimization) { for (auto const& it : usedThere) { keepAttributes.emplace(it); } } else { - stop = true; + doOptimize = false; + break; } + + } // end - expression exists + } else if (planNode->getType() == EN::COLLECT) { + auto innerCollectNode = ExecutionNode::castTo(planNode); + if (innerCollectNode->hasOutVariable()) { + // We have the following situation: + // + // COLLECT v1 = doc._id INTO g1 + // COLLECT v2 = doc._id INTO g2 + // + searchVariables.push_back(innerCollectNode->outVariable()); + } else { + // when we find another COLLECT, it will invalidate all + // previous variables in the scope + searchVariables.clear(); + } + } else { + auto here = planNode->getVariableIdsUsedHere(); + if(here.find(searchVariables.back()->id) != here.end()){ + // the outVariable of the last collect should not be used by any following node directly + doOptimize = false; + break; } } - if (stop) { - break; - } - p = p->getFirstParent(); - } - if (!stop) { + planNode = planNode->getFirstParent(); + + } // end - inspection of nodes below the found collect node - while valid planNode + + if (doOptimize) { std::vector keepVariables; // we are allowed to do the optimization auto current = n->getFirstDependency(); @@ -1302,14 +1332,14 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt, break; } current = current->getFirstDependency(); - } + } // while current if (keepAttributes.empty() && !keepVariables.empty()) { collectNode->setKeepVariables(std::move(keepVariables)); modified = true; } - } - } + } // end - if doOptimize + } // end - if collectNode has outVariable collectNode->clearAggregates( [&varsUsedLater, &modified]( @@ -1321,19 +1351,19 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt, } return false; }); - } + } // for node in nodes opt->addPlan(std::move(plan), rule, modified); } class PropagateConstantAttributesHelper { public: - explicit PropagateConstantAttributesHelper(ExecutionPlan* plan) + explicit PropagateConstantAttributesHelper(ExecutionPlan* plan) : _plan(plan), _modified(false) {} bool modified() const { return _modified; } - /// @brief inspects a plan and propages constant values in expressions + /// @brief inspects a plan and propagates constant values in expressions void propagateConstants() { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; @@ -1502,7 +1532,7 @@ class PropagateConstantAttributesHelper { void insertConstantAttribute(AstNode* parentNode, size_t accessIndex) { Variable const* variable = nullptr; std::string name; - + AstNode* member = parentNode->getMember(accessIndex); if (!getAttribute(member, variable, name)) { @@ -1522,7 +1552,7 @@ class PropagateConstantAttributesHelper { current = current->getMember(0); if (current->type == NODE_TYPE_REFERENCE) { auto setter = _plan->getVarSetBy(static_cast(current->getData())->id); - if (setter != nullptr && + if (setter != nullptr && (setter->getType() == EN::ENUMERATE_COLLECTION || setter->getType() == EN::INDEX)) { auto collection = ::getCollection(setter); if (collection != nullptr) { @@ -2881,17 +2911,17 @@ struct SortToIndexNode final : public WalkerWorker { std::unordered_map _variableDefinitions; std::vector> _filters; bool _modified; - + public: explicit SortToIndexNode(ExecutionPlan* plan) : _plan(plan), _sortNode(nullptr), _modified(false) { _filters.emplace_back(); } - + /// @brief gets the attributes from the filter conditions that will have a /// constant value (e.g. doc.value == 123) or than can be proven to be != null - void getSpecialAttributes(AstNode const* node, - Variable const* variable, + void getSpecialAttributes(AstNode const* node, + Variable const* variable, std::vector>& constAttributes, arangodb::HashSet>& nonNullAttributes) const { if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) { @@ -2899,8 +2929,8 @@ struct SortToIndexNode final : public WalkerWorker { getSpecialAttributes(node->getMemberUnchecked(0), variable, constAttributes, nonNullAttributes); getSpecialAttributes(node->getMemberUnchecked(1), variable, constAttributes, nonNullAttributes); return; - } - + } + if (!node->isComparisonOperator()) { return; } @@ -2944,15 +2974,15 @@ struct SortToIndexNode final : public WalkerWorker { // doc.value >= const value check = lhs; } - + if (check == nullptr) { // condition is useless for us return; } - + std::pair> result; - if (check->isAttributeAccessForVariable(result, false) && + if (check->isAttributeAccessForVariable(result, false) && result.first == variable) { if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) { // found a constant value @@ -2964,7 +2994,7 @@ struct SortToIndexNode final : public WalkerWorker { } } - void processCollectionAttributes(Variable const* variable, + void processCollectionAttributes(Variable const* variable, std::vector>& constAttributes, arangodb::HashSet>& nonNullAttributes) const { // resolve all FILTER variables into their appropriate filter conditions @@ -2997,7 +3027,7 @@ struct SortToIndexNode final : public WalkerWorker { SortCondition sortCondition(_plan, _sorts, constAttributes, nonNullAttributes, _variableDefinitions); - if (!sortCondition.isEmpty() && + if (!sortCondition.isEmpty() && sortCondition.isOnlyAttributeAccess() && sortCondition.isUnidirectional()) { // we have found a sort condition, which is unidirectional @@ -3073,7 +3103,7 @@ struct SortToIndexNode final : public WalkerWorker { // index conditions do not guarantee sortedness return true; } - + if (isSparse) { return true; } @@ -3094,7 +3124,7 @@ struct SortToIndexNode final : public WalkerWorker { // if we get here, we either have one index or multiple indexes on the same // attributes bool handled = false; - + if (indexes.size() == 1 && isSorted) { // if we have just a single index and we can use it for the filtering // condition, then we can use the index for sorting, too. regardless of if @@ -3244,7 +3274,7 @@ struct SortToIndexNode final : public WalkerWorker { case EN::FILTER: { auto inVariable = ExecutionNode::castTo(en)->inVariable()->id; _filters.back().emplace_back(inVariable); - return false; + return false; } case EN::CALCULATION: { @@ -3294,7 +3324,7 @@ struct SortToIndexNode final : public WalkerWorker { } return true; } - + void after(ExecutionNode* en) override final { if (en->getType() == EN::SUBQUERY) { TRI_ASSERT(!_filters.empty()); @@ -4032,7 +4062,7 @@ void arangodb::aql::collectInClusterRule(Optimizer* opt, std::unique_ptrgetType() == ExecutionNode::REMOTE) { auto previous = current->getFirstDependency(); // now we are on a DB server - + { // check if we will deal with more than one shard // if the remote one has one shard, the optimization will actually @@ -4639,7 +4669,7 @@ void arangodb::aql::restrictToSingleShardRule(Optimizer* opt, arangodb::HashSet toUnlink; std::map> modificationRestrictions; - + // forward a shard key restriction from one collection to the other if the two collections // are used in a smart join (and use distributeShardsLike on each other) auto forwardRestrictionToPrototype = [&plan](ExecutionNode const* current, std::string const& shardId) { @@ -4653,14 +4683,14 @@ void arangodb::aql::restrictToSingleShardRule(Optimizer* opt, } auto setter = plan->getVarSetBy(prototypeOutVariable->id); - if (setter == nullptr || + if (setter == nullptr || (setter->getType() != EN::INDEX && setter->getType() != EN::ENUMERATE_COLLECTION)) { return; } auto s1 = ::getCollection(current)->shardIds(); auto s2 = ::getCollection(setter)->shardIds(); - + if (s1->size() != s2->size()) { // different number of shard ids... should not happen if we have a prototype return; @@ -6532,7 +6562,7 @@ bool checkGeoFilterExpression(ExecutionPlan* plan, AstNode const* node, GeoIndex } static bool optimizeSortNode(ExecutionPlan* plan, SortNode* sort, GeoIndexInfo& info) { - // note: info will only be modified if the function returns true + // note: info will only be modified if the function returns true TRI_ASSERT(sort->getType() == EN::SORT); // we're looking for "SORT DISTANCE(x,y,a,b)" SortElementVector const& elements = sort->elements(); @@ -6553,7 +6583,7 @@ static bool optimizeSortNode(ExecutionPlan* plan, SortNode* sort, GeoIndexInfo& return false; // the expression must exist and must have an astNode } - // info will only be modified if the function returns true + // info will only be modified if the function returns true bool legacy = elements[0].ascending; // DESC is only supported on S2 index if (!info.sorted && checkDistanceFunc(plan, expr->node(), legacy, info)) { info.sorted = true; // do not parse another SORT @@ -6579,7 +6609,7 @@ static void optimizeFilterNode(ExecutionPlan* plan, FilterNode* fn, GeoIndexInfo // now check who introduced our variable ExecutionNode* setter = plan->getVarSetBy(variable->id); if (setter == nullptr || setter->getType() != EN::CALCULATION) { - return; + return; } CalculationNode* calc = ExecutionNode::castTo(setter); Expression* expr = calc->expression(); @@ -6833,7 +6863,7 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, std::unique_ptr limit = nullptr; info.sorted = false; // don't allow picking up either sort or limit from here on - canUseSortLimit = false; + canUseSortLimit = false; } current = current->getFirstParent(); // inspect next node } diff --git a/tests/js/server/aql/aql-optimizer-rule-remove-collect-variables.js b/tests/js/server/aql/aql-optimizer-rule-remove-collect-variables.js index efbf349825..374c3136f5 100644 --- a/tests/js/server/aql/aql-optimizer-rule-remove-collect-variables.js +++ b/tests/js/server/aql/aql-optimizer-rule-remove-collect-variables.js @@ -38,7 +38,7 @@ var isEqual = helper.isEqual; function optimizerRuleTestSuite () { var ruleName = "remove-collect-variables"; - // various choices to control the optimizer: + // various choices to control the optimizer: var paramNone = { optimizer: { rules: [ "-all" ] } }; var paramEnabled = { optimizer: { rules: [ "-all", "+" + ruleName ] } }; var paramDisabled = { optimizer: { rules: [ "+all", "-" + ruleName ] } }; @@ -64,7 +64,7 @@ function optimizerRuleTestSuite () { //////////////////////////////////////////////////////////////////////////////// testRuleDisabled : function () { - var queries = [ + var queries = [ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a", "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }" @@ -81,10 +81,10 @@ function optimizerRuleTestSuite () { //////////////////////////////////////////////////////////////////////////////// testRuleNoEffect : function () { - var queries = [ + var queries = [ "FOR i IN 1..10 COLLECT a = i RETURN a", "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j RETURN a", - "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b, group: group }" + "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b, group: group }", ]; queries.forEach(function(query) { @@ -98,10 +98,10 @@ function optimizerRuleTestSuite () { //////////////////////////////////////////////////////////////////////////////// testRuleHasEffect : function () { - var queries = [ + var queries = [ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a", - "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }" + "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }", ]; queries.forEach(function(query) { @@ -116,7 +116,7 @@ function optimizerRuleTestSuite () { testPlans : function () { - var plans = [ + var plans = [ [ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "ReturnNode" ] ], [ "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "ReturnNode" ] ], [ "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "CalculationNode", "ReturnNode" ] ] @@ -134,14 +134,15 @@ function optimizerRuleTestSuite () { //////////////////////////////////////////////////////////////////////////////// testResults : function () { - var queries = [ + var queries = [ [ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] ], - [ "FOR i IN 1..2 FOR j IN 1..2 COLLECT a = i, b = j INTO group RETURN [ a, b ]", [ [ 1, 1 ], [ 1, 2 ], [ 2, 1 ], [ 2, 2 ] ] ] + [ "FOR i IN 1..2 FOR j IN 1..2 COLLECT a = i, b = j INTO group RETURN [ a, b ]", [ [ 1, 1 ], [ 1, 2 ], [ 2, 1 ], [ 2, 2 ] ] ], ]; queries.forEach(function(query) { var planDisabled = AQL_EXPLAIN(query[0], { }, paramDisabled); var planEnabled = AQL_EXPLAIN(query[0], { }, paramEnabled); + var resultDisabled = AQL_EXECUTE(query[0], { }, paramDisabled).json; var resultEnabled = AQL_EXECUTE(query[0], { }, paramEnabled).json; @@ -153,7 +154,231 @@ function optimizerRuleTestSuite () { assertEqual(resultDisabled, query[1]); assertEqual(resultEnabled, query[1]); }); - } + }, + + testNestingRuleNotUsed1 : function() { + const query = ` + LET items = [{_id: 'ID'}] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id1 = item1._id INTO g1 + COLLECT id2 = g1[0].item2._id INTO g2 + RETURN g2[0] + `; + const expected = [ + { "g1" : [ { "item1" : { "_id" : "ID" }, + "item2" : { "_id" : "ID" }, + "item3" : { "_id" : "ID" }, + } + ], + "item1" : { "_id" : "ID" }, + "item2" : { "_id" : "ID" }, + "item3" : { "_id" : "ID" }, + "id1" : "ID" + } + ]; + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, + + testNestingRuleNotUsed2 : function() { + const query = ` + LET items = [{_id: 'ID'}] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id = item1._id INTO first + COLLECT id2 = first[0].item2._id INTO other + let b = other[0] + RETURN b + `; + const expected = [ + { "first" : [ { "item1" : { "_id" : "ID" }, + "item2" : { "_id" : "ID" }, + "item3" : { "_id" : "ID" }, + } + ], + "item1" : { "_id" : "ID" }, + "item2" : { "_id" : "ID" }, + "item3" : { "_id" : "ID" }, + "id" : "ID" + } + ]; + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, + + testNestingRuleNotUsed3 : function() { + const query = ` + LET items = [{_id: 'ID'}] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id = item1._id INTO first + COLLECT id2 = first[0].item2._id INTO other + RETURN other + `; + const expected = [ [ + { "first" : [ { "item1" : { "_id" : "ID" }, + "item2" : { "_id" : "ID" }, + "item3" : { "_id" : "ID" }, + } + ], + "item1" : { "_id" : "ID" }, + "item2" : { "_id" : "ID" }, + "item3" : { "_id" : "ID" }, + "id" : "ID" + } + ] ]; + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, + + testNestingRuleUsed1 : function() { + const query = ` + LET items = [{_id: 'ID'}] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id = item1._id INTO first + COLLECT id2 = first[0].item2._id INTO other + RETURN other[0].id + `; + const expected = [ "ID" ]; + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertNotEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, + + testNestingRuleUsed2 : function() { + const query = ` + LET items = [ { "_id" : 42 }] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id = item1._id INTO first + COLLECT id2 = first[0].item2._id INTO other + let b = 1 + other[0].first[0].item1._id + RETURN b + `; + const expected = [ 43 ]; + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertNotEqual(-1, explain.plan.rules.indexOf(ruleName)); + + }, + + testNestingRuleUsed3 : function() { + const query = ` + LET items = [ { "_id" : 42 }] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id = item1._id INTO first + COLLECT id2 = first[0].item2._id INTO other + let b = [ other[0].first[0], "blub" ] + RETURN b + `; + const expected = [ + [ { "item3" : { "_id" : 42 }, + "item1" : { "_id" : 42 }, + "item2" : { "_id" : 42 } } , "blub" ] ]; + + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertNotEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, + + testNestingRuleUsed4 : function() { + const query = ` + LET items = [{_id: 'ID'}] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id = item1._id INTO first + COLLECT id2 = first[0].id INTO other + RETURN other[0].id + `; + const expected = [ "ID" ]; + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertNotEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, + + testNestingRuleUsed5 : function() { + const query = ` + LET items = [{_id: 'ID'}] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id = item1._id INTO first + COLLECT id2 = first[0].id INTO other + RETURN other[0].first + `; + const expected = [ [ { "item3" : { "_id" : "ID" }, + "item1" : { "_id" : "ID" }, + "item2" : { "_id" : "ID" } } ] ]; + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertNotEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, + + testNestingRuleUsed6 : function() { + const query = ` + LET items = [{_id: 'ID'}] + FOR item1 IN items + FOR item2 IN items + FOR item3 IN items + COLLECT id = item1._id INTO first + COLLECT id2 = first[0].id INTO other + RETURN other[0].first[0] + `; + const expected = [ { "item3" : { "_id" : "ID" }, + "item1" : { "_id" : "ID" }, + "item2" : { "_id" : "ID" } } ]; + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertNotEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, + + testNestingRuleUsed7 : function() { + const query = ` + LET items = [{_id: 'ID'}] + FOR item1 IN items + COLLECT unused1 = item1._id INTO first + COLLECT unused2 = first[0].item1._id INTO other + RETURN other[0].first[0].item1._id + `; + const expected = [ "ID" ]; + + let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json; + assertEqual(expected, resultEnabled); + + let explain = AQL_EXPLAIN(query, { }, paramEnabled); + assertNotEqual(-1, explain.plan.rules.indexOf(ruleName)); + }, }; }