1
0
Fork 0

nested collect backport 3.5 (#9587)

* stop optimization for nested collects (#9484)

* Update CHANGELOG
This commit is contained in:
Jan Christoph Uhde 2019-07-29 15:57:29 +02:00 committed by KVS85
parent 7813c76f32
commit f995c46b71
6 changed files with 565 additions and 74 deletions

View File

@ -1,6 +1,8 @@
v3.5.0-rc.6 (2019-07-27) v3.5.0-rc.6 (2019-XX-XX)
------------------------ ------------------------
* Fixed issue #9459: Optimization rule remove-collect-variables does not KEEP all necessary data.
* Added gzip and encryption options to arangoimport and arangoexport. * Added gzip and encryption options to arangoimport and arangoexport.
* Added missing REST API route GET /_api/transaction for retrieving the list of * Added missing REST API route GET /_api/transaction for retrieving the list of

View File

@ -826,12 +826,12 @@ AstNode* Ast::createNodeBinaryOperator(AstNodeType type, AstNode const* lhs,
// contain an attribute access, e.g. doc.value1 == doc.value2 // contain an attribute access, e.g. doc.value1 == doc.value2
bool swap = false; bool swap = false;
if (type == NODE_TYPE_OPERATOR_BINARY_EQ && if (type == NODE_TYPE_OPERATOR_BINARY_EQ &&
rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) { lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) {
// value == doc.value => doc.value == value // value == doc.value => doc.value == value
swap = true; swap = true;
} else if (type == NODE_TYPE_OPERATOR_BINARY_NE && } else if (type == NODE_TYPE_OPERATOR_BINARY_NE &&
rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) { lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) {
// value != doc.value => doc.value != value // value != doc.value => doc.value != value
swap = true; swap = true;
@ -1926,9 +1926,9 @@ void Ast::validateAndOptimize() {
} }
} else if (node->type == NODE_TYPE_AGGREGATIONS) { } else if (node->type == NODE_TYPE_AGGREGATIONS) {
--ctx->stopOptimizationRequests; --ctx->stopOptimizationRequests;
} else if (node->type == NODE_TYPE_ARRAY && } else if (node->type == NODE_TYPE_ARRAY &&
node->hasFlag(DETERMINED_CONSTANT) && node->hasFlag(DETERMINED_CONSTANT) &&
!node->hasFlag(VALUE_CONSTANT) && !node->hasFlag(VALUE_CONSTANT) &&
node->numMembers() < 10) { node->numMembers() < 10) {
// optimization attempt: we are speculating that this array contains function // optimization attempt: we are speculating that this array contains function
// call parameters, which may have been optimized somehow. // call parameters, which may have been optimized somehow.
@ -3206,12 +3206,12 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) {
} }
#if 0 #if 0
} else if (func->name == "LIKE") { } else if (func->name == "LIKE") {
// optimize a LIKE(x, y) into a plain x == y or a range scan in case the // optimize a LIKE(x, y) into a plain x == y or a range scan in case the
// search is case-sensitive and the pattern is either a full match or a // search is case-sensitive and the pattern is either a full match or a
// left-most prefix // left-most prefix
// this is desirable in 99.999% of all cases, but would cause the following incompatibilities: // this is desirable in 99.999% of all cases, but would cause the following incompatibilities:
// - the AQL LIKE function will implicitly cast its operands to strings, whereas // - the AQL LIKE function will implicitly cast its operands to strings, whereas
// operator == in AQL will not do this. So LIKE(1, '1') would behave differently // operator == in AQL will not do this. So LIKE(1, '1') would behave differently
// when executed via the AQL LIKE function or via 1 == '1' // when executed via the AQL LIKE function or via 1 == '1'
// - for left-most prefix searches (e.g. LIKE(text, 'abc%')) we need to determine // - for left-most prefix searches (e.g. LIKE(text, 'abc%')) we need to determine
@ -3232,7 +3232,7 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) {
caseInsensitive = caseArg->isTrue(); caseInsensitive = caseArg->isTrue();
} }
} }
auto patternArg = args->getMember(1); auto patternArg = args->getMember(1);
if (!caseInsensitive && patternArg->isStringValue()) { if (!caseInsensitive && patternArg->isStringValue()) {
@ -3265,7 +3265,7 @@ AstNode* Ast::optimizeFunctionCall(AstNode* node) {
AstNode* op = createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, lhs, rhs); AstNode* op = createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, lhs, rhs);
if (wildcardIsLastChar) { if (wildcardIsLastChar) {
// replace LIKE with >= && <= // replace LIKE with >= && <=
return op; return op;
} }
// add >= && <=, but keep LIKE in place // add >= && <=, but keep LIKE in place

231
arangod/Aql/AstHelper.h Normal file
View File

@ -0,0 +1,231 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Christoph Uhde
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_AQL_AST_HELPER_H
#define ARANGOD_AQL_AST_HELPER_H 1
#include "Aql/Ast.h"
#include "Aql/AstNode.h"
#include "Aql/Variable.h"
#include "Basics/SmallVector.h"
#include "Logger/Logger.h"
#include <unordered_set>
namespace {
auto doNothingVisitor = [](arangodb::aql::AstNode const*) {};
}
namespace arangodb {
namespace aql {
namespace ast {
namespace {
template < typename T>
T& operator<<(T& os, SmallVector<Variable const*> const& vec) {
os << "[ ";
for(auto const& var: vec) {
os << var->name << " ";
}
return os << "]";
}
bool accessesSearchVariableViaReference(AstNode const* current, Variable const* searchVariable) {
if (current->type == NODE_TYPE_INDEXED_ACCESS) {
auto sub = current->getMemberUnchecked(0);
if (sub->type == NODE_TYPE_REFERENCE) {
Variable const* v = static_cast<Variable const*>(sub->getData());
if (v->id == searchVariable->id) {
return true;
}
}
} else if (current->type == NODE_TYPE_EXPANSION) {
if (current->numMembers() < 2) {
return false;
}
auto it = current->getMemberUnchecked(0);
if (it->type != NODE_TYPE_ITERATOR || it->numMembers() != 2) {
return false;
}
auto sub1 = it->getMember(0);
auto sub2 = it->getMember(1);
if (sub1->type != NODE_TYPE_VARIABLE || sub2->type != NODE_TYPE_REFERENCE) {
return false;
}
Variable const* v = static_cast<Variable const*>(sub2->getData());
if (v->id == searchVariable->id) {
return true;
}
}
return false;
};
bool isTargetVariable(AstNode const* node, SmallVector<Variable const*>& searchVariables, bool& isSafeForOptimization) {
TRI_ASSERT(!searchVariables.empty());
TRI_ASSERT(node->type == NODE_TYPE_INDEXED_ACCESS || node->type == NODE_TYPE_EXPANSION);
// given and expression like g3[0].`g2`[0].`g1`[0].`item1`.`_id`
// this loop resolves subtrees of the form: .`g2`[0].`g1`[0]
// search variables should equal [g1, g2, g3]. Therefor we need not match the
// variable names from the vector with those in the expression while going
// forward in the vector the we go backward in the expression
auto current = node;
for(auto varIt = searchVariables.begin(); varIt != std::prev(searchVariables.end()); ++varIt) {
AstNode* next = nullptr;
if (current->type == NODE_TYPE_INDEXED_ACCESS) {
next = current->getMemberUnchecked(0);
} else if (current->type == NODE_TYPE_EXPANSION) {
if (current->numMembers() < 2) {
return false;
}
auto it = current->getMemberUnchecked(0);
TRI_ASSERT(it);
//The expansion is at the very end
if (it->type == NODE_TYPE_ITERATOR && it->numMembers() == 2) {
if (it->getMember(0)->type != NODE_TYPE_VARIABLE ) {
return false;
}
auto attributeAccess = it->getMember(1);
TRI_ASSERT(attributeAccess);
if (std::next(varIt) != searchVariables.end() && attributeAccess->type == NODE_TYPE_ATTRIBUTE_ACCESS){
next = attributeAccess;
} else {
return false;
}
} else {
// The expansion is not at the very end! we are unable to check if the
// variable will be accessed checking nested expansions is really crazy
// and would probably be best done in a recursive way. If the expansion
// is in the middle, then it would be still the very first node in the
// AST, having 2 subtrees that contain the other search variables
isSafeForOptimization = false; // could be an access - we can not tell
return false;
}
} else {
return false;
}
if(varIt == searchVariables.end()) {
return false;
}
if (next && next->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
next->getString() == (*varIt)->name ) {
current = next->getMemberUnchecked(0);
} else if (next && next->type == NODE_TYPE_EXPANSION) {
isSafeForOptimization = false;
return false;
} else {
return false;
}
} // for nodes but last
if(!current) {
return false;
}
return accessesSearchVariableViaReference(current, searchVariables.back());
}
} // end - namespace unnamed
/// @brief determines the to-be-kept attribute of an INTO expression
inline std::unordered_set<std::string> getReferencedAttributesForKeep(
AstNode const* node, SmallVector<Variable const*> searchVariables, bool& isSafeForOptimization){
std::unordered_set<std::string> result;
isSafeForOptimization = true;
std::function<bool(AstNode const*)> visitor = [&isSafeForOptimization,
&result,
&searchVariables](AstNode const* node) {
if (!isSafeForOptimization) {
return false;
}
if (node->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
while (node->getMemberUnchecked(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
node = node->getMemberUnchecked(0);
}
if (isTargetVariable(node->getMemberUnchecked(0), searchVariables, isSafeForOptimization)) {
result.emplace(node->getString());
// do not descend further
return false;
}
} else if (node->type == NODE_TYPE_REFERENCE) {
Variable const* v = static_cast<Variable const*>(node->getData());
if (v->id == searchVariables.front()->id) {
isSafeForOptimization = false; // the expression references the searched variable
return false;
}
} else if (node->type == NODE_TYPE_EXPANSION) {
if (isTargetVariable(node, searchVariables, isSafeForOptimization)) {
auto sub = node->getMemberUnchecked(1);
if (sub->type == NODE_TYPE_EXPANSION) {
sub = sub->getMemberUnchecked(0)->getMemberUnchecked(1);
}
if (sub->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
while (sub->getMemberUnchecked(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
sub = sub->getMemberUnchecked(0);
}
result.emplace(sub->getString());
// do not descend further
return false;
}
}
}
else if (node->type == NODE_TYPE_INDEXED_ACCESS) {
auto sub = node->getMemberUnchecked(0);
if (sub->type == NODE_TYPE_REFERENCE) {
Variable const* v = static_cast<Variable const*>(sub->getData());
if (v->id == searchVariables.back()->id) {
isSafeForOptimization = false;
return false;
}
}
}
return true;
};
// Traverse AST and call visitor before recursing on each node
// as long as visitor returns true the traversal continues. In
// that branch
Ast::traverseReadOnly(node, visitor, ::doNothingVisitor);
return result;
}
} // end - namsepace ast
} // end - namespace aql
} // end - namespace arangodb
#endif

View File

@ -24,21 +24,24 @@
// Execution plans like the one below are made of Nodes that inherit the // Execution plans like the one below are made of Nodes that inherit the
// ExecutionNode class as a base class. // ExecutionNode class as a base class.
// //
// clang-format off
//
// Execution plan: // Execution plan:
// Id NodeType Est. Comment // Id NodeType Est. Comment
// 1 SingletonNode 1 * ROOT // 1 SingletonNode 1 * ROOT
// 2 EnumerateCollectionNode 6400 - FOR d IN coll /* full collection // 2 EnumerateCollectionNode 6400 - FOR d IN coll /* full collection scan */
// scan */ 3 CalculationNode 6400 - LET #1 = // 3 CalculationNode 6400 - LET #1 = DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections used: d : coll */
// DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections // 4 SortNode 6400 - SORT #1 ASC
// used: d : coll */ 4 SortNode 6400 - SORT #1 ASC 5 // 5 LimitNode 5 - LIMIT 0, 5
// LimitNode 5 - LIMIT 0, 5 6 ReturnNode 5 - // 6 ReturnNode 5 - RETURN d
// RETURN d //
// clang-format on
// //
// Even though the Singleton Node has a label saying it is the "ROOT" node it // Even though the Singleton Node has a label saying it is the "ROOT" node it
// is not in our definiton. Root Nodes are leaf nodes (at the bottom of the // is not in our definiton. Root Nodes are leaf nodes (at the bottom of the
// list). // list).
// //
// To get down (direction to root) from 4 to 5 you need to call getFirst Parent // To get down (direction to root) from 4 to 5 you need to call getFirstParent
// on the SortNode(4) to receive a pointer to the LimitNode(5). If you want to // on the SortNode(4) to receive a pointer to the LimitNode(5). If you want to
// go up from 5 to 4 (away from root) you need to call getFirstDependency at // go up from 5 to 4 (away from root) you need to call getFirstDependency at
// the LimitNode (5) to get a pointer to the SortNode(4). // the LimitNode (5) to get a pointer to the SortNode(4).

View File

@ -24,6 +24,7 @@
#include "OptimizerRules.h" #include "OptimizerRules.h"
#include "Aql/AqlItemBlock.h" #include "Aql/AqlItemBlock.h"
#include "Aql/AstHelper.h"
#include "Aql/ClusterNodes.h" #include "Aql/ClusterNodes.h"
#include "Aql/CollectNode.h" #include "Aql/CollectNode.h"
#include "Aql/CollectOptions.h" #include "Aql/CollectOptions.h"
@ -180,7 +181,7 @@ void replaceGatherNodeVariables(arangodb::aql::ExecutionPlan* plan,
// stringifying an expression may fail with "too long" error // stringifying an expression may fail with "too long" error
buffer.clear(); buffer.clear();
expr->stringify(&buffer); expr->stringify(&buffer);
if (cmp.size() == buffer.size() && if (cmp.size() == buffer.size() &&
cmp.compare(0, cmp.size(), buffer.c_str(), buffer.size()) == 0) { cmp.compare(0, cmp.size(), buffer.c_str(), buffer.size()) == 0) {
// finally a match! // finally a match!
it.var = it3.second; it.var = it3.second;
@ -1228,6 +1229,7 @@ void arangodb::aql::removeUnnecessaryFiltersRule(Optimizer* opt,
void arangodb::aql::removeCollectVariablesRule(Optimizer* opt, void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
std::unique_ptr<ExecutionPlan> plan, std::unique_ptr<ExecutionPlan> plan,
OptimizerRule const& rule) { OptimizerRule const& rule) {
SmallVector<ExecutionNode*>::allocator_type::arena_type a; SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a}; SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::COLLECT, true); plan->findNodesOfType(nodes, EN::COLLECT, true);
@ -1250,38 +1252,66 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
modified = true; modified = true;
} else if (outVariable != nullptr && !collectNode->count() && } else if (outVariable != nullptr && !collectNode->count() &&
!collectNode->hasExpressionVariable() && !collectNode->hasKeepVariables()) { !collectNode->hasExpressionVariable() && !collectNode->hasKeepVariables()) {
// outVariable used later, no count, no INTO expression, no KEEP // outVariable used later, no count, no INTO expression, no KEEP
// e.g. COLLECT something INTO g // e.g. COLLECT something INTO g
// we will now check how many part of "g" are used later // we will now check how many parts of "g" are used later
std::unordered_set<std::string> keepAttributes; std::unordered_set<std::string> keepAttributes;
bool stop = false; SmallVector<Variable const*>::allocator_type::arena_type a;
auto p = collectNode->getFirstParent(); SmallVector<Variable const*> searchVariables{a};
while (p != nullptr) { searchVariables.push_back(outVariable);
if (p->getType() == EN::CALCULATION) {
auto cc = ExecutionNode::castTo<CalculationNode const*>(p); bool doOptimize = true;
auto planNode = collectNode->getFirstParent();
while (planNode != nullptr && doOptimize) {
if (planNode->getType() == EN::CALCULATION) {
auto cc = ExecutionNode::castTo<CalculationNode const*>(planNode);
Expression const* exp = cc->expression(); Expression const* exp = cc->expression();
if (exp != nullptr && exp->node() != nullptr) { if (exp != nullptr && exp->node() != nullptr && !searchVariables.empty()) {
bool isSafeForOptimization; bool isSafeForOptimization;
auto usedThere = auto usedThere =
Ast::getReferencedAttributesForKeep(exp->node(), outVariable, ast::getReferencedAttributesForKeep(exp->node(), searchVariables,
isSafeForOptimization); isSafeForOptimization);
if (isSafeForOptimization) { if (isSafeForOptimization) {
for (auto const& it : usedThere) { for (auto const& it : usedThere) {
keepAttributes.emplace(it); keepAttributes.emplace(it);
} }
} else { } else {
stop = true; doOptimize = false;
break;
} }
} // end - expression exists
} else if (planNode->getType() == EN::COLLECT) {
auto innerCollectNode = ExecutionNode::castTo<CollectNode const*>(planNode);
if (innerCollectNode->hasOutVariable()) {
// We have the following situation:
//
// COLLECT v1 = doc._id INTO g1
// COLLECT v2 = doc._id INTO g2
//
searchVariables.push_back(innerCollectNode->outVariable());
} else {
// when we find another COLLECT, it will invalidate all
// previous variables in the scope
searchVariables.clear();
}
} else {
auto here = planNode->getVariableIdsUsedHere();
if(here.find(searchVariables.back()->id) != here.end()){
// the outVariable of the last collect should not be used by any following node directly
doOptimize = false;
break;
} }
} }
if (stop) {
break;
}
p = p->getFirstParent();
}
if (!stop) { planNode = planNode->getFirstParent();
} // end - inspection of nodes below the found collect node - while valid planNode
if (doOptimize) {
std::vector<Variable const*> keepVariables; std::vector<Variable const*> keepVariables;
// we are allowed to do the optimization // we are allowed to do the optimization
auto current = n->getFirstDependency(); auto current = n->getFirstDependency();
@ -1302,14 +1332,14 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
break; break;
} }
current = current->getFirstDependency(); current = current->getFirstDependency();
} } // while current
if (keepAttributes.empty() && !keepVariables.empty()) { if (keepAttributes.empty() && !keepVariables.empty()) {
collectNode->setKeepVariables(std::move(keepVariables)); collectNode->setKeepVariables(std::move(keepVariables));
modified = true; modified = true;
} }
} } // end - if doOptimize
} } // end - if collectNode has outVariable
collectNode->clearAggregates( collectNode->clearAggregates(
[&varsUsedLater, &modified]( [&varsUsedLater, &modified](
@ -1321,19 +1351,19 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
} }
return false; return false;
}); });
}
} // for node in nodes
opt->addPlan(std::move(plan), rule, modified); opt->addPlan(std::move(plan), rule, modified);
} }
class PropagateConstantAttributesHelper { class PropagateConstantAttributesHelper {
public: public:
explicit PropagateConstantAttributesHelper(ExecutionPlan* plan) explicit PropagateConstantAttributesHelper(ExecutionPlan* plan)
: _plan(plan), _modified(false) {} : _plan(plan), _modified(false) {}
bool modified() const { return _modified; } bool modified() const { return _modified; }
/// @brief inspects a plan and propages constant values in expressions /// @brief inspects a plan and propagates constant values in expressions
void propagateConstants() { void propagateConstants() {
SmallVector<ExecutionNode*>::allocator_type::arena_type a; SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a}; SmallVector<ExecutionNode*> nodes{a};
@ -1502,7 +1532,7 @@ class PropagateConstantAttributesHelper {
void insertConstantAttribute(AstNode* parentNode, size_t accessIndex) { void insertConstantAttribute(AstNode* parentNode, size_t accessIndex) {
Variable const* variable = nullptr; Variable const* variable = nullptr;
std::string name; std::string name;
AstNode* member = parentNode->getMember(accessIndex); AstNode* member = parentNode->getMember(accessIndex);
if (!getAttribute(member, variable, name)) { if (!getAttribute(member, variable, name)) {
@ -1522,7 +1552,7 @@ class PropagateConstantAttributesHelper {
current = current->getMember(0); current = current->getMember(0);
if (current->type == NODE_TYPE_REFERENCE) { if (current->type == NODE_TYPE_REFERENCE) {
auto setter = _plan->getVarSetBy(static_cast<Variable const*>(current->getData())->id); auto setter = _plan->getVarSetBy(static_cast<Variable const*>(current->getData())->id);
if (setter != nullptr && if (setter != nullptr &&
(setter->getType() == EN::ENUMERATE_COLLECTION || setter->getType() == EN::INDEX)) { (setter->getType() == EN::ENUMERATE_COLLECTION || setter->getType() == EN::INDEX)) {
auto collection = ::getCollection(setter); auto collection = ::getCollection(setter);
if (collection != nullptr) { if (collection != nullptr) {
@ -2881,17 +2911,17 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
std::unordered_map<VariableId, AstNode const*> _variableDefinitions; std::unordered_map<VariableId, AstNode const*> _variableDefinitions;
std::vector<std::vector<RegisterId>> _filters; std::vector<std::vector<RegisterId>> _filters;
bool _modified; bool _modified;
public: public:
explicit SortToIndexNode(ExecutionPlan* plan) explicit SortToIndexNode(ExecutionPlan* plan)
: _plan(plan), _sortNode(nullptr), _modified(false) { : _plan(plan), _sortNode(nullptr), _modified(false) {
_filters.emplace_back(); _filters.emplace_back();
} }
/// @brief gets the attributes from the filter conditions that will have a /// @brief gets the attributes from the filter conditions that will have a
/// constant value (e.g. doc.value == 123) or than can be proven to be != null /// constant value (e.g. doc.value == 123) or than can be proven to be != null
void getSpecialAttributes(AstNode const* node, void getSpecialAttributes(AstNode const* node,
Variable const* variable, Variable const* variable,
std::vector<std::vector<arangodb::basics::AttributeName>>& constAttributes, std::vector<std::vector<arangodb::basics::AttributeName>>& constAttributes,
arangodb::HashSet<std::vector<arangodb::basics::AttributeName>>& nonNullAttributes) const { arangodb::HashSet<std::vector<arangodb::basics::AttributeName>>& nonNullAttributes) const {
if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) { if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
@ -2899,8 +2929,8 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
getSpecialAttributes(node->getMemberUnchecked(0), variable, constAttributes, nonNullAttributes); getSpecialAttributes(node->getMemberUnchecked(0), variable, constAttributes, nonNullAttributes);
getSpecialAttributes(node->getMemberUnchecked(1), variable, constAttributes, nonNullAttributes); getSpecialAttributes(node->getMemberUnchecked(1), variable, constAttributes, nonNullAttributes);
return; return;
} }
if (!node->isComparisonOperator()) { if (!node->isComparisonOperator()) {
return; return;
} }
@ -2944,15 +2974,15 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
// doc.value >= const value // doc.value >= const value
check = lhs; check = lhs;
} }
if (check == nullptr) { if (check == nullptr) {
// condition is useless for us // condition is useless for us
return; return;
} }
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> result; std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> result;
if (check->isAttributeAccessForVariable(result, false) && if (check->isAttributeAccessForVariable(result, false) &&
result.first == variable) { result.first == variable) {
if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) { if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
// found a constant value // found a constant value
@ -2964,7 +2994,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
} }
} }
void processCollectionAttributes(Variable const* variable, void processCollectionAttributes(Variable const* variable,
std::vector<std::vector<arangodb::basics::AttributeName>>& constAttributes, std::vector<std::vector<arangodb::basics::AttributeName>>& constAttributes,
arangodb::HashSet<std::vector<arangodb::basics::AttributeName>>& nonNullAttributes) const { arangodb::HashSet<std::vector<arangodb::basics::AttributeName>>& nonNullAttributes) const {
// resolve all FILTER variables into their appropriate filter conditions // resolve all FILTER variables into their appropriate filter conditions
@ -2997,7 +3027,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
SortCondition sortCondition(_plan, _sorts, constAttributes, nonNullAttributes, _variableDefinitions); SortCondition sortCondition(_plan, _sorts, constAttributes, nonNullAttributes, _variableDefinitions);
if (!sortCondition.isEmpty() && if (!sortCondition.isEmpty() &&
sortCondition.isOnlyAttributeAccess() && sortCondition.isOnlyAttributeAccess() &&
sortCondition.isUnidirectional()) { sortCondition.isUnidirectional()) {
// we have found a sort condition, which is unidirectional // we have found a sort condition, which is unidirectional
@ -3073,7 +3103,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
// index conditions do not guarantee sortedness // index conditions do not guarantee sortedness
return true; return true;
} }
if (isSparse) { if (isSparse) {
return true; return true;
} }
@ -3094,7 +3124,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
// if we get here, we either have one index or multiple indexes on the same // if we get here, we either have one index or multiple indexes on the same
// attributes // attributes
bool handled = false; bool handled = false;
if (indexes.size() == 1 && isSorted) { if (indexes.size() == 1 && isSorted) {
// if we have just a single index and we can use it for the filtering // if we have just a single index and we can use it for the filtering
// condition, then we can use the index for sorting, too. regardless of if // condition, then we can use the index for sorting, too. regardless of if
@ -3244,7 +3274,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
case EN::FILTER: { case EN::FILTER: {
auto inVariable = ExecutionNode::castTo<FilterNode const*>(en)->inVariable()->id; auto inVariable = ExecutionNode::castTo<FilterNode const*>(en)->inVariable()->id;
_filters.back().emplace_back(inVariable); _filters.back().emplace_back(inVariable);
return false; return false;
} }
case EN::CALCULATION: { case EN::CALCULATION: {
@ -3294,7 +3324,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
} }
return true; return true;
} }
void after(ExecutionNode* en) override final { void after(ExecutionNode* en) override final {
if (en->getType() == EN::SUBQUERY) { if (en->getType() == EN::SUBQUERY) {
TRI_ASSERT(!_filters.empty()); TRI_ASSERT(!_filters.empty());
@ -4032,7 +4062,7 @@ void arangodb::aql::collectInClusterRule(Optimizer* opt, std::unique_ptr<Executi
} else if (current->getType() == ExecutionNode::REMOTE) { } else if (current->getType() == ExecutionNode::REMOTE) {
auto previous = current->getFirstDependency(); auto previous = current->getFirstDependency();
// now we are on a DB server // now we are on a DB server
{ {
// check if we will deal with more than one shard // check if we will deal with more than one shard
// if the remote one has one shard, the optimization will actually // if the remote one has one shard, the optimization will actually
@ -4639,7 +4669,7 @@ void arangodb::aql::restrictToSingleShardRule(Optimizer* opt,
arangodb::HashSet<ExecutionNode*> toUnlink; arangodb::HashSet<ExecutionNode*> toUnlink;
std::map<Collection const*, std::unordered_set<std::string>> modificationRestrictions; std::map<Collection const*, std::unordered_set<std::string>> modificationRestrictions;
// forward a shard key restriction from one collection to the other if the two collections // forward a shard key restriction from one collection to the other if the two collections
// are used in a smart join (and use distributeShardsLike on each other) // are used in a smart join (and use distributeShardsLike on each other)
auto forwardRestrictionToPrototype = [&plan](ExecutionNode const* current, std::string const& shardId) { auto forwardRestrictionToPrototype = [&plan](ExecutionNode const* current, std::string const& shardId) {
@ -4653,14 +4683,14 @@ void arangodb::aql::restrictToSingleShardRule(Optimizer* opt,
} }
auto setter = plan->getVarSetBy(prototypeOutVariable->id); auto setter = plan->getVarSetBy(prototypeOutVariable->id);
if (setter == nullptr || if (setter == nullptr ||
(setter->getType() != EN::INDEX && setter->getType() != EN::ENUMERATE_COLLECTION)) { (setter->getType() != EN::INDEX && setter->getType() != EN::ENUMERATE_COLLECTION)) {
return; return;
} }
auto s1 = ::getCollection(current)->shardIds(); auto s1 = ::getCollection(current)->shardIds();
auto s2 = ::getCollection(setter)->shardIds(); auto s2 = ::getCollection(setter)->shardIds();
if (s1->size() != s2->size()) { if (s1->size() != s2->size()) {
// different number of shard ids... should not happen if we have a prototype // different number of shard ids... should not happen if we have a prototype
return; return;
@ -6532,7 +6562,7 @@ bool checkGeoFilterExpression(ExecutionPlan* plan, AstNode const* node, GeoIndex
} }
static bool optimizeSortNode(ExecutionPlan* plan, SortNode* sort, GeoIndexInfo& info) { static bool optimizeSortNode(ExecutionPlan* plan, SortNode* sort, GeoIndexInfo& info) {
// note: info will only be modified if the function returns true // note: info will only be modified if the function returns true
TRI_ASSERT(sort->getType() == EN::SORT); TRI_ASSERT(sort->getType() == EN::SORT);
// we're looking for "SORT DISTANCE(x,y,a,b)" // we're looking for "SORT DISTANCE(x,y,a,b)"
SortElementVector const& elements = sort->elements(); SortElementVector const& elements = sort->elements();
@ -6553,7 +6583,7 @@ static bool optimizeSortNode(ExecutionPlan* plan, SortNode* sort, GeoIndexInfo&
return false; // the expression must exist and must have an astNode return false; // the expression must exist and must have an astNode
} }
// info will only be modified if the function returns true // info will only be modified if the function returns true
bool legacy = elements[0].ascending; // DESC is only supported on S2 index bool legacy = elements[0].ascending; // DESC is only supported on S2 index
if (!info.sorted && checkDistanceFunc(plan, expr->node(), legacy, info)) { if (!info.sorted && checkDistanceFunc(plan, expr->node(), legacy, info)) {
info.sorted = true; // do not parse another SORT info.sorted = true; // do not parse another SORT
@ -6579,7 +6609,7 @@ static void optimizeFilterNode(ExecutionPlan* plan, FilterNode* fn, GeoIndexInfo
// now check who introduced our variable // now check who introduced our variable
ExecutionNode* setter = plan->getVarSetBy(variable->id); ExecutionNode* setter = plan->getVarSetBy(variable->id);
if (setter == nullptr || setter->getType() != EN::CALCULATION) { if (setter == nullptr || setter->getType() != EN::CALCULATION) {
return; return;
} }
CalculationNode* calc = ExecutionNode::castTo<CalculationNode*>(setter); CalculationNode* calc = ExecutionNode::castTo<CalculationNode*>(setter);
Expression* expr = calc->expression(); Expression* expr = calc->expression();
@ -6833,7 +6863,7 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, std::unique_ptr<ExecutionPlan>
limit = nullptr; limit = nullptr;
info.sorted = false; info.sorted = false;
// don't allow picking up either sort or limit from here on // don't allow picking up either sort or limit from here on
canUseSortLimit = false; canUseSortLimit = false;
} }
current = current->getFirstParent(); // inspect next node current = current->getFirstParent(); // inspect next node
} }

View File

@ -38,7 +38,7 @@ var isEqual = helper.isEqual;
function optimizerRuleTestSuite () { function optimizerRuleTestSuite () {
var ruleName = "remove-collect-variables"; var ruleName = "remove-collect-variables";
// various choices to control the optimizer: // various choices to control the optimizer:
var paramNone = { optimizer: { rules: [ "-all" ] } }; var paramNone = { optimizer: { rules: [ "-all" ] } };
var paramEnabled = { optimizer: { rules: [ "-all", "+" + ruleName ] } }; var paramEnabled = { optimizer: { rules: [ "-all", "+" + ruleName ] } };
var paramDisabled = { optimizer: { rules: [ "+all", "-" + ruleName ] } }; var paramDisabled = { optimizer: { rules: [ "+all", "-" + ruleName ] } };
@ -64,7 +64,7 @@ function optimizerRuleTestSuite () {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
testRuleDisabled : function () { testRuleDisabled : function () {
var queries = [ var queries = [
"FOR i IN 1..10 COLLECT a = i INTO group RETURN a", "FOR i IN 1..10 COLLECT a = i INTO group RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a", "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }" "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }"
@ -81,10 +81,10 @@ function optimizerRuleTestSuite () {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
testRuleNoEffect : function () { testRuleNoEffect : function () {
var queries = [ var queries = [
"FOR i IN 1..10 COLLECT a = i RETURN a", "FOR i IN 1..10 COLLECT a = i RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j RETURN a", "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b, group: group }" "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b, group: group }",
]; ];
queries.forEach(function(query) { queries.forEach(function(query) {
@ -98,10 +98,10 @@ function optimizerRuleTestSuite () {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
testRuleHasEffect : function () { testRuleHasEffect : function () {
var queries = [ var queries = [
"FOR i IN 1..10 COLLECT a = i INTO group RETURN a", "FOR i IN 1..10 COLLECT a = i INTO group RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a", "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }" "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }",
]; ];
queries.forEach(function(query) { queries.forEach(function(query) {
@ -116,7 +116,7 @@ function optimizerRuleTestSuite () {
testPlans : function () { testPlans : function () {
var plans = [ var plans = [
[ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "ReturnNode" ] ], [ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "ReturnNode" ] ],
[ "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "ReturnNode" ] ], [ "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "ReturnNode" ] ],
[ "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "CalculationNode", "ReturnNode" ] ] [ "FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }", [ "SingletonNode", "CalculationNode", "EnumerateListNode", "CalculationNode", "EnumerateListNode", "SortNode", "CollectNode", "CalculationNode", "ReturnNode" ] ]
@ -134,14 +134,15 @@ function optimizerRuleTestSuite () {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
testResults : function () { testResults : function () {
var queries = [ var queries = [
[ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] ], [ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] ],
[ "FOR i IN 1..2 FOR j IN 1..2 COLLECT a = i, b = j INTO group RETURN [ a, b ]", [ [ 1, 1 ], [ 1, 2 ], [ 2, 1 ], [ 2, 2 ] ] ] [ "FOR i IN 1..2 FOR j IN 1..2 COLLECT a = i, b = j INTO group RETURN [ a, b ]", [ [ 1, 1 ], [ 1, 2 ], [ 2, 1 ], [ 2, 2 ] ] ],
]; ];
queries.forEach(function(query) { queries.forEach(function(query) {
var planDisabled = AQL_EXPLAIN(query[0], { }, paramDisabled); var planDisabled = AQL_EXPLAIN(query[0], { }, paramDisabled);
var planEnabled = AQL_EXPLAIN(query[0], { }, paramEnabled); var planEnabled = AQL_EXPLAIN(query[0], { }, paramEnabled);
var resultDisabled = AQL_EXECUTE(query[0], { }, paramDisabled).json; var resultDisabled = AQL_EXECUTE(query[0], { }, paramDisabled).json;
var resultEnabled = AQL_EXECUTE(query[0], { }, paramEnabled).json; var resultEnabled = AQL_EXECUTE(query[0], { }, paramEnabled).json;
@ -153,7 +154,231 @@ function optimizerRuleTestSuite () {
assertEqual(resultDisabled, query[1]); assertEqual(resultDisabled, query[1]);
assertEqual(resultEnabled, query[1]); assertEqual(resultEnabled, query[1]);
}); });
} },
testNestingRuleNotUsed1 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id1 = item1._id INTO g1
COLLECT id2 = g1[0].item2._id INTO g2
RETURN g2[0]
`;
const expected = [
{ "g1" : [ { "item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
}
],
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
"id1" : "ID"
}
];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleNotUsed2 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
let b = other[0]
RETURN b
`;
const expected = [
{ "first" : [ { "item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
}
],
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
"id" : "ID"
}
];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleNotUsed3 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
RETURN other
`;
const expected = [ [
{ "first" : [ { "item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
}
],
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
"id" : "ID"
}
] ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed1 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
RETURN other[0].id
`;
const expected = [ "ID" ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed2 : function() {
const query = `
LET items = [ { "_id" : 42 }]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
let b = 1 + other[0].first[0].item1._id
RETURN b
`;
const expected = [ 43 ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed3 : function() {
const query = `
LET items = [ { "_id" : 42 }]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
let b = [ other[0].first[0], "blub" ]
RETURN b
`;
const expected = [
[ { "item3" : { "_id" : 42 },
"item1" : { "_id" : 42 },
"item2" : { "_id" : 42 } } , "blub" ] ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed4 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].id INTO other
RETURN other[0].id
`;
const expected = [ "ID" ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed5 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].id INTO other
RETURN other[0].first
`;
const expected = [ [ { "item3" : { "_id" : "ID" },
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" } } ] ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed6 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].id INTO other
RETURN other[0].first[0]
`;
const expected = [ { "item3" : { "_id" : "ID" },
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" } } ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed7 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
COLLECT unused1 = item1._id INTO first
COLLECT unused2 = first[0].item1._id INTO other
RETURN other[0].first[0].item1._id
`;
const expected = [ "ID" ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
}; };
} }