1
0
Fork 0

nested collect backport 3.5 (#9587)

* stop optimization for nested collects (#9484)

* Update CHANGELOG
This commit is contained in:
Jan Christoph Uhde 2019-07-29 15:57:29 +02:00 committed by KVS85
parent 7813c76f32
commit f995c46b71
6 changed files with 565 additions and 74 deletions

View File

@ -1,6 +1,8 @@
v3.5.0-rc.6 (2019-07-27)
v3.5.0-rc.6 (2019-XX-XX)
------------------------
* Fixed issue #9459: Optimization rule remove-collect-variables does not KEEP all necessary data.
* Added gzip and encryption options to arangoimport and arangoexport.
* Added missing REST API route GET /_api/transaction for retrieving the list of

231
arangod/Aql/AstHelper.h Normal file
View File

@ -0,0 +1,231 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Christoph Uhde
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_AQL_AST_HELPER_H
#define ARANGOD_AQL_AST_HELPER_H 1
#include "Aql/Ast.h"
#include "Aql/AstNode.h"
#include "Aql/Variable.h"
#include "Basics/SmallVector.h"
#include "Logger/Logger.h"
#include <unordered_set>
namespace {
auto doNothingVisitor = [](arangodb::aql::AstNode const*) {};
}
namespace arangodb {
namespace aql {
namespace ast {
namespace {
template < typename T>
T& operator<<(T& os, SmallVector<Variable const*> const& vec) {
os << "[ ";
for(auto const& var: vec) {
os << var->name << " ";
}
return os << "]";
}
bool accessesSearchVariableViaReference(AstNode const* current, Variable const* searchVariable) {
if (current->type == NODE_TYPE_INDEXED_ACCESS) {
auto sub = current->getMemberUnchecked(0);
if (sub->type == NODE_TYPE_REFERENCE) {
Variable const* v = static_cast<Variable const*>(sub->getData());
if (v->id == searchVariable->id) {
return true;
}
}
} else if (current->type == NODE_TYPE_EXPANSION) {
if (current->numMembers() < 2) {
return false;
}
auto it = current->getMemberUnchecked(0);
if (it->type != NODE_TYPE_ITERATOR || it->numMembers() != 2) {
return false;
}
auto sub1 = it->getMember(0);
auto sub2 = it->getMember(1);
if (sub1->type != NODE_TYPE_VARIABLE || sub2->type != NODE_TYPE_REFERENCE) {
return false;
}
Variable const* v = static_cast<Variable const*>(sub2->getData());
if (v->id == searchVariable->id) {
return true;
}
}
return false;
};
bool isTargetVariable(AstNode const* node, SmallVector<Variable const*>& searchVariables, bool& isSafeForOptimization) {
TRI_ASSERT(!searchVariables.empty());
TRI_ASSERT(node->type == NODE_TYPE_INDEXED_ACCESS || node->type == NODE_TYPE_EXPANSION);
// given and expression like g3[0].`g2`[0].`g1`[0].`item1`.`_id`
// this loop resolves subtrees of the form: .`g2`[0].`g1`[0]
// search variables should equal [g1, g2, g3]. Therefor we need not match the
// variable names from the vector with those in the expression while going
// forward in the vector the we go backward in the expression
auto current = node;
for(auto varIt = searchVariables.begin(); varIt != std::prev(searchVariables.end()); ++varIt) {
AstNode* next = nullptr;
if (current->type == NODE_TYPE_INDEXED_ACCESS) {
next = current->getMemberUnchecked(0);
} else if (current->type == NODE_TYPE_EXPANSION) {
if (current->numMembers() < 2) {
return false;
}
auto it = current->getMemberUnchecked(0);
TRI_ASSERT(it);
//The expansion is at the very end
if (it->type == NODE_TYPE_ITERATOR && it->numMembers() == 2) {
if (it->getMember(0)->type != NODE_TYPE_VARIABLE ) {
return false;
}
auto attributeAccess = it->getMember(1);
TRI_ASSERT(attributeAccess);
if (std::next(varIt) != searchVariables.end() && attributeAccess->type == NODE_TYPE_ATTRIBUTE_ACCESS){
next = attributeAccess;
} else {
return false;
}
} else {
// The expansion is not at the very end! we are unable to check if the
// variable will be accessed checking nested expansions is really crazy
// and would probably be best done in a recursive way. If the expansion
// is in the middle, then it would be still the very first node in the
// AST, having 2 subtrees that contain the other search variables
isSafeForOptimization = false; // could be an access - we can not tell
return false;
}
} else {
return false;
}
if(varIt == searchVariables.end()) {
return false;
}
if (next && next->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
next->getString() == (*varIt)->name ) {
current = next->getMemberUnchecked(0);
} else if (next && next->type == NODE_TYPE_EXPANSION) {
isSafeForOptimization = false;
return false;
} else {
return false;
}
} // for nodes but last
if(!current) {
return false;
}
return accessesSearchVariableViaReference(current, searchVariables.back());
}
} // end - namespace unnamed
/// @brief determines the to-be-kept attribute of an INTO expression
inline std::unordered_set<std::string> getReferencedAttributesForKeep(
AstNode const* node, SmallVector<Variable const*> searchVariables, bool& isSafeForOptimization){
std::unordered_set<std::string> result;
isSafeForOptimization = true;
std::function<bool(AstNode const*)> visitor = [&isSafeForOptimization,
&result,
&searchVariables](AstNode const* node) {
if (!isSafeForOptimization) {
return false;
}
if (node->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
while (node->getMemberUnchecked(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
node = node->getMemberUnchecked(0);
}
if (isTargetVariable(node->getMemberUnchecked(0), searchVariables, isSafeForOptimization)) {
result.emplace(node->getString());
// do not descend further
return false;
}
} else if (node->type == NODE_TYPE_REFERENCE) {
Variable const* v = static_cast<Variable const*>(node->getData());
if (v->id == searchVariables.front()->id) {
isSafeForOptimization = false; // the expression references the searched variable
return false;
}
} else if (node->type == NODE_TYPE_EXPANSION) {
if (isTargetVariable(node, searchVariables, isSafeForOptimization)) {
auto sub = node->getMemberUnchecked(1);
if (sub->type == NODE_TYPE_EXPANSION) {
sub = sub->getMemberUnchecked(0)->getMemberUnchecked(1);
}
if (sub->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
while (sub->getMemberUnchecked(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
sub = sub->getMemberUnchecked(0);
}
result.emplace(sub->getString());
// do not descend further
return false;
}
}
}
else if (node->type == NODE_TYPE_INDEXED_ACCESS) {
auto sub = node->getMemberUnchecked(0);
if (sub->type == NODE_TYPE_REFERENCE) {
Variable const* v = static_cast<Variable const*>(sub->getData());
if (v->id == searchVariables.back()->id) {
isSafeForOptimization = false;
return false;
}
}
}
return true;
};
// Traverse AST and call visitor before recursing on each node
// as long as visitor returns true the traversal continues. In
// that branch
Ast::traverseReadOnly(node, visitor, ::doNothingVisitor);
return result;
}
} // end - namsepace ast
} // end - namespace aql
} // end - namespace arangodb
#endif

View File

@ -24,15 +24,18 @@
// Execution plans like the one below are made of Nodes that inherit the
// ExecutionNode class as a base class.
//
// clang-format off
//
// Execution plan:
// Id NodeType Est. Comment
// 1 SingletonNode 1 * ROOT
// 2 EnumerateCollectionNode 6400 - FOR d IN coll /* full collection
// scan */ 3 CalculationNode 6400 - LET #1 =
// DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections
// used: d : coll */ 4 SortNode 6400 - SORT #1 ASC 5
// LimitNode 5 - LIMIT 0, 5 6 ReturnNode 5 -
// RETURN d
// 2 EnumerateCollectionNode 6400 - FOR d IN coll /* full collection scan */
// 3 CalculationNode 6400 - LET #1 = DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections used: d : coll */
// 4 SortNode 6400 - SORT #1 ASC
// 5 LimitNode 5 - LIMIT 0, 5
// 6 ReturnNode 5 - RETURN d
//
// clang-format on
//
// Even though the Singleton Node has a label saying it is the "ROOT" node it
// is not in our definiton. Root Nodes are leaf nodes (at the bottom of the

View File

@ -24,6 +24,7 @@
#include "OptimizerRules.h"
#include "Aql/AqlItemBlock.h"
#include "Aql/AstHelper.h"
#include "Aql/ClusterNodes.h"
#include "Aql/CollectNode.h"
#include "Aql/CollectOptions.h"
@ -1228,6 +1229,7 @@ void arangodb::aql::removeUnnecessaryFiltersRule(Optimizer* opt,
void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
std::unique_ptr<ExecutionPlan> plan,
OptimizerRule const& rule) {
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::COLLECT, true);
@ -1250,38 +1252,66 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
modified = true;
} else if (outVariable != nullptr && !collectNode->count() &&
!collectNode->hasExpressionVariable() && !collectNode->hasKeepVariables()) {
// outVariable used later, no count, no INTO expression, no KEEP
// e.g. COLLECT something INTO g
// we will now check how many part of "g" are used later
// we will now check how many parts of "g" are used later
std::unordered_set<std::string> keepAttributes;
bool stop = false;
auto p = collectNode->getFirstParent();
while (p != nullptr) {
if (p->getType() == EN::CALCULATION) {
auto cc = ExecutionNode::castTo<CalculationNode const*>(p);
SmallVector<Variable const*>::allocator_type::arena_type a;
SmallVector<Variable const*> searchVariables{a};
searchVariables.push_back(outVariable);
bool doOptimize = true;
auto planNode = collectNode->getFirstParent();
while (planNode != nullptr && doOptimize) {
if (planNode->getType() == EN::CALCULATION) {
auto cc = ExecutionNode::castTo<CalculationNode const*>(planNode);
Expression const* exp = cc->expression();
if (exp != nullptr && exp->node() != nullptr) {
if (exp != nullptr && exp->node() != nullptr && !searchVariables.empty()) {
bool isSafeForOptimization;
auto usedThere =
Ast::getReferencedAttributesForKeep(exp->node(), outVariable,
ast::getReferencedAttributesForKeep(exp->node(), searchVariables,
isSafeForOptimization);
if (isSafeForOptimization) {
for (auto const& it : usedThere) {
keepAttributes.emplace(it);
}
} else {
stop = true;
}
}
}
if (stop) {
doOptimize = false;
break;
}
} // end - expression exists
} else if (planNode->getType() == EN::COLLECT) {
auto innerCollectNode = ExecutionNode::castTo<CollectNode const*>(planNode);
if (innerCollectNode->hasOutVariable()) {
// We have the following situation:
//
// COLLECT v1 = doc._id INTO g1
// COLLECT v2 = doc._id INTO g2
//
searchVariables.push_back(innerCollectNode->outVariable());
} else {
// when we find another COLLECT, it will invalidate all
// previous variables in the scope
searchVariables.clear();
}
} else {
auto here = planNode->getVariableIdsUsedHere();
if(here.find(searchVariables.back()->id) != here.end()){
// the outVariable of the last collect should not be used by any following node directly
doOptimize = false;
break;
}
p = p->getFirstParent();
}
if (!stop) {
planNode = planNode->getFirstParent();
} // end - inspection of nodes below the found collect node - while valid planNode
if (doOptimize) {
std::vector<Variable const*> keepVariables;
// we are allowed to do the optimization
auto current = n->getFirstDependency();
@ -1302,14 +1332,14 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
break;
}
current = current->getFirstDependency();
}
} // while current
if (keepAttributes.empty() && !keepVariables.empty()) {
collectNode->setKeepVariables(std::move(keepVariables));
modified = true;
}
}
}
} // end - if doOptimize
} // end - if collectNode has outVariable
collectNode->clearAggregates(
[&varsUsedLater, &modified](
@ -1321,8 +1351,8 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
}
return false;
});
}
} // for node in nodes
opt->addPlan(std::move(plan), rule, modified);
}
@ -1333,7 +1363,7 @@ class PropagateConstantAttributesHelper {
bool modified() const { return _modified; }
/// @brief inspects a plan and propages constant values in expressions
/// @brief inspects a plan and propagates constant values in expressions
void propagateConstants() {
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};

View File

@ -84,7 +84,7 @@ function optimizerRuleTestSuite () {
var queries = [
"FOR i IN 1..10 COLLECT a = i RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b, group: group }"
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b, group: group }",
];
queries.forEach(function(query) {
@ -101,7 +101,7 @@ function optimizerRuleTestSuite () {
var queries = [
"FOR i IN 1..10 COLLECT a = i INTO group RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN a",
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }"
"FOR i IN 1..10 FOR j IN 1..10 COLLECT a = i, b = j INTO group RETURN { a: a, b : b }",
];
queries.forEach(function(query) {
@ -136,12 +136,13 @@ function optimizerRuleTestSuite () {
testResults : function () {
var queries = [
[ "FOR i IN 1..10 COLLECT a = i INTO group RETURN a", [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] ],
[ "FOR i IN 1..2 FOR j IN 1..2 COLLECT a = i, b = j INTO group RETURN [ a, b ]", [ [ 1, 1 ], [ 1, 2 ], [ 2, 1 ], [ 2, 2 ] ] ]
[ "FOR i IN 1..2 FOR j IN 1..2 COLLECT a = i, b = j INTO group RETURN [ a, b ]", [ [ 1, 1 ], [ 1, 2 ], [ 2, 1 ], [ 2, 2 ] ] ],
];
queries.forEach(function(query) {
var planDisabled = AQL_EXPLAIN(query[0], { }, paramDisabled);
var planEnabled = AQL_EXPLAIN(query[0], { }, paramEnabled);
var resultDisabled = AQL_EXECUTE(query[0], { }, paramDisabled).json;
var resultEnabled = AQL_EXECUTE(query[0], { }, paramEnabled).json;
@ -153,7 +154,231 @@ function optimizerRuleTestSuite () {
assertEqual(resultDisabled, query[1]);
assertEqual(resultEnabled, query[1]);
});
},
testNestingRuleNotUsed1 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id1 = item1._id INTO g1
COLLECT id2 = g1[0].item2._id INTO g2
RETURN g2[0]
`;
const expected = [
{ "g1" : [ { "item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
}
],
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
"id1" : "ID"
}
];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleNotUsed2 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
let b = other[0]
RETURN b
`;
const expected = [
{ "first" : [ { "item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
}
],
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
"id" : "ID"
}
];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleNotUsed3 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
RETURN other
`;
const expected = [ [
{ "first" : [ { "item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
}
],
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" },
"item3" : { "_id" : "ID" },
"id" : "ID"
}
] ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed1 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
RETURN other[0].id
`;
const expected = [ "ID" ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed2 : function() {
const query = `
LET items = [ { "_id" : 42 }]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
let b = 1 + other[0].first[0].item1._id
RETURN b
`;
const expected = [ 43 ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed3 : function() {
const query = `
LET items = [ { "_id" : 42 }]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].item2._id INTO other
let b = [ other[0].first[0], "blub" ]
RETURN b
`;
const expected = [
[ { "item3" : { "_id" : 42 },
"item1" : { "_id" : 42 },
"item2" : { "_id" : 42 } } , "blub" ] ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed4 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].id INTO other
RETURN other[0].id
`;
const expected = [ "ID" ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed5 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].id INTO other
RETURN other[0].first
`;
const expected = [ [ { "item3" : { "_id" : "ID" },
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" } } ] ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed6 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
FOR item2 IN items
FOR item3 IN items
COLLECT id = item1._id INTO first
COLLECT id2 = first[0].id INTO other
RETURN other[0].first[0]
`;
const expected = [ { "item3" : { "_id" : "ID" },
"item1" : { "_id" : "ID" },
"item2" : { "_id" : "ID" } } ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
testNestingRuleUsed7 : function() {
const query = `
LET items = [{_id: 'ID'}]
FOR item1 IN items
COLLECT unused1 = item1._id INTO first
COLLECT unused2 = first[0].item1._id INTO other
RETURN other[0].first[0].item1._id
`;
const expected = [ "ID" ];
let resultEnabled = AQL_EXECUTE(query, { }, paramEnabled).json;
assertEqual(expected, resultEnabled);
let explain = AQL_EXPLAIN(query, { }, paramEnabled);
assertNotEqual(-1, explain.plan.rules.indexOf(ruleName));
},
};
}