1
0
Fork 0

make optimizer rule "patch-update-statements" fire for REPLACE too (#6181)

This commit is contained in:
Jan 2018-08-17 08:45:40 +02:00 committed by GitHub
parent 10800572d4
commit b5eaf1443b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 153 additions and 52 deletions

View File

@ -414,10 +414,10 @@ The following optimizer rules may appear in the `rules` attribute of a plan:
The intention of this rule is to move calculations down in the processing pipeline
as far as possible (below *FILTER*, *LIMIT* and *SUBQUERY* nodes) so they are executed
as late as possible and not before their results are required.
* `patch-update-statements`: will appear if an *UpdateNode* was patched to not buffer
its input completely, but to process it in smaller batches. The rule will fire for an
*UPDATE* query that is fed by a full collection scan, and that does not use any other
indexes and subqueries.
* `patch-update-statements`: will appear if an *UpdateNode* or *ReplaceNode* was patched
to not buffer its input completely, but to process it in smaller batches. The rule will
fire for an *UPDATE* or *REPLACE* query that is fed by a full collection scan or an index
scan only, and that does not use any other collections, indexes, subqueries or traversals.
* `optimize-traversals`: will appear if either the edge or path output variable in an
AQL traversal was optimized away, or if a *FILTER* condition from the query was moved
in the *TraversalNode* for early pruning of results.

View File

@ -95,6 +95,16 @@ Collection* addCollectionToQuery(Query* query, std::string const& cname, bool as
}
namespace {
// static node types used by some optimizer rules
// having them statically available avoids having to build the vectors over and over
// for each AQL query
std::vector<EN::NodeType> const removeUnnecessaryCalculationsNodeTypes{EN::CALCULATION, EN::SUBQUERY};
std::vector<EN::NodeType> const interchangeAdjacentEnumerationsNodeTypes{EN::ENUMERATE_COLLECTION, EN::ENUMERATE_LIST};
std::vector<EN::NodeType> const scatterInClusterNodeTypes{EN::ENUMERATE_COLLECTION, EN::INDEX, EN::INSERT, EN::UPDATE, EN::REPLACE, EN::REMOVE, EN::UPSERT};
std::vector<EN::NodeType> const removeDataModificationOutVariablesNodeTypes{EN::REMOVE, EN::INSERT, EN::UPDATE, EN::REPLACE, EN::UPSERT};
std::vector<EN::NodeType> const patchUpdateStatementsNodeTypes{EN::UPDATE, EN::REPLACE};
static int indexOf(std::vector<std::string> const& haystack, std::string const& needle) {
for (size_t i = 0; i < haystack.size(); ++i) {
@ -120,7 +130,7 @@ static aql::Collection const* getCollection(ExecutionNode const* node) {
}
}
static aql::Variable const* getVariable(ExecutionNode const* node) {
static aql::Variable const* getOutVariable(ExecutionNode const* node) {
auto const* n = dynamic_cast<DocumentProducingNode const*>(node);
if (n != nullptr) {
return n->outVariable();
@ -1863,12 +1873,10 @@ void arangodb::aql::removeRedundantCalculationsRule(
void arangodb::aql::removeUnnecessaryCalculationsRule(
Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
OptimizerRule const* rule) {
std::vector<ExecutionNode::NodeType> const types{EN::CALCULATION,
EN::SUBQUERY};
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, types, true);
plan->findNodesOfType(nodes, ::removeUnnecessaryCalculationsNodeTypes, true);
std::unordered_set<ExecutionNode*> toUnlink;
@ -2560,9 +2568,7 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule(
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
std::vector<ExecutionNode::NodeType> const types = {
ExecutionNode::ENUMERATE_COLLECTION, ExecutionNode::ENUMERATE_LIST};
plan->findNodesOfType(nodes, types, true);
plan->findNodesOfType(nodes, ::interchangeAdjacentEnumerationsNodeTypes, true);
std::unordered_set<ExecutionNode*> nodesSet;
for (auto const& n : nodes) {
@ -2743,7 +2749,7 @@ void arangodb::aql::optimizeClusterSingleShardRule(Optimizer* opt,
opt->disableRule(OptimizerRule::undistributeRemoveAfterEnumCollRule_pass10);
// get first collection from query
Collection const* c = getCollection(nodes[0]);
Collection const* c = ::getCollection(nodes[0]);
TRI_ASSERT(c != nullptr);
auto& vocbase = plan->getAst()->query()->vocbase();
@ -2792,8 +2798,8 @@ void arangodb::aql::optimizeClusterJoinsRule(Optimizer* opt,
if (current->getType() == ExecutionNode::ENUMERATE_COLLECTION ||
current->getType() == ExecutionNode::INDEX) {
Collection const* c1 = getCollection(n);
Collection const* c2 = getCollection(current);
Collection const* c1 = ::getCollection(n);
Collection const* c2 = ::getCollection(current);
bool qualifies = false;
@ -2815,8 +2821,8 @@ void arangodb::aql::optimizeClusterJoinsRule(Optimizer* opt,
}
if (!qualifies && n->getType() == EN::INDEX) {
Variable const* indexVariable = getVariable(n);
Variable const* otherVariable = getVariable(current);
Variable const* indexVariable = ::getOutVariable(n);
Variable const* otherVariable = ::getOutVariable(current);
std::string dist1 = c1->distributeShardsLike();
std::string dist2 = c2->distributeShardsLike();
@ -2992,19 +2998,9 @@ void arangodb::aql::scatterInClusterRule(Optimizer* opt,
// we are a coordinator. now look in the plan for nodes of type
// EnumerateCollectionNode, IndexNode and modification nodes
std::vector<ExecutionNode::NodeType> const types = {
ExecutionNode::ENUMERATE_COLLECTION,
ExecutionNode::INDEX,
ExecutionNode::INSERT,
ExecutionNode::UPDATE,
ExecutionNode::REPLACE,
ExecutionNode::REMOVE,
ExecutionNode::UPSERT
};
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, types, true);
plan->findNodesOfType(nodes, ::scatterInClusterNodeTypes, true);
TRI_ASSERT(
plan->getAst()
@ -4419,7 +4415,7 @@ class RemoveToEnumCollFinder final : public WalkerWorker<ExecutionNode> {
_enumColl = enumColl;
if (getCollection(_enumColl) != rn->collection()) {
if (::getCollection(_enumColl) != rn->collection()) {
break; // abort . . .
}
@ -5093,12 +5089,10 @@ void arangodb::aql::removeDataModificationOutVariablesRule(
Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
OptimizerRule const* rule) {
bool modified = false;
std::vector<ExecutionNode::NodeType> const types = {
EN::REMOVE, EN::INSERT, EN::UPDATE, EN::REPLACE, EN::UPSERT};
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, types, true);
plan->findNodesOfType(nodes, ::removeDataModificationOutVariablesNodeTypes, true);
for (auto const& n : nodes) {
auto node = ExecutionNode::castTo<ModificationNode*>(n);
@ -5125,16 +5119,14 @@ void arangodb::aql::removeDataModificationOutVariablesRule(
void arangodb::aql::patchUpdateStatementsRule(
Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
OptimizerRule const* rule) {
// no need to dive into subqueries here, as UPDATE needs to be on the top
// level
// no need to dive into subqueries here
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::UPDATE, false);
plan->findNodesOfType(nodes, ::patchUpdateStatementsNodeTypes, false);
bool modified = false;
for (auto const& n : nodes) {
// we should only get through here a single time
auto node = ExecutionNode::castTo<ModificationNode*>(n);
TRI_ASSERT(node != nullptr);
@ -5151,17 +5143,18 @@ void arangodb::aql::patchUpdateStatementsRule(
while (dep != nullptr) {
auto const type = dep->getType();
if (type == EN::ENUMERATE_LIST || type == EN::INDEX ||
if (type == EN::ENUMERATE_LIST ||
#ifdef USE_IRESEARCH
type == EN::ENUMERATE_IRESEARCH_VIEW ||
#endif
type == EN::SUBQUERY) {
// not suitable
modified = false;
break;
}
if (type == EN::ENUMERATE_COLLECTION) {
auto collectionNode = ExecutionNode::castTo<EnumerateCollectionNode const*>(dep);
if (collectionNode->collection() != collection) {
if (type == EN::ENUMERATE_COLLECTION || type == EN::INDEX) {
if (::getCollection(dep) != collection) {
// different collection, not suitable
modified = false;
break;
@ -5173,8 +5166,9 @@ void arangodb::aql::patchUpdateStatementsRule(
// abort
break;
}
TRI_ASSERT(!modified);
// saw the same collection in FOR as in the UPDATE/REPLACE
if (n->isVarUsedLater(collectionNode->outVariable())) {
if (n->isVarUsedLater(::getOutVariable(dep))) {
// must abort, because the variable produced by the FOR loop is
// read after it is updated
break;
@ -6260,8 +6254,6 @@ void arangodb::aql::geoIndexRule(Optimizer* opt,
plan->findNodesOfType(nodes, EN::ENUMERATE_COLLECTION, true);
for (ExecutionNode* node : nodes) {
TRI_ASSERT(node->getType() == EN::ENUMERATE_COLLECTION);
GeoIndexInfo info;
ExecutionNode* current = node->getFirstParent();
LimitNode* limit = nullptr;

View File

@ -74,7 +74,9 @@ function optimizerRuleTestSuite () {
testRuleDisabled : function () {
var queries = [
"FOR doc IN " + c.name() + " UPDATE doc WITH { test: 1 } IN " + c.name(),
"FOR doc IN " + c.name() + " UPDATE doc WITH { test: 1 } IN " + c.name() + " RETURN doc"
"FOR doc IN " + c.name() + " UPDATE doc WITH { test: 1 } IN " + c.name() + " RETURN doc",
"FOR doc IN " + c.name() + " REPLACE doc WITH { test: 1 } IN " + c.name(),
"FOR doc IN " + c.name() + " REPLACE doc WITH { test: 1 } IN " + c.name() + " RETURN doc"
];
queries.forEach(function(query) {
@ -90,8 +92,19 @@ function optimizerRuleTestSuite () {
testRuleNoEffect : function () {
var queries = [
"UPDATE 'test0' WITH { test: 1 } IN " + c.name(), // nothing returned
"FOR doc1 IN " + c.name() + " UPDATE doc1 WITH { test: 1 } IN " + c.name() + " FILTER doc1.value == 2 RETURN doc1", // must not kick in here
"FOR doc1 IN " + c.name() + " UPDATE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc1.value", // must not kick in here
"FOR i IN 1..10 FOR doc1 IN " + c.name() + " UPDATE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc1", // must not kick in here
"FOR doc1 IN " + c.name() + " FOR i IN 1..10 UPDATE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc1", // must not kick in here
"FOR doc1 IN " + c.name() + " FOR doc2 IN " + c.name() + " UPDATE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc1", // must not kick in here
"FOR doc1 IN " + c.name() + " FOR doc2 IN " + c.name() + " UPDATE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc2" // must not kick in here
"FOR doc1 IN " + c.name() + " FOR doc2 IN " + c.name() + " UPDATE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc2", // must not kick in here
"REPLACE 'test0' WITH { test: 1 } IN " + c.name(), // nothing returned
"FOR doc1 IN " + c.name() + " REPLACE doc1 WITH { test: 1 } IN " + c.name() + " FILTER doc1.value == 2 RETURN doc1", // must not kick in here
"FOR doc1 IN " + c.name() + " REPLACE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc1.value", // must not kick in here
"FOR i IN 1..10 FOR doc1 IN " + c.name() + " REPLACE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc1", // must not kick in here
"FOR doc1 IN " + c.name() + " FOR i IN 1..10 REPLACE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc1", // must not kick in here
"FOR doc1 IN " + c.name() + " FOR doc2 IN " + c.name() + " REPLACE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc1", // must not kick in here
"FOR doc1 IN " + c.name() + " FOR doc2 IN " + c.name() + " REPLACE doc1 WITH { test: 1 } IN " + c.name() + " RETURN doc2", // must not kick in here
];
queries.forEach(function(query) {
@ -105,10 +118,17 @@ function optimizerRuleTestSuite () {
////////////////////////////////////////////////////////////////////////////////
testRuleHasEffect : function () {
c.ensureIndex({ type: "skiplist", fields: ["x"] });
var queries = [
"FOR doc IN " + c.name() + " UPDATE doc WITH { test: 1 } IN " + c.name(), // nothing returned
"FOR doc IN " + c.name() + " UPDATE doc WITH { test: 1 } IN " + c.name() + " RETURN 1", // different values returned
"FOR doc IN " + c.name() + " FILTER doc.value > 100 UPDATE doc WITH { test: 1 } IN " + c.name() // nothing returned
"FOR doc IN " + c.name() + " FILTER doc.value > 100 UPDATE doc WITH { test: 1 } IN " + c.name(), // nothing returned
"FOR doc IN " + c.name() + " FILTER doc.x > 100 UPDATE doc WITH { test: 1 } IN " + c.name(), // using index
"FOR doc IN " + c.name() + " REPLACE doc WITH { test: 1 } IN " + c.name(), // nothing returned
"FOR doc IN " + c.name() + " REPLACE doc WITH { test: 1 } IN " + c.name() + " RETURN 1", // different values returned
"FOR doc IN " + c.name() + " FILTER doc.value > 100 REPLACE doc WITH { test: 1 } IN " + c.name(), // nothing returned
"FOR doc IN " + c.name() + " FILTER doc.x > 100 REPLACE doc WITH { test: 1 } IN " + c.name(), // using index
];
queries.forEach(function(query) {
@ -121,7 +141,7 @@ function optimizerRuleTestSuite () {
/// @brief test results
////////////////////////////////////////////////////////////////////////////////
testResultsAfterModification : function () {
testResultsAfterUpdate : function () {
var query = "FOR doc IN " + c.name() + " UPDATE doc WITH { value: -1 } IN " + c.name() + " RETURN doc";
var result = AQL_EXPLAIN(query, { });
assertEqual(-1, result.plan.rules.indexOf(ruleName), query);
@ -132,14 +152,41 @@ function optimizerRuleTestSuite () {
for (var i = 0; i < result.length; ++i) {
assertTrue(result[i].value >= 0);
}
c.toArray().forEach(function(doc) {
assertEqual(-1, doc.value);
});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test results
////////////////////////////////////////////////////////////////////////////////
testResultsOld : function () {
var query = "FOR doc IN " + c.name() + " UPDATE doc WITH { value: -1 } IN " + c.name() + " RETURN OLD";
testResultsAfterReplace : function () {
var query = "FOR doc IN " + c.name() + " REPLACE doc WITH { xy: -1, bang: true } IN " + c.name() + " RETURN doc";
var result = AQL_EXPLAIN(query, { });
assertEqual(-1, result.plan.rules.indexOf(ruleName), query);
result = AQL_EXECUTE(query).json;
assertEqual(2000, result.length);
for (var i = 0; i < result.length; ++i) {
assertTrue(result[i].value >= 0);
}
c.toArray().forEach(function(doc) {
assertUndefined(doc.value);
assertEqual(-1, doc.xy);
assertTrue(doc.bang);
});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test results
////////////////////////////////////////////////////////////////////////////////
testResultsUpdateOld : function () {
var query = "FOR doc IN " + c.name() + " UPDATE doc WITH { value: -1, bang: true } IN " + c.name() + " RETURN OLD";
var result = AQL_EXPLAIN(query, { });
assertNotEqual(-1, result.plan.rules.indexOf(ruleName), query);
@ -148,15 +195,21 @@ function optimizerRuleTestSuite () {
for (var i = 0; i < result.length; ++i) {
assertTrue(result[i].value >= 0);
assertUndefined(result[i].bang);
}
c.toArray().forEach(function(doc) {
assertEqual(-1, doc.value);
assertTrue(doc.bang);
});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test results
////////////////////////////////////////////////////////////////////////////////
testResultsNew : function () {
var query = "FOR doc IN " + c.name() + " UPDATE doc WITH { value: -1 } IN " + c.name() + " RETURN NEW";
testResultsUpdateNew : function () {
var query = "FOR doc IN " + c.name() + " UPDATE doc WITH { value: -1, bang: true } IN " + c.name() + " RETURN NEW";
var result = AQL_EXPLAIN(query, { });
assertNotEqual(-1, result.plan.rules.indexOf(ruleName), query);
@ -165,7 +218,63 @@ function optimizerRuleTestSuite () {
for (var i = 0; i < result.length; ++i) {
assertEqual(-1, result[i].value);
assertTrue(result[i].bang);
}
c.toArray().forEach(function(doc) {
assertEqual(-1, doc.value);
assertTrue(doc.bang);
});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test results
////////////////////////////////////////////////////////////////////////////////
testResultsReplaceOld : function () {
var query = "FOR doc IN " + c.name() + " REPLACE doc WITH { xy: -1, bang: true } IN " + c.name() + " RETURN OLD";
var result = AQL_EXPLAIN(query, { });
assertNotEqual(-1, result.plan.rules.indexOf(ruleName), query);
result = AQL_EXECUTE(query).json;
assertEqual(2000, result.length);
for (var i = 0; i < result.length; ++i) {
assertTrue(result[i].value >= 0);
assertUndefined(result[i].xy);
assertUndefined(result[i].bang);
}
c.toArray().forEach(function(doc) {
assertUndefined(doc.value);
assertEqual(-1, doc.xy);
assertTrue(doc.bang);
});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test results
////////////////////////////////////////////////////////////////////////////////
testResultsReplaceNew : function () {
var query = "FOR doc IN " + c.name() + " REPLACE doc WITH { xy: -1, bang: true } IN " + c.name() + " RETURN NEW";
var result = AQL_EXPLAIN(query, { });
assertNotEqual(-1, result.plan.rules.indexOf(ruleName), query);
result = AQL_EXECUTE(query).json;
assertEqual(2000, result.length);
for (var i = 0; i < result.length; ++i) {
assertEqual(-1, result[i].xy);
assertTrue(result[i].bang);
assertUndefined(result[i].value);
}
c.toArray().forEach(function(doc) {
assertEqual(-1, doc.xy);
assertTrue(doc.bang);
assertUndefined(doc.value);
});
}
};