1
0
Fork 0

Bug fix/sort limit rule too lax (#10014)

* Added regression test for consecutive constrained sorts

* Bugfix: sort-limit rule skipped too many node types

* Added CHANGELOG entry

* Apply sort-limit rule before single document operations
This commit is contained in:
Tobias Gödderz 2019-09-18 09:04:43 +02:00 committed by Michael Hackstein
parent 3278568b5a
commit 2f59a79435
4 changed files with 77 additions and 15 deletions

View File

@ -1,6 +1,8 @@
devel devel
----- -----
* Bugfix: The AQL sort-limit optimization was applied in some cases it shouldn't, resulting in undefined behaviour.
* Remove operations documents in the cluster will now use an optimization, if all sharding keys * Remove operations documents in the cluster will now use an optimization, if all sharding keys
were specified. Should the sharding keys not match the values in the actual document were specified. Should the sharding keys not match the values in the actual document
a not found error will be returned. a not found error will be returned.

View File

@ -215,13 +215,13 @@ struct OptimizerRule {
removeTraversalPathVariable, removeTraversalPathVariable,
prepareTraversalsRule, prepareTraversalsRule,
// make sort node aware of subsequent limit statements for internal optimizations
applySortLimitRule,
// when we have single document operations, fill in special cluster // when we have single document operations, fill in special cluster
// handling. // handling.
substituteSingleDocumentOperations, substituteSingleDocumentOperations,
// make sort node aware of subsequent limit statements for internal optimizations
applySortLimitRule,
/// Pass 9: push down calculations beyond FILTERs and LIMITs /// Pass 9: push down calculations beyond FILTERs and LIMITs
moveCalculationsDownRule, moveCalculationsDownRule,

View File

@ -6879,6 +6879,49 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, std::unique_ptr<ExecutionPlan>
opt->addPlan(std::move(plan), rule, mod); opt->addPlan(std::move(plan), rule, mod);
} }
static bool isInnerPassthroughNode(ExecutionNode* node) {
switch (node->getType()) {
case ExecutionNode::CALCULATION:
case ExecutionNode::SUBQUERY:
return true;
case ExecutionNode::SINGLETON:
case ExecutionNode::ENUMERATE_COLLECTION:
case ExecutionNode::ENUMERATE_LIST:
case ExecutionNode::FILTER:
case ExecutionNode::LIMIT:
case ExecutionNode::SORT:
case ExecutionNode::COLLECT:
case ExecutionNode::INSERT:
case ExecutionNode::REMOVE:
case ExecutionNode::REPLACE:
case ExecutionNode::UPDATE:
case ExecutionNode::NORESULTS:
case ExecutionNode::UPSERT:
case ExecutionNode::TRAVERSAL:
case ExecutionNode::INDEX:
case ExecutionNode::SHORTEST_PATH:
case ExecutionNode::K_SHORTEST_PATHS:
case ExecutionNode::ENUMERATE_IRESEARCH_VIEW:
case ExecutionNode::RETURN:
return false;
case ExecutionNode::REMOTE:
case ExecutionNode::DISTRIBUTE:
case ExecutionNode::SCATTER:
case ExecutionNode::GATHER:
case ExecutionNode::REMOTESINGLE:
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_INTERNAL_AQL,
"Invalid node type in sort-limit optimizer rule. Please report this "
"error. Try turning off the sort-limit rule to get your query "
"working.");
default:;
}
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_INTERNAL_AQL,
"Unhandled node type in sort-limit optimizer rule. Please report this "
"error. Try turning off the sort-limit rule to get your query working.");
}
void arangodb::aql::sortLimitRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan, void arangodb::aql::sortLimitRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
OptimizerRule const& rule) { OptimizerRule const& rule) {
SmallVector<ExecutionNode*>::allocator_type::arena_type a; SmallVector<ExecutionNode*>::allocator_type::arena_type a;
@ -6891,21 +6934,14 @@ void arangodb::aql::sortLimitRule(Optimizer* opt, std::unique_ptr<ExecutionPlan>
LimitNode* limit = nullptr; LimitNode* limit = nullptr;
while (current) { while (current) {
if (current->getType() == EN::LIMIT) { if (isInnerPassthroughNode(current)) {
current = current->getFirstParent(); // inspect next node
} else if (current->getType() == EN::LIMIT) {
limit = ExecutionNode::castTo<LimitNode*>(current); limit = ExecutionNode::castTo<LimitNode*>(current);
break; // stop parsing after first LIMIT break; // stop parsing after first LIMIT
} else if (current->getType() == EN::FILTER || current->getType() == EN::RETURN || } else {
current->getType() == EN::ENUMERATE_COLLECTION || break; // stop parsing on any other node
current->getType() == EN::ENUMERATE_LIST ||
current->getType() == EN::ENUMERATE_IRESEARCH_VIEW ||
current->getType() == EN::TRAVERSAL ||
current->getType() == EN::SHORTEST_PATH ||
current->getType() == EN::K_SHORTEST_PATHS ||
current->getType() == EN::INDEX || current->getType() == EN::COLLECT) {
// TODO check other end conditions
break; // stop parsing
} }
current = current->getFirstParent(); // inspect next node
} }
// if we found a limit and we meet the heuristic, make the sort node // if we found a limit and we meet the heuristic, make the sort node

View File

@ -382,6 +382,30 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
assertEqual(sorts[0].strategy, 'constrained-heap'); assertEqual(sorts[0].strategy, 'constrained-heap');
}, },
////////////////////////////////////////////////////////////////////////////////
/// @brief Test two consecutive sorts
/// This is a regression test. Only the latter block may be a constrained sort.
////////////////////////////////////////////////////////////////////////////////
testLimitFullCollectionTwoSortBlocks : function () {
const query = `FOR c IN ${cn} SORT c.value ASC LET b = c.value*2 SORT b DESC LIMIT 10 RETURN b`;
var actual = getQueryResults(query);
assertEqual(10, actual.length);
assertEqual(1998, actual[0]);
assertEqual(1996, actual[1]);
assertEqual(1994, actual[2]);
assertEqual(1980, actual[9]);
var sorts = getSorts(query);
assertEqual(sorts.length, 2);
assertEqual(sorts[0].strategy, "standard");
assertEqual(sorts[1].limit, 10);
assertEqual(sorts[1].strategy, "constrained-heap");
},
}; };
} }