1
0
Fork 0

Bug fix/sort limit rule too lax (#10014)

* Added regression test for consecutive constrained sorts

* Bugfix: sort-limit rule skipped too many node types

* Added CHANGELOG entry

* Apply sort-limit rule before single document operations
This commit is contained in:
Tobias Gödderz 2019-09-18 09:04:43 +02:00 committed by Michael Hackstein
parent 3278568b5a
commit 2f59a79435
4 changed files with 77 additions and 15 deletions

View File

@ -1,6 +1,8 @@
devel
-----
* Bugfix: The AQL sort-limit optimization was applied in some cases it shouldn't, resulting in undefined behaviour.
* Remove operations documents in the cluster will now use an optimization, if all sharding keys
were specified. Should the sharding keys not match the values in the actual document
a not found error will be returned.

View File

@ -215,13 +215,13 @@ struct OptimizerRule {
removeTraversalPathVariable,
prepareTraversalsRule,
// make sort node aware of subsequent limit statements for internal optimizations
applySortLimitRule,
// when we have single document operations, fill in special cluster
// handling.
substituteSingleDocumentOperations,
// make sort node aware of subsequent limit statements for internal optimizations
applySortLimitRule,
/// Pass 9: push down calculations beyond FILTERs and LIMITs
moveCalculationsDownRule,

View File

@ -6879,6 +6879,49 @@ void arangodb::aql::geoIndexRule(Optimizer* opt, std::unique_ptr<ExecutionPlan>
opt->addPlan(std::move(plan), rule, mod);
}
static bool isInnerPassthroughNode(ExecutionNode* node) {
switch (node->getType()) {
case ExecutionNode::CALCULATION:
case ExecutionNode::SUBQUERY:
return true;
case ExecutionNode::SINGLETON:
case ExecutionNode::ENUMERATE_COLLECTION:
case ExecutionNode::ENUMERATE_LIST:
case ExecutionNode::FILTER:
case ExecutionNode::LIMIT:
case ExecutionNode::SORT:
case ExecutionNode::COLLECT:
case ExecutionNode::INSERT:
case ExecutionNode::REMOVE:
case ExecutionNode::REPLACE:
case ExecutionNode::UPDATE:
case ExecutionNode::NORESULTS:
case ExecutionNode::UPSERT:
case ExecutionNode::TRAVERSAL:
case ExecutionNode::INDEX:
case ExecutionNode::SHORTEST_PATH:
case ExecutionNode::K_SHORTEST_PATHS:
case ExecutionNode::ENUMERATE_IRESEARCH_VIEW:
case ExecutionNode::RETURN:
return false;
case ExecutionNode::REMOTE:
case ExecutionNode::DISTRIBUTE:
case ExecutionNode::SCATTER:
case ExecutionNode::GATHER:
case ExecutionNode::REMOTESINGLE:
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_INTERNAL_AQL,
"Invalid node type in sort-limit optimizer rule. Please report this "
"error. Try turning off the sort-limit rule to get your query "
"working.");
default:;
}
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_INTERNAL_AQL,
"Unhandled node type in sort-limit optimizer rule. Please report this "
"error. Try turning off the sort-limit rule to get your query working.");
}
void arangodb::aql::sortLimitRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
OptimizerRule const& rule) {
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
@ -6891,21 +6934,14 @@ void arangodb::aql::sortLimitRule(Optimizer* opt, std::unique_ptr<ExecutionPlan>
LimitNode* limit = nullptr;
while (current) {
if (current->getType() == EN::LIMIT) {
if (isInnerPassthroughNode(current)) {
current = current->getFirstParent(); // inspect next node
} else if (current->getType() == EN::LIMIT) {
limit = ExecutionNode::castTo<LimitNode*>(current);
break; // stop parsing after first LIMIT
} else if (current->getType() == EN::FILTER || current->getType() == EN::RETURN ||
current->getType() == EN::ENUMERATE_COLLECTION ||
current->getType() == EN::ENUMERATE_LIST ||
current->getType() == EN::ENUMERATE_IRESEARCH_VIEW ||
current->getType() == EN::TRAVERSAL ||
current->getType() == EN::SHORTEST_PATH ||
current->getType() == EN::K_SHORTEST_PATHS ||
current->getType() == EN::INDEX || current->getType() == EN::COLLECT) {
// TODO check other end conditions
break; // stop parsing
} else {
break; // stop parsing on any other node
}
current = current->getFirstParent(); // inspect next node
}
// if we found a limit and we meet the heuristic, make the sort node

View File

@ -382,6 +382,30 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
assertEqual(sorts[0].strategy, 'constrained-heap');
},
////////////////////////////////////////////////////////////////////////////////
/// @brief Test two consecutive sorts
/// This is a regression test. Only the latter block may be a constrained sort.
////////////////////////////////////////////////////////////////////////////////
testLimitFullCollectionTwoSortBlocks : function () {
const query = `FOR c IN ${cn} SORT c.value ASC LET b = c.value*2 SORT b DESC LIMIT 10 RETURN b`;
var actual = getQueryResults(query);
assertEqual(10, actual.length);
assertEqual(1998, actual[0]);
assertEqual(1996, actual[1]);
assertEqual(1994, actual[2]);
assertEqual(1980, actual[9]);
var sorts = getSorts(query);
assertEqual(sorts.length, 2);
assertEqual(sorts[0].strategy, "standard");
assertEqual(sorts[1].limit, 10);
assertEqual(sorts[1].strategy, "constrained-heap");
},
};
}