mirror of https://gitee.com/bigwinds/arangodb
Bug fix/fix late materialization used after sort (#10447)
* Fixed applying late materialization with calc node between sort an limit * fixed typo * move micro optimization * Fixed subqueries error
This commit is contained in:
parent
c6fc77858f
commit
caffa6a22e
|
@ -293,7 +293,7 @@ void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt,
|
||||||
if (arangodb::aql::ExecutionNode::ENUMERATE_IRESEARCH_VIEW == loop->getType()) {
|
if (arangodb::aql::ExecutionNode::ENUMERATE_IRESEARCH_VIEW == loop->getType()) {
|
||||||
auto & viewNode = *EN::castTo<IResearchViewNode*>(loop);
|
auto & viewNode = *EN::castTo<IResearchViewNode*>(loop);
|
||||||
if (viewNode.isLateMaterialized()) {
|
if (viewNode.isLateMaterialized()) {
|
||||||
continue; //loop is aleady optimized
|
continue; // loop is already optimized
|
||||||
}
|
}
|
||||||
ExecutionNode* current = limitNode->getFirstDependency();
|
ExecutionNode* current = limitNode->getFirstDependency();
|
||||||
ExecutionNode* sortNode = nullptr;
|
ExecutionNode* sortNode = nullptr;
|
||||||
|
@ -301,6 +301,7 @@ void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt,
|
||||||
// without document body usage before that node.
|
// without document body usage before that node.
|
||||||
// this node could be appended with materializer
|
// this node could be appended with materializer
|
||||||
bool stopSearch = false;
|
bool stopSearch = false;
|
||||||
|
bool stickToSortNode = false;
|
||||||
while (current != loop) {
|
while (current != loop) {
|
||||||
switch (current->getType()) {
|
switch (current->getType()) {
|
||||||
case arangodb::aql::ExecutionNode::SORT:
|
case arangodb::aql::ExecutionNode::SORT:
|
||||||
|
@ -321,15 +322,21 @@ void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt,
|
||||||
default: // make clang happy
|
default: // make clang happy
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (sortNode != nullptr) {
|
if (!stopSearch) {
|
||||||
::arangodb::containers::HashSet<Variable const*> currentUsedVars;
|
::arangodb::containers::HashSet<Variable const*> currentUsedVars;
|
||||||
current->getVariablesUsedHere(currentUsedVars);
|
current->getVariablesUsedHere(currentUsedVars);
|
||||||
if (currentUsedVars.find(&viewNode.outVariable()) != currentUsedVars.end()) {
|
if (currentUsedVars.find(&viewNode.outVariable()) != currentUsedVars.end()) {
|
||||||
// we have a doc body used before selected SortNode. Forget it, let`s look for better sort to use
|
if (sortNode != nullptr) {
|
||||||
sortNode = nullptr;
|
// we have a doc body used before selected SortNode. Forget it, let`s look for better sort to use
|
||||||
// this limit node affects only closest sort, if this sort is invalid
|
sortNode = nullptr;
|
||||||
// we need to check other limit node
|
// this limit node affects only closest sort, if this sort is invalid
|
||||||
stopSearch = true;
|
// we need to check other limit node
|
||||||
|
stopSearch = true;
|
||||||
|
} else {
|
||||||
|
// we are between limit and sort nodes.
|
||||||
|
// late materialization could still be applied but we must insert MATERIALIZE node after sort not after limit
|
||||||
|
stickToSortNode = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (stopSearch) {
|
if (stopSearch) {
|
||||||
|
@ -352,7 +359,7 @@ void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt,
|
||||||
|
|
||||||
// on cluster we need to materialize node stay close to sort node on db server (to avoid network hop for materialization calls)
|
// on cluster we need to materialize node stay close to sort node on db server (to avoid network hop for materialization calls)
|
||||||
// however on single server we move it to limit node to make materialization as lazy as possible
|
// however on single server we move it to limit node to make materialization as lazy as possible
|
||||||
auto materializeDependency = ServerState::instance()->isCoordinator() ? sortNode : limitNode;
|
auto materializeDependency = ServerState::instance()->isCoordinator() || stickToSortNode ? sortNode : limitNode;
|
||||||
auto* dependencyParent = materializeDependency->getFirstParent();
|
auto* dependencyParent = materializeDependency->getFirstParent();
|
||||||
TRI_ASSERT(dependencyParent);
|
TRI_ASSERT(dependencyParent);
|
||||||
dependencyParent->replaceDependency(materializeDependency, materializeNode);
|
dependencyParent->replaceDependency(materializeDependency, materializeNode);
|
||||||
|
|
|
@ -36,7 +36,7 @@ class ExecutionPlan;
|
||||||
|
|
||||||
namespace iresearch {
|
namespace iresearch {
|
||||||
|
|
||||||
/// @brief moves document materialization from view nodes to sort nodes
|
/// @brief moves document materialization from view nodes to materialize nodes
|
||||||
void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt,
|
void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt,
|
||||||
std::unique_ptr<arangodb::aql::ExecutionPlan> plan,
|
std::unique_ptr<arangodb::aql::ExecutionPlan> plan,
|
||||||
arangodb::aql::OptimizerRule const& rule);
|
arangodb::aql::OptimizerRule const& rule);
|
||||||
|
|
|
@ -201,7 +201,7 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op
|
||||||
if (arangodb::aql::ExecutionNode::INDEX == loop->getType()) {
|
if (arangodb::aql::ExecutionNode::INDEX == loop->getType()) {
|
||||||
auto indexNode = EN::castTo<IndexNode*>(loop);
|
auto indexNode = EN::castTo<IndexNode*>(loop);
|
||||||
if (indexNode->isLateMaterialized()) {
|
if (indexNode->isLateMaterialized()) {
|
||||||
continue; // loop is aleady optimized
|
continue; // loop is already optimized
|
||||||
}
|
}
|
||||||
auto current = limitNode->getFirstDependency();
|
auto current = limitNode->getFirstDependency();
|
||||||
ExecutionNode* sortNode = nullptr;
|
ExecutionNode* sortNode = nullptr;
|
||||||
|
@ -209,10 +209,12 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op
|
||||||
// without document body usage before that node.
|
// without document body usage before that node.
|
||||||
// this node could be appended with materializer
|
// this node could be appended with materializer
|
||||||
bool stopSearch = false;
|
bool stopSearch = false;
|
||||||
|
bool stickToSortNode = false;
|
||||||
std::vector<NodeWithAttrs> nodesToChange;
|
std::vector<NodeWithAttrs> nodesToChange;
|
||||||
TRI_idx_iid_t commonIndexId = 0; // use one index only
|
TRI_idx_iid_t commonIndexId = 0; // use one index only
|
||||||
while (current != loop) {
|
while (current != loop) {
|
||||||
switch (current->getType()) {
|
auto type = current->getType();
|
||||||
|
switch (type) {
|
||||||
case arangodb::aql::ExecutionNode::SORT:
|
case arangodb::aql::ExecutionNode::SORT:
|
||||||
if (sortNode == nullptr) { // we need nearest to limit sort node, so keep selected if any
|
if (sortNode == nullptr) { // we need nearest to limit sort node, so keep selected if any
|
||||||
sortNode = current;
|
sortNode = current;
|
||||||
|
@ -230,9 +232,17 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op
|
||||||
} else if (!node.attrs.empty()) {
|
} else if (!node.attrs.empty()) {
|
||||||
if (!attributesMatch(commonIndexId, indexNode, node)) {
|
if (!attributesMatch(commonIndexId, indexNode, node)) {
|
||||||
// the node uses attributes which is not in index
|
// the node uses attributes which is not in index
|
||||||
stopSearch = true;
|
if (nullptr == sortNode) {
|
||||||
|
// we are between limit and sort nodes.
|
||||||
|
// late materialization could still be applied but we must insert MATERIALIZE node after sort not after limit
|
||||||
|
stickToSortNode = true;
|
||||||
|
} else {
|
||||||
|
// this limit node affects only closest sort, if this sort is invalid
|
||||||
|
// we need to check other limit node
|
||||||
|
stopSearch = true;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
nodesToChange.emplace_back(node);
|
nodesToChange.emplace_back(std::move(node));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -246,12 +256,16 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op
|
||||||
default: // make clang happy
|
default: // make clang happy
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (sortNode != nullptr && current->getType() != arangodb::aql::ExecutionNode::CALCULATION) {
|
// Currently only calculation and subquery nodes expected to use loop variable.
|
||||||
|
// We successfully replaced all references to loop variable in calculation nodes only.
|
||||||
|
// However if some other node types will begin to use loop variable
|
||||||
|
// assertion below will be triggered and this rule should be updated.
|
||||||
|
// Subquery node is planned to be supported later.
|
||||||
|
if (!stopSearch && type != arangodb::aql::ExecutionNode::CALCULATION) {
|
||||||
::arangodb::containers::HashSet<Variable const*> currentUsedVars;
|
::arangodb::containers::HashSet<Variable const*> currentUsedVars;
|
||||||
current->getVariablesUsedHere(currentUsedVars);
|
current->getVariablesUsedHere(currentUsedVars);
|
||||||
if (currentUsedVars.find(indexNode->outVariable()) != currentUsedVars.end()) {
|
if (currentUsedVars.find(indexNode->outVariable()) != currentUsedVars.end()) {
|
||||||
// this limit node affects only closest sort, if this sort is invalid
|
TRI_ASSERT(arangodb::aql::ExecutionNode::SUBQUERY == type);
|
||||||
// we need to check other limit node
|
|
||||||
stopSearch = true;
|
stopSearch = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -301,7 +315,7 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op
|
||||||
|
|
||||||
// on cluster we need to materialize node stay close to sort node on db server (to avoid network hop for materialization calls)
|
// on cluster we need to materialize node stay close to sort node on db server (to avoid network hop for materialization calls)
|
||||||
// however on single server we move it to limit node to make materialization as lazy as possible
|
// however on single server we move it to limit node to make materialization as lazy as possible
|
||||||
auto materializeDependency = ServerState::instance()->isCoordinator() ? sortNode : limitNode;
|
auto materializeDependency = ServerState::instance()->isCoordinator() || stickToSortNode ? sortNode : limitNode;
|
||||||
auto dependencyParent = materializeDependency->getFirstParent();
|
auto dependencyParent = materializeDependency->getFirstParent();
|
||||||
TRI_ASSERT(dependencyParent != nullptr);
|
TRI_ASSERT(dependencyParent != nullptr);
|
||||||
dependencyParent->replaceDependency(materializeDependency, materializeNode);
|
dependencyParent->replaceDependency(materializeDependency, materializeNode);
|
||||||
|
|
|
@ -32,7 +32,7 @@ class Optimizer;
|
||||||
struct OptimizerRule;
|
struct OptimizerRule;
|
||||||
class ExecutionPlan;
|
class ExecutionPlan;
|
||||||
|
|
||||||
/// @brief moves document materialization from index nodes to sort nodes
|
/// @brief moves document materialization from index nodes to materialize nodes
|
||||||
void lateDocumentMaterializationRule(arangodb::aql::Optimizer* opt,
|
void lateDocumentMaterializationRule(arangodb::aql::Optimizer* opt,
|
||||||
std::unique_ptr<arangodb::aql::ExecutionPlan> plan,
|
std::unique_ptr<arangodb::aql::ExecutionPlan> plan,
|
||||||
arangodb::aql::OptimizerRule const& rule);
|
arangodb::aql::OptimizerRule const& rule);
|
||||||
|
|
|
@ -211,6 +211,24 @@ function lateDocumentMaterializationArangoSearchRuleTestSuite () {
|
||||||
});
|
});
|
||||||
assertEqual(0, expected.size);
|
assertEqual(0, expected.size);
|
||||||
},
|
},
|
||||||
|
testQueryResultsWithBetweenCalc() {
|
||||||
|
let query = "FOR d IN " + svn + " SEARCH d.value IN [1,2, 11, 12] SORT BM25(d) LET c = NOOPT(CONCAT(d._key, '-C')) LIMIT 10 RETURN c ";
|
||||||
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
|
if (!isCluster) {
|
||||||
|
assertNotEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
let result = AQL_EXECUTE(query);
|
||||||
|
assertEqual(4, result.json.length);
|
||||||
|
let expected = new Set(['c1-C', 'c2-C', 'c_1-C', 'c_2-C']);
|
||||||
|
result.json.forEach(function(doc) {
|
||||||
|
assertTrue(expected.has(doc));
|
||||||
|
expected.delete(doc);
|
||||||
|
});
|
||||||
|
assertEqual(0, expected.size);
|
||||||
|
} else {
|
||||||
|
// on cluster this will not be applied as calculation node will be moved up
|
||||||
|
assertEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
}
|
||||||
|
},
|
||||||
testQueryResultsSkipSome() {
|
testQueryResultsSkipSome() {
|
||||||
let query = "FOR d IN " + vn + " SEARCH PHRASE(d.str, 'cat', 'text_en') SORT TFIDF(d) DESC LIMIT 4, 1 RETURN d ";
|
let query = "FOR d IN " + vn + " SEARCH PHRASE(d.str, 'cat', 'text_en') SORT TFIDF(d) DESC LIMIT 4, 1 RETURN d ";
|
||||||
let plan = AQL_EXPLAIN(query).plan;
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
|
|
|
@ -202,6 +202,49 @@ function lateDocumentMaterializationRuleTestSuite () {
|
||||||
let plan = AQL_EXPLAIN(query).plan;
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
assertEqual(-1, plan.rules.indexOf(ruleName));
|
assertEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
},
|
},
|
||||||
|
testNotAppliedDueToSubqueryWithDocumentAccess() {
|
||||||
|
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
||||||
|
let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' " +
|
||||||
|
"LET a = NOOPT(d.obj.b) " +
|
||||||
|
"LET e = SUM(FOR c IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " LET p = CONCAT(d, c.obj.a) RETURN p) " +
|
||||||
|
"SORT CONCAT(a, e) LIMIT 10 RETURN d";
|
||||||
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
|
assertEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
testNotAppliedDueToSubqueryWithDocumentAccessByAttribute() { // should be supported later
|
||||||
|
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
||||||
|
let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' " +
|
||||||
|
"LET a = NOOPT(d.obj.b) " +
|
||||||
|
"LET e = SUM(FOR c IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " LET p = CONCAT(d.obj.a, c.obj.a) RETURN p) " +
|
||||||
|
"SORT CONCAT(a, e) LIMIT 10 RETURN d";
|
||||||
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
|
assertEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
testQueryResultsWithSubqueryWithoutDocumentAccess() {
|
||||||
|
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
||||||
|
let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' " +
|
||||||
|
"LET a = NOOPT(d.obj.b) " +
|
||||||
|
"LET e = SUM(FOR c IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " LET p = CONCAT(c.obj.b, c.obj.a) RETURN p) " +
|
||||||
|
"SORT CONCAT(a, e) LIMIT 10 RETURN d";
|
||||||
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
|
if (!isCluster) {
|
||||||
|
assertNotEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
let result = AQL_EXECUTE(query);
|
||||||
|
assertEqual(2, result.json.length);
|
||||||
|
let expectedKeys = new Set(['c0', 'c2']);
|
||||||
|
result.json.forEach(function(doc) {
|
||||||
|
assertTrue(expectedKeys.has(doc._key));
|
||||||
|
expectedKeys.delete(doc._key);
|
||||||
|
});
|
||||||
|
assertEqual(0, expectedKeys.size);
|
||||||
|
} else {
|
||||||
|
// on cluster this will not be applied as remote node placed before sort node
|
||||||
|
assertEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
testQueryResultsWithCalculation() {
|
testQueryResultsWithCalculation() {
|
||||||
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
||||||
let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' LET c = CONCAT(d.obj.b, RAND()) SORT c LIMIT 10 RETURN d";
|
let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' LET c = CONCAT(d.obj.b, RAND()) SORT c LIMIT 10 RETURN d";
|
||||||
|
@ -279,6 +322,26 @@ function lateDocumentMaterializationRuleTestSuite () {
|
||||||
assertEqual(0, expected.size);
|
assertEqual(0, expected.size);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
testQueryResultsWithBetweenCalc() {
|
||||||
|
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
||||||
|
let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' SORT d.obj.c LET c = CONCAT(NOOPT(d.obj.d), '-C') LIMIT 10 RETURN c";
|
||||||
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
|
if (!isCluster) {
|
||||||
|
assertNotEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
let result = AQL_EXECUTE(query);
|
||||||
|
assertEqual(2, result.json.length);
|
||||||
|
let expected = new Set(['d_val-C', 'd_val_2-C']);
|
||||||
|
result.json.forEach(function(doc) {
|
||||||
|
assertTrue(expected.has(doc));
|
||||||
|
expected.delete(doc);
|
||||||
|
});
|
||||||
|
assertEqual(0, expected.size);
|
||||||
|
} else {
|
||||||
|
// on cluster this will not be applied as calculation node will be moved up
|
||||||
|
assertEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
testQueryResultsSkipSome() {
|
testQueryResultsSkipSome() {
|
||||||
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
||||||
let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' SORT d.obj.c DESC LIMIT 1, 1 RETURN d";
|
let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' SORT d.obj.c DESC LIMIT 1, 1 RETURN d";
|
||||||
|
@ -300,7 +363,7 @@ function lateDocumentMaterializationRuleTestSuite () {
|
||||||
},
|
},
|
||||||
testQueryResultsInSubquery() {
|
testQueryResultsInSubquery() {
|
||||||
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
||||||
let query = "FOR c IN " + collectionNames[i % numOfCollectionIndexes] + " FILTER c.obj.a == 'a_val_1' " +
|
let query = "FOR c IN " + collectionNames[i] + " FILTER c.obj.a == 'a_val_1' " +
|
||||||
"FOR d IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " FILTER c.obj.a == d.obj.a SORT d.obj.c LIMIT 10 RETURN d";
|
"FOR d IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " FILTER c.obj.a == d.obj.a SORT d.obj.c LIMIT 10 RETURN d";
|
||||||
let plan = AQL_EXPLAIN(query).plan;
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
assertNotEqual(-1, plan.rules.indexOf(ruleName));
|
assertNotEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
@ -316,7 +379,7 @@ function lateDocumentMaterializationRuleTestSuite () {
|
||||||
},
|
},
|
||||||
testQueryResultsInOuterSubquery() {
|
testQueryResultsInOuterSubquery() {
|
||||||
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
for (i = 0; i < numOfCollectionIndexes; ++i) {
|
||||||
let query = "FOR c IN " + collectionNames[i % numOfCollectionIndexes] + " FILTER c.obj.a == 'a_val_1' SORT c.obj.c LIMIT 10 " +
|
let query = "FOR c IN " + collectionNames[i] + " FILTER c.obj.a == 'a_val_1' SORT c.obj.c LIMIT 10 " +
|
||||||
"FOR d IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " FILTER c.obj.a == d.obj.a RETURN d";
|
"FOR d IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " FILTER c.obj.a == d.obj.a RETURN d";
|
||||||
let plan = AQL_EXPLAIN(query).plan;
|
let plan = AQL_EXPLAIN(query).plan;
|
||||||
assertNotEqual(-1, plan.rules.indexOf(ruleName));
|
assertNotEqual(-1, plan.rules.indexOf(ruleName));
|
||||||
|
|
Loading…
Reference in New Issue