diff --git a/arangod/Aql/IResearchViewOptimizerRules.cpp b/arangod/Aql/IResearchViewOptimizerRules.cpp index 68267186d8..8be0c959dc 100644 --- a/arangod/Aql/IResearchViewOptimizerRules.cpp +++ b/arangod/Aql/IResearchViewOptimizerRules.cpp @@ -293,7 +293,7 @@ void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt, if (arangodb::aql::ExecutionNode::ENUMERATE_IRESEARCH_VIEW == loop->getType()) { auto & viewNode = *EN::castTo(loop); if (viewNode.isLateMaterialized()) { - continue; //loop is aleady optimized + continue; // loop is already optimized } ExecutionNode* current = limitNode->getFirstDependency(); ExecutionNode* sortNode = nullptr; @@ -301,6 +301,7 @@ void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt, // without document body usage before that node. // this node could be appended with materializer bool stopSearch = false; + bool stickToSortNode = false; while (current != loop) { switch (current->getType()) { case arangodb::aql::ExecutionNode::SORT: @@ -321,15 +322,21 @@ void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt, default: // make clang happy break; } - if (sortNode != nullptr) { + if (!stopSearch) { ::arangodb::containers::HashSet currentUsedVars; current->getVariablesUsedHere(currentUsedVars); if (currentUsedVars.find(&viewNode.outVariable()) != currentUsedVars.end()) { - // we have a doc body used before selected SortNode. Forget it, let`s look for better sort to use - sortNode = nullptr; - // this limit node affects only closest sort, if this sort is invalid - // we need to check other limit node - stopSearch = true; + if (sortNode != nullptr) { + // we have a doc body used before selected SortNode. Forget it, let`s look for better sort to use + sortNode = nullptr; + // this limit node affects only closest sort, if this sort is invalid + // we need to check other limit node + stopSearch = true; + } else { + // we are between limit and sort nodes. + // late materialization could still be applied but we must insert MATERIALIZE node after sort not after limit + stickToSortNode = true; + } } } if (stopSearch) { @@ -352,7 +359,7 @@ void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt, // on cluster we need to materialize node stay close to sort node on db server (to avoid network hop for materialization calls) // however on single server we move it to limit node to make materialization as lazy as possible - auto materializeDependency = ServerState::instance()->isCoordinator() ? sortNode : limitNode; + auto materializeDependency = ServerState::instance()->isCoordinator() || stickToSortNode ? sortNode : limitNode; auto* dependencyParent = materializeDependency->getFirstParent(); TRI_ASSERT(dependencyParent); dependencyParent->replaceDependency(materializeDependency, materializeNode); diff --git a/arangod/Aql/IResearchViewOptimizerRules.h b/arangod/Aql/IResearchViewOptimizerRules.h index 81836d4bd3..87856d2ef7 100644 --- a/arangod/Aql/IResearchViewOptimizerRules.h +++ b/arangod/Aql/IResearchViewOptimizerRules.h @@ -36,7 +36,7 @@ class ExecutionPlan; namespace iresearch { -/// @brief moves document materialization from view nodes to sort nodes +/// @brief moves document materialization from view nodes to materialize nodes void lateDocumentMaterializationArangoSearchRule(arangodb::aql::Optimizer* opt, std::unique_ptr plan, arangodb::aql::OptimizerRule const& rule); diff --git a/arangod/Aql/IndexNodeOptimizerRules.cpp b/arangod/Aql/IndexNodeOptimizerRules.cpp index 203d2209dd..342024526f 100644 --- a/arangod/Aql/IndexNodeOptimizerRules.cpp +++ b/arangod/Aql/IndexNodeOptimizerRules.cpp @@ -201,7 +201,7 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op if (arangodb::aql::ExecutionNode::INDEX == loop->getType()) { auto indexNode = EN::castTo(loop); if (indexNode->isLateMaterialized()) { - continue; // loop is aleady optimized + continue; // loop is already optimized } auto current = limitNode->getFirstDependency(); ExecutionNode* sortNode = nullptr; @@ -209,10 +209,12 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op // without document body usage before that node. // this node could be appended with materializer bool stopSearch = false; + bool stickToSortNode = false; std::vector nodesToChange; TRI_idx_iid_t commonIndexId = 0; // use one index only while (current != loop) { - switch (current->getType()) { + auto type = current->getType(); + switch (type) { case arangodb::aql::ExecutionNode::SORT: if (sortNode == nullptr) { // we need nearest to limit sort node, so keep selected if any sortNode = current; @@ -230,9 +232,17 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op } else if (!node.attrs.empty()) { if (!attributesMatch(commonIndexId, indexNode, node)) { // the node uses attributes which is not in index - stopSearch = true; + if (nullptr == sortNode) { + // we are between limit and sort nodes. + // late materialization could still be applied but we must insert MATERIALIZE node after sort not after limit + stickToSortNode = true; + } else { + // this limit node affects only closest sort, if this sort is invalid + // we need to check other limit node + stopSearch = true; + } } else { - nodesToChange.emplace_back(node); + nodesToChange.emplace_back(std::move(node)); } } break; @@ -246,12 +256,16 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op default: // make clang happy break; } - if (sortNode != nullptr && current->getType() != arangodb::aql::ExecutionNode::CALCULATION) { + // Currently only calculation and subquery nodes expected to use loop variable. + // We successfully replaced all references to loop variable in calculation nodes only. + // However if some other node types will begin to use loop variable + // assertion below will be triggered and this rule should be updated. + // Subquery node is planned to be supported later. + if (!stopSearch && type != arangodb::aql::ExecutionNode::CALCULATION) { ::arangodb::containers::HashSet currentUsedVars; current->getVariablesUsedHere(currentUsedVars); if (currentUsedVars.find(indexNode->outVariable()) != currentUsedVars.end()) { - // this limit node affects only closest sort, if this sort is invalid - // we need to check other limit node + TRI_ASSERT(arangodb::aql::ExecutionNode::SUBQUERY == type); stopSearch = true; } } @@ -301,7 +315,7 @@ void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* op // on cluster we need to materialize node stay close to sort node on db server (to avoid network hop for materialization calls) // however on single server we move it to limit node to make materialization as lazy as possible - auto materializeDependency = ServerState::instance()->isCoordinator() ? sortNode : limitNode; + auto materializeDependency = ServerState::instance()->isCoordinator() || stickToSortNode ? sortNode : limitNode; auto dependencyParent = materializeDependency->getFirstParent(); TRI_ASSERT(dependencyParent != nullptr); dependencyParent->replaceDependency(materializeDependency, materializeNode); diff --git a/arangod/Aql/IndexNodeOptimizerRules.h b/arangod/Aql/IndexNodeOptimizerRules.h index 8bee6a238d..a106a0202b 100644 --- a/arangod/Aql/IndexNodeOptimizerRules.h +++ b/arangod/Aql/IndexNodeOptimizerRules.h @@ -32,7 +32,7 @@ class Optimizer; struct OptimizerRule; class ExecutionPlan; -/// @brief moves document materialization from index nodes to sort nodes +/// @brief moves document materialization from index nodes to materialize nodes void lateDocumentMaterializationRule(arangodb::aql::Optimizer* opt, std::unique_ptr plan, arangodb::aql::OptimizerRule const& rule); diff --git a/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-arangosearch.js b/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-arangosearch.js index 48480c0ba8..ed39bbd1b2 100644 --- a/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-arangosearch.js +++ b/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-arangosearch.js @@ -211,6 +211,24 @@ function lateDocumentMaterializationArangoSearchRuleTestSuite () { }); assertEqual(0, expected.size); }, + testQueryResultsWithBetweenCalc() { + let query = "FOR d IN " + svn + " SEARCH d.value IN [1,2, 11, 12] SORT BM25(d) LET c = NOOPT(CONCAT(d._key, '-C')) LIMIT 10 RETURN c "; + let plan = AQL_EXPLAIN(query).plan; + if (!isCluster) { + assertNotEqual(-1, plan.rules.indexOf(ruleName)); + let result = AQL_EXECUTE(query); + assertEqual(4, result.json.length); + let expected = new Set(['c1-C', 'c2-C', 'c_1-C', 'c_2-C']); + result.json.forEach(function(doc) { + assertTrue(expected.has(doc)); + expected.delete(doc); + }); + assertEqual(0, expected.size); + } else { + // on cluster this will not be applied as calculation node will be moved up + assertEqual(-1, plan.rules.indexOf(ruleName)); + } + }, testQueryResultsSkipSome() { let query = "FOR d IN " + vn + " SEARCH PHRASE(d.str, 'cat', 'text_en') SORT TFIDF(d) DESC LIMIT 4, 1 RETURN d "; let plan = AQL_EXPLAIN(query).plan; diff --git a/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-rocksdb.js b/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-rocksdb.js index aa53ba7f5c..8812ac2424 100644 --- a/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-rocksdb.js +++ b/tests/js/server/aql/aql-optimizer-rule-late-document-materialization-rocksdb.js @@ -202,6 +202,49 @@ function lateDocumentMaterializationRuleTestSuite () { let plan = AQL_EXPLAIN(query).plan; assertEqual(-1, plan.rules.indexOf(ruleName)); }, + testNotAppliedDueToSubqueryWithDocumentAccess() { + for (i = 0; i < numOfCollectionIndexes; ++i) { + let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' " + + "LET a = NOOPT(d.obj.b) " + + "LET e = SUM(FOR c IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " LET p = CONCAT(d, c.obj.a) RETURN p) " + + "SORT CONCAT(a, e) LIMIT 10 RETURN d"; + let plan = AQL_EXPLAIN(query).plan; + assertEqual(-1, plan.rules.indexOf(ruleName)); + } + }, + testNotAppliedDueToSubqueryWithDocumentAccessByAttribute() { // should be supported later + for (i = 0; i < numOfCollectionIndexes; ++i) { + let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' " + + "LET a = NOOPT(d.obj.b) " + + "LET e = SUM(FOR c IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " LET p = CONCAT(d.obj.a, c.obj.a) RETURN p) " + + "SORT CONCAT(a, e) LIMIT 10 RETURN d"; + let plan = AQL_EXPLAIN(query).plan; + assertEqual(-1, plan.rules.indexOf(ruleName)); + } + }, + testQueryResultsWithSubqueryWithoutDocumentAccess() { + for (i = 0; i < numOfCollectionIndexes; ++i) { + let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' " + + "LET a = NOOPT(d.obj.b) " + + "LET e = SUM(FOR c IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " LET p = CONCAT(c.obj.b, c.obj.a) RETURN p) " + + "SORT CONCAT(a, e) LIMIT 10 RETURN d"; + let plan = AQL_EXPLAIN(query).plan; + if (!isCluster) { + assertNotEqual(-1, plan.rules.indexOf(ruleName)); + let result = AQL_EXECUTE(query); + assertEqual(2, result.json.length); + let expectedKeys = new Set(['c0', 'c2']); + result.json.forEach(function(doc) { + assertTrue(expectedKeys.has(doc._key)); + expectedKeys.delete(doc._key); + }); + assertEqual(0, expectedKeys.size); + } else { + // on cluster this will not be applied as remote node placed before sort node + assertEqual(-1, plan.rules.indexOf(ruleName)); + } + } + }, testQueryResultsWithCalculation() { for (i = 0; i < numOfCollectionIndexes; ++i) { let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' LET c = CONCAT(d.obj.b, RAND()) SORT c LIMIT 10 RETURN d"; @@ -279,6 +322,26 @@ function lateDocumentMaterializationRuleTestSuite () { assertEqual(0, expected.size); } }, + testQueryResultsWithBetweenCalc() { + for (i = 0; i < numOfCollectionIndexes; ++i) { + let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' SORT d.obj.c LET c = CONCAT(NOOPT(d.obj.d), '-C') LIMIT 10 RETURN c"; + let plan = AQL_EXPLAIN(query).plan; + if (!isCluster) { + assertNotEqual(-1, plan.rules.indexOf(ruleName)); + let result = AQL_EXECUTE(query); + assertEqual(2, result.json.length); + let expected = new Set(['d_val-C', 'd_val_2-C']); + result.json.forEach(function(doc) { + assertTrue(expected.has(doc)); + expected.delete(doc); + }); + assertEqual(0, expected.size); + } else { + // on cluster this will not be applied as calculation node will be moved up + assertEqual(-1, plan.rules.indexOf(ruleName)); + } + } + }, testQueryResultsSkipSome() { for (i = 0; i < numOfCollectionIndexes; ++i) { let query = "FOR d IN " + collectionNames[i] + " FILTER d.obj.a == 'a_val' SORT d.obj.c DESC LIMIT 1, 1 RETURN d"; @@ -300,7 +363,7 @@ function lateDocumentMaterializationRuleTestSuite () { }, testQueryResultsInSubquery() { for (i = 0; i < numOfCollectionIndexes; ++i) { - let query = "FOR c IN " + collectionNames[i % numOfCollectionIndexes] + " FILTER c.obj.a == 'a_val_1' " + + let query = "FOR c IN " + collectionNames[i] + " FILTER c.obj.a == 'a_val_1' " + "FOR d IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " FILTER c.obj.a == d.obj.a SORT d.obj.c LIMIT 10 RETURN d"; let plan = AQL_EXPLAIN(query).plan; assertNotEqual(-1, plan.rules.indexOf(ruleName)); @@ -316,7 +379,7 @@ function lateDocumentMaterializationRuleTestSuite () { }, testQueryResultsInOuterSubquery() { for (i = 0; i < numOfCollectionIndexes; ++i) { - let query = "FOR c IN " + collectionNames[i % numOfCollectionIndexes] + " FILTER c.obj.a == 'a_val_1' SORT c.obj.c LIMIT 10 " + + let query = "FOR c IN " + collectionNames[i] + " FILTER c.obj.a == 'a_val_1' SORT c.obj.c LIMIT 10 " + "FOR d IN " + collectionNames[(i + 1) % numOfCollectionIndexes] + " FILTER c.obj.a == d.obj.a RETURN d"; let plan = AQL_EXPLAIN(query).plan; assertNotEqual(-1, plan.rules.indexOf(ruleName));