From 205736a02866056c2e05677967806a1c46ada57d Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Sun, 15 Feb 2015 13:29:53 +0100 Subject: [PATCH] added optimizer rule "propagate-constant-attributes" --- CHANGELOG | 8 + Documentation/Books/Users/Aql/Optimizer.mdpp | 10 +- arangod/Aql/Optimizer.cpp | 6 + arangod/Aql/Optimizer.h | 5 +- arangod/Aql/OptimizerRules.cpp | 262 ++++++++++++++++++ arangod/Aql/OptimizerRules.h | 6 + .../js/modules/org/arangodb/aql/explainer.js | 5 + .../modules/org/arangodb/aql/explainer.js | 5 + js/server/tests/aql-optimizer-indexes.js | 86 +++++- ...change-adjacent-enumerations-noncluster.js | 2 +- 10 files changed, 387 insertions(+), 8 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a4801b3586..9a1272a55d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,14 @@ v2.5.0 (XXXX-XX-XX) ------------------- +* added optimizer rule `propagate-constant-attributes` + + This rule will look inside `FILTER` conditions for constant value equality comparisons, + and insert the constant values in other places in `FILTER`s. For example, the rule will + insert `42` instead of `i.value` in the second `FILTER` of the following query: + + FOR i IN c1 FOR j IN c2 FILTER i.value == 42 FILTER j.value == i.value RETURN 1 + * added `filtered` value to AQL query execution statistics This value indicates how many documents were filtered by `FilterNode`s in the AQL query. diff --git a/Documentation/Books/Users/Aql/Optimizer.mdpp b/Documentation/Books/Users/Aql/Optimizer.mdpp index 7b22ade0cd..b71888e570 100644 --- a/Documentation/Books/Users/Aql/Optimizer.mdpp +++ b/Documentation/Books/Users/Aql/Optimizer.mdpp @@ -365,13 +365,15 @@ The following optimizer rules may appear in the `rules` attribute of a plan: optimizations). * `remove-redundant-sorts`: will appear if multiple *SORT* statements can be merged into fewer sorts. -* `remove-collect-into`: will appear if an *INTO* clause was removed from a *COLLECT* - statement because the result of *INTO* is not used. 
-* `remove-sort-rand`: will appear when a *SORT RAND()* expression is removed by - moving the random iteration into an *EnumerateCollectionNode*. * `interchange-adjacent-enumerations`: will appear if a query contains multiple *FOR* statements whose order were permuted. Permutation of *FOR* statements is performed because it may enable further optimizations by other rules. +* `remove-sort-rand`: will appear when a *SORT RAND()* expression is removed by + moving the random iteration into an *EnumerateCollectionNode*. +* `remove-collect-into`: will appear if an *INTO* clause was removed from a *COLLECT* + statement because the result of *INTO* is not used. +* `propagate-constant-attributes`: will appear when a constant value was inserted + into a filter condition, replacing a dynamic attribute value. * `replace-or-with-in`: will appear if multiple *OR*-combined equality conditions on the same variable or attribute were replaced with an *IN* condition. * `remove-redundant-or`: will appear if multiple *OR* conditions for the same variable diff --git a/arangod/Aql/Optimizer.cpp b/arangod/Aql/Optimizer.cpp index d38ce50313..51cfd24a29 100644 --- a/arangod/Aql/Optimizer.cpp +++ b/arangod/Aql/Optimizer.cpp @@ -483,6 +483,12 @@ void Optimizer::setupRules () { removeCollectIntoRule_pass5, true); + // propagate constant attributes in FILTERs + registerRule("propagate-constant-attributes", + propagateConstantAttributesRule, + propagateConstantAttributesRule_pass5, + true); + ////////////////////////////////////////////////////////////////////////////// /// "Pass 6": use indexes if possible for FILTER and/or SORT nodes ////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Aql/Optimizer.h b/arangod/Aql/Optimizer.h index cf33fcb564..f33889f536 100644 --- a/arangod/Aql/Optimizer.h +++ b/arangod/Aql/Optimizer.h @@ -161,6 +161,9 @@ namespace triagens { // remove INTO for COLLECT if appropriate removeCollectIntoRule_pass5 = 750, + // 
propagate constant attributes in FILTERs + propagateConstantAttributesRule_pass5 = 760, + ////////////////////////////////////////////////////////////////////////////// /// "Pass 6": use indexes if possible for FILTER and/or SORT nodes ////////////////////////////////////////////////////////////////////////////// @@ -650,7 +653,7 @@ namespace triagens { /// @brief default value for maximal number of plans to produce //////////////////////////////////////////////////////////////////////////////// - static size_t const DefaultMaxNumberOfPlans = 256; + static size_t const DefaultMaxNumberOfPlans = 192; }; diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index c84ecebb93..5002a735fb 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -503,6 +503,268 @@ int triagens::aql::removeCollectIntoRule (Optimizer* opt, return TRI_ERROR_NO_ERROR; } +// ----------------------------------------------------------------------------- +// --SECTION-- helper class for propagateConstantAttributesRule +// ----------------------------------------------------------------------------- + +class PropagateConstantAttributesHelper { + + public: + + PropagateConstantAttributesHelper () + : _constants(), + _modified(false) { + } + + bool modified () const { + return _modified; + } + +//////////////////////////////////////////////////////////////////////////////// +/// @brief inspects a plan and propagates constant values in expressions +//////////////////////////////////////////////////////////////////////////////// + + void propagateConstants (ExecutionPlan* plan) { + std::vector&& nodes = plan->findNodesOfType(EN::FILTER, true); + + for (auto node : nodes) { + auto fn = static_cast(node); + + auto inVar = fn->getVariablesUsedHere(); + TRI_ASSERT(inVar.size() == 1); + + auto setter = plan->getVarSetBy(inVar[0]->id); + if (setter != nullptr && + setter->getType() == EN::CALCULATION) { + auto cn = static_cast(setter); + auto expression = 
cn->expression(); + + if (expression != nullptr) { + collectConstantAttributes(const_cast(expression->node())); + } + } + } + + if (! _constants.empty()) { + for (auto node : nodes) { + auto fn = static_cast(node); + + auto inVar = fn->getVariablesUsedHere(); + TRI_ASSERT(inVar.size() == 1); + + auto setter = plan->getVarSetBy(inVar[0]->id); + if (setter != nullptr && + setter->getType() == EN::CALCULATION) { + auto cn = static_cast(setter); + auto expression = cn->expression(); + + if (expression != nullptr) { + insertConstantAttributes(const_cast(expression->node())); + } + } + } + } + } + + private: + + AstNode const* getConstant (Variable const* variable, + std::string const& attribute) const { + auto it = _constants.find(variable); + + if (it == _constants.end()) { + return nullptr; + } + + auto it2 = (*it).second.find(attribute); + + if (it2 == (*it).second.end()) { + return nullptr; + } + + return (*it2).second; + } + +//////////////////////////////////////////////////////////////////////////////// +/// @brief inspects an expression (recursively) and notes constant attribute +/// values so they can be propagated later +//////////////////////////////////////////////////////////////////////////////// + + void collectConstantAttributes (AstNode* node) { + if (node == nullptr) { + return; + } + + if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) { + auto lhs = node->getMember(0); + auto rhs = node->getMember(1); + + collectConstantAttributes(lhs); + collectConstantAttributes(rhs); + } + else if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) { + auto lhs = node->getMember(0); + auto rhs = node->getMember(1); + + if (lhs->isConstant() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + inspectConstantAttribute(rhs, lhs); + } + else if (rhs->isConstant() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + inspectConstantAttribute(lhs, rhs); + } + } + } + +//////////////////////////////////////////////////////////////////////////////// +/// @brief traverses an AST part 
recursively and patches it by inserting +/// constant values +//////////////////////////////////////////////////////////////////////////////// + + void insertConstantAttributes (AstNode* node) { + if (node == nullptr) { + return; + } + + if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) { + auto lhs = node->getMember(0); + auto rhs = node->getMember(1); + + insertConstantAttributes(lhs); + insertConstantAttributes(rhs); + } + else if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) { + auto lhs = node->getMember(0); + auto rhs = node->getMember(1); + + if (! lhs->isConstant() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + insertConstantAttribute(node, 1); + } + if (! rhs->isConstant() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + insertConstantAttribute(node, 0); + } + } + } + +//////////////////////////////////////////////////////////////////////////////// +/// @brief extract an attribute and its variable from an attribute access +/// (e.g. `a.b.c` will return variable `a` and attribute name `.b.c`). 
+//////////////////////////////////////////////////////////////////////////////// + + bool getAttribute (AstNode const* attribute, + Variable const*& variable, + std::string& name) { + TRI_ASSERT(attribute != nullptr && + attribute->type == NODE_TYPE_ATTRIBUTE_ACCESS); + TRI_ASSERT(name.empty()); + + while (attribute->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + char const* attributeName = attribute->getStringValue(); + + TRI_ASSERT(attributeName != nullptr); + name = std::string(".") + std::string(attributeName) + name; + attribute = attribute->getMember(0); + } + + if (attribute->type != NODE_TYPE_REFERENCE) { + return false; + } + + variable = static_cast(attribute->getData()); + TRI_ASSERT(variable != nullptr); + + return true; + } + +//////////////////////////////////////////////////////////////////////////////// +/// @brief inspect the constant value assigned to an attribute +/// the attribute value will be stored so it can be inserted for the attribute +/// later +//////////////////////////////////////////////////////////////////////////////// + + void inspectConstantAttribute (AstNode const* attribute, + AstNode const* value) { + Variable const* variable = nullptr; + std::string name; + + if (! getAttribute(attribute, variable, name)) { + return; + } + + auto it = _constants.find(variable); + + if (it == _constants.end()) { + _constants.emplace(std::make_pair(variable, std::unordered_map{ { name, value } })); + return; + } + + auto it2 = (*it).second.find(name); + + if (it2 == (*it).second.end()) { + // first value for the attribute + (*it).second.insert(std::make_pair(name, value)); + } + else { + auto previous = (*it2).second; + + if (previous == nullptr) { + // we have multiple different values for the attribute. better not use this attribute + return; + } + + if (TRI_CompareValuesJson(value->computeJson(), previous->computeJson(), true) != 0) { + // different value found for an already tracked attribute. 
better not use this attribute + (*it2).second = nullptr; + } + } + } + +//////////////////////////////////////////////////////////////////////////////// +/// @brief patches an AstNode by inserting a constant value into it +//////////////////////////////////////////////////////////////////////////////// + + void insertConstantAttribute (AstNode* parentNode, + size_t accessIndex) { + Variable const* variable = nullptr; + std::string name; + + if (! getAttribute(parentNode->getMember(accessIndex), variable, name)) { + return; + } + + auto constantValue = getConstant(variable, name); + + if (constantValue != nullptr) { + parentNode->changeMember(accessIndex, const_cast(constantValue)); + _modified = true; + } + } + + std::unordered_map> _constants; + + bool _modified; +}; + +//////////////////////////////////////////////////////////////////////////////// +/// @brief propagate constant attributes in FILTERs +//////////////////////////////////////////////////////////////////////////////// + +int triagens::aql::propagateConstantAttributesRule (Optimizer* opt, + ExecutionPlan* plan, + Optimizer::Rule const* rule) { + PropagateConstantAttributesHelper helper; + helper.propagateConstants(plan); + + bool const modified = helper.modified(); + + if (modified) { + plan->findVarUsage(); + } + + opt->addPlan(plan, rule->level, modified); + + return TRI_ERROR_NO_ERROR; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief remove SORT RAND() if appropriate //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Aql/OptimizerRules.h b/arangod/Aql/OptimizerRules.h index 805d2acbda..4c90a458be 100644 --- a/arangod/Aql/OptimizerRules.h +++ b/arangod/Aql/OptimizerRules.h @@ -68,6 +68,12 @@ namespace triagens { int removeCollectIntoRule (Optimizer*, ExecutionPlan*, Optimizer::Rule const*); +//////////////////////////////////////////////////////////////////////////////// +/// @brief propagate constant 
attributes in FILTERs +//////////////////////////////////////////////////////////////////////////////// + + int propagateConstantAttributesRule (Optimizer*, ExecutionPlan*, Optimizer::Rule const*); + //////////////////////////////////////////////////////////////////////////////// /// @brief remove SORT RAND() if appropriate //////////////////////////////////////////////////////////////////////////////// diff --git a/js/apps/system/aardvark/frontend/js/modules/org/arangodb/aql/explainer.js b/js/apps/system/aardvark/frontend/js/modules/org/arangodb/aql/explainer.js index 35606b01ca..8ed26bec14 100644 --- a/js/apps/system/aardvark/frontend/js/modules/org/arangodb/aql/explainer.js +++ b/js/apps/system/aardvark/frontend/js/modules/org/arangodb/aql/explainer.js @@ -371,6 +371,11 @@ function processQuery (query, explain) { ranges.forEach(function(range) { var attr = range.attr; + if (range.lowConst.hasOwnProperty("bound") && range.highConst.hasOwnProperty("bound") && + JSON.stringify(range.lowConst.bound) === JSON.stringify(range.highConst.bound)) { + range.equality = true; + } + if (range.equality) { if (range.lowConst.hasOwnProperty("bound")) { results.push(buildBound(attr, [ "==", "==" ], range.lowConst)); diff --git a/js/common/modules/org/arangodb/aql/explainer.js b/js/common/modules/org/arangodb/aql/explainer.js index c50940c253..d96718c2d8 100644 --- a/js/common/modules/org/arangodb/aql/explainer.js +++ b/js/common/modules/org/arangodb/aql/explainer.js @@ -370,6 +370,11 @@ function processQuery (query, explain) { ranges.forEach(function(range) { var attr = range.attr; + if (range.lowConst.hasOwnProperty("bound") && range.highConst.hasOwnProperty("bound") && + JSON.stringify(range.lowConst.bound) === JSON.stringify(range.highConst.bound)) { + range.equality = true; + } + if (range.equality) { if (range.lowConst.hasOwnProperty("bound")) { results.push(buildBound(attr, [ "==", "==" ], range.lowConst)); diff --git a/js/server/tests/aql-optimizer-indexes.js 
b/js/server/tests/aql-optimizer-indexes.js index a6e9099451..7bc468af2b 100644 --- a/js/server/tests/aql-optimizer-indexes.js +++ b/js/server/tests/aql-optimizer-indexes.js @@ -54,6 +54,88 @@ function optimizerIndexesTestSuite () { db._drop("UnitTestsCollection"); }, +//////////////////////////////////////////////////////////////////////////////// +/// @brief test index usage +//////////////////////////////////////////////////////////////////////////////// + + testValuePropagation : function () { + var queries = [ + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER i.value == 10 && i.value == j.value RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER i.value == 10 FiLTER i.value == j.value RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER i.value == 10 FiLTER j.value == i.value RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER i.value == j.value && i.value == 10 RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER i.value == j.value FILTER i.value == 10 RETURN i.value", + "FOR i IN " + c.name() + " FILTER i.value == 10 FOR j IN " + c.name() + " FILTER i.value == j.value RETURN i.value", + "FOR i IN " + c.name() + " FILTER i.value == 10 FOR j IN " + c.name() + " FILTER j.value == i.value RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER 10 == i.value && i.value == j.value RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER 10 == i.value FiLTER i.value == j.value RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER 10 == i.value FiLTER j.value == i.value RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER i.value == j.value && 10 == i.value RETURN i.value", + "FOR i IN " + c.name() + " FOR j IN " + c.name() + " FILTER i.value == j.value FILTER 10 == i.value RETURN i.value", + "FOR i IN " + c.name() + " FILTER 10 == i.value FOR j IN " + 
c.name() + " FILTER i.value == j.value RETURN i.value", + "FOR i IN " + c.name() + " FILTER 10 == i.value FOR j IN " + c.name() + " FILTER j.value == i.value RETURN i.value" + ]; + + queries.forEach(function(query) { + var plan = AQL_EXPLAIN(query).plan; + var indexNodes = 0; + plan.nodes.map(function(node) { + if (node.type === "IndexRangeNode") { + ++indexNodes; + } + }); + + assertNotEqual(-1, plan.rules.indexOf("propagate-constant-attributes")); + assertEqual(2, indexNodes); + + var results = AQL_EXECUTE(query); + assertEqual([ 10 ], results.json, query); + assertEqual(0, results.stats.scannedFull); + assertTrue(results.stats.scannedIndex > 0); + }); + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test index usage +//////////////////////////////////////////////////////////////////////////////// + + testValuePropagationSubquery : function () { + var query = "FOR i IN " + c.name() + " FILTER i.value == 10 " + + "LET sub1 = (FOR j IN " + c.name() + " FILTER j.value == i.value RETURN j.value) " + + "LET sub2 = (FOR j IN " + c.name() + " FILTER j.value == i.value RETURN j.value) " + + "LET sub3 = (FOR j IN " + c.name() + " FILTER j.value == i.value RETURN j.value) " + + "RETURN [ i.value, sub1, sub2, sub3 ]"; + + var plan = AQL_EXPLAIN(query).plan; + + assertNotEqual(-1, plan.rules.indexOf("propagate-constant-attributes")); + + var results = AQL_EXECUTE(query); + assertEqual([ [ 10, [ 10 ], [ 10 ], [ 10 ] ] ], results.json, query); + assertEqual(0, results.stats.scannedFull); + assertTrue(results.stats.scannedIndex > 0); + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test index usage +//////////////////////////////////////////////////////////////////////////////// + + testNoValuePropagationSubquery : function () { + var query = "LET sub1 = (FOR j IN " + c.name() + " FILTER j.value == 10 RETURN j.value) " + + "LET sub2 = (FOR j IN " + c.name() + " FILTER j.value 
== 11 RETURN j.value) " + + "LET sub3 = (FOR j IN " + c.name() + " FILTER j.value == 12 RETURN j.value) " + + "RETURN [ sub1, sub2, sub3 ]"; + + var plan = AQL_EXPLAIN(query).plan; + + assertEqual(-1, plan.rules.indexOf("propagate-constant-attributes")); + + var results = AQL_EXECUTE(query); + assertEqual([ [ [ 10 ], [ 11 ], [ 12 ] ] ], results.json, query); + assertEqual(0, results.stats.scannedFull); + assertTrue(results.stats.scannedIndex > 0); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief test index usage //////////////////////////////////////////////////////////////////////////////// @@ -476,7 +558,7 @@ function optimizerIndexesTestSuite () { assertEqual(0, collectionNodes); assertEqual(3, indexNodes); - assertEqual(12, explain.stats.plansCreated); + assertEqual(18, explain.stats.plansCreated); var results = AQL_EXECUTE(query); assertEqual(0, results.stats.scannedFull); @@ -532,7 +614,7 @@ function optimizerIndexesTestSuite () { assertEqual(0, collectionNodes); assertEqual(20, indexNodes); - assertEqual(36, explain.stats.plansCreated); + assertEqual(64, explain.stats.plansCreated); var results = AQL_EXECUTE(query); assertEqual(0, results.stats.scannedFull); diff --git a/js/server/tests/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js b/js/server/tests/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js index 7562ddce65..cc6267eb97 100644 --- a/js/server/tests/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js +++ b/js/server/tests/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js @@ -229,7 +229,7 @@ function optimizerRuleTestSuite () { "FOR o IN " + collectionName + " RETURN 1"; var explain = AQL_EXPLAIN(query); - assertEqual(256, explain.stats.plansCreated); // default limit enforced by optimizer + assertEqual(192, explain.stats.plansCreated); // default limit enforced by optimizer }, 
////////////////////////////////////////////////////////////////////////////////