From 338a9c6c5402df0a4136808fb590cbba3a11b69f Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Thu, 28 Aug 2014 13:28:04 +0200 Subject: [PATCH] Implement permutation of EnumerateCollectionNodes. Also: restructure leveling of rules. And: remove some debugging output (and add some other). --- arangod/Aql/ExecutionBlock.cpp | 6 -- arangod/Aql/ExecutionPlan.cpp | 7 +- arangod/Aql/ExecutionPlan.h | 7 +- arangod/Aql/Optimizer.cpp | 58 +++++++++---- arangod/Aql/OptimizerRules.cpp | 149 +++++++++++++++++++++++++++++---- 5 files changed, 183 insertions(+), 44 deletions(-) diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index cedcc42ba8..308b35f53f 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -364,7 +364,6 @@ void ExecutionBlock::clearRegisters (AqlItemBlock* result) { } size_t ExecutionBlock::skipSome (size_t atLeast, size_t atMost) { - std::cout << "ExecutionBlock::skipSome\n"; TRI_ASSERT(0 < atLeast && atLeast <= atMost); size_t skipped = 0; AqlItemBlock* result = nullptr; @@ -379,7 +378,6 @@ size_t ExecutionBlock::skipSome (size_t atLeast, size_t atMost) { // skip exactly outputs, returns if _done after // skipping, and otherwise . . . 
bool ExecutionBlock::skip (size_t number) { - std::cout << "ExecutionBlock::skip\n"; size_t skipped = skipSome(number, number); size_t nr = skipped; while ( nr != 0 && skipped < number ){ @@ -421,7 +419,6 @@ int ExecutionBlock::getOrSkipSome (size_t atLeast, AqlItemBlock*& result, size_t& skipped) { - std::cout << "ExecutionBlock::getOrSkipSome\n"; TRI_ASSERT(result == nullptr && skipped == 0); if (_done) { return TRI_ERROR_NO_ERROR; @@ -855,7 +852,6 @@ int IndexRangeBlock::initCursor (AqlItemBlock* items, size_t pos) { AqlItemBlock* IndexRangeBlock::getSome (size_t atLeast, size_t atMost) { - std::cout << "IndexRangeBlock::getSome\n"; if (_done) { return nullptr; } @@ -929,8 +925,6 @@ AqlItemBlock* IndexRangeBlock::getSome (size_t atLeast, size_t IndexRangeBlock::skipSome (size_t atLeast, size_t atMost) { - std::cout << "IndexRangeBlock::skipSome\n"; - if (_done) { return 0; } diff --git a/arangod/Aql/ExecutionPlan.cpp b/arangod/Aql/ExecutionPlan.cpp index 5f90ba67de..aefb408d00 100644 --- a/arangod/Aql/ExecutionPlan.cpp +++ b/arangod/Aql/ExecutionPlan.cpp @@ -1068,8 +1068,11 @@ void ExecutionPlan::replaceNode (ExecutionNode* oldNode, } //////////////////////////////////////////////////////////////////////////////// -/// @brief insert before . must be registered with -/// the plan before this method is called +/// @brief insert <newNode> as a new (the first!) dependency of +/// <oldNode> and make the former first dependency of <oldNode> a +/// dependency of <newNode> (and no longer a direct dependency of +/// <oldNode>). +/// <newNode> must be registered with the plan before this method is called. 
//////////////////////////////////////////////////////////////////////////////// void ExecutionPlan::insertDependency (ExecutionNode* oldNode, diff --git a/arangod/Aql/ExecutionPlan.h b/arangod/Aql/ExecutionPlan.h index 11592cc9a8..676a8ff5cc 100644 --- a/arangod/Aql/ExecutionPlan.h +++ b/arangod/Aql/ExecutionPlan.h @@ -208,8 +208,11 @@ namespace triagens { ExecutionNode* newNode); //////////////////////////////////////////////////////////////////////////////// -/// @brief insert before . must be registered with -/// the plan before this method is called +/// @brief insert <newNode> as a new (the first!) dependency of +/// <oldNode> and make the former first dependency of <oldNode> a +/// dependency of <newNode> (and no longer a direct dependency of +/// <oldNode>). +/// <newNode> must be registered with the plan before this method is called. //////////////////////////////////////////////////////////////////////////////// void insertDependency (ExecutionNode* oldNode, diff --git a/arangod/Aql/Optimizer.cpp b/arangod/Aql/Optimizer.cpp index 07ecc37ea4..318927f19d 100644 --- a/arangod/Aql/Optimizer.cpp +++ b/arangod/Aql/Optimizer.cpp @@ -132,11 +132,12 @@ int Optimizer::createPlans (ExecutionPlan* plan) { sortPlans(); std::cout << "Optimisation ends with " << _plans.size() << " plans." 
<< std::endl; - std::cout << "Costs:" << std::endl; for (auto p : _plans.list) { - std::cout << p->getCost() << std::endl; + p->show(); + std::cout << "costing: " << p->getCost() << std::endl; + std::cout << std::endl; } - + return TRI_ERROR_NO_ERROR; } @@ -171,27 +172,52 @@ void Optimizer::setupRules () { // List all the rules in the system here: - // try to find sort blocks which are superseeded by indexes - registerRule("use-index-for-sort", useIndexForSort, 2000); + ////////////////////////////////////////////////////////////////////////////// + // "Pass 1": moving nodes "up" (potentially outside loops): + // please use levels between 1 and 99 here + ////////////////////////////////////////////////////////////////////////////// + // move calculations up the dependency chain (to pull them out of + // inner loops etc.) + registerRule("move-calculations-up", moveCalculationsUpRule, 10); - // try to find a filter after an enumerate collection and find an index . . . - registerRule("use-index-range", useIndexRange, 999); + // move filters up the dependency chain (to make result sets as small + // as possible as early as possible) + registerRule("move-filters-up", moveFiltersUpRule, 20); + + ////////////////////////////////////////////////////////////////////////////// + /// "Pass 2": interchange EnumerateCollection nodes in all possible ways + /// this is level 100, please never let new plans from higher + /// levels go back to this or lower levels! 
+ ////////////////////////////////////////////////////////////////////////////// + + registerRule("interchangeAdjacentEnumerations", + interchangeAdjacentEnumerations, 100); + + ////////////////////////////////////////////////////////////////////////////// + /// "Pass 3": try to remove redundant or unnecessary nodes + /// use levels between 101 and 199 for this + ////////////////////////////////////////////////////////////////////////////// // remove filters from the query that are not necessary at all // filters that are always true will be removed entirely // filters that are always false will be replaced with a NoResults node - registerRule("remove-unnecessary-filters", removeUnnecessaryFiltersRule, 100); + registerRule("remove-unnecessary-filters", removeUnnecessaryFiltersRule, 110); - // move calculations up the dependency chain (to pull them out of inner loops etc.) - registerRule("move-calculations-up", moveCalculationsUpRule, 1000); - - // move filters up the dependency chain (to make result sets as small as possible - // as early as possible) - registerRule("move-filters-up", moveFiltersUpRule, 1010); - // remove calculations that are never necessary - registerRule("remove-unnecessary-calculations", removeUnnecessaryCalculationsRule, 1020); + registerRule("remove-unnecessary-calculations", + removeUnnecessaryCalculationsRule, 120); + + ////////////////////////////////////////////////////////////////////////////// + /// "Pass 4": use indexes if possible for FILTER and/or SORT nodes + /// use levels between 200 and 299 for this + ////////////////////////////////////////////////////////////////////////////// + + // try to find a filter after an enumerate collection and find an index . . . 
+ registerRule("use-index-range", useIndexRange, 210); + + // try to find sort blocks which are superseded by indexes + registerRule("use-index-for-sort", useIndexForSort, 220); // Now sort them by level: std::stable_sort(_rules.begin(), _rules.end()); diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 3dc320863e..bc382cfcdb 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -825,6 +825,40 @@ int triagens::aql::useIndexForSort (Optimizer* opt, return TRI_ERROR_NO_ERROR; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief helper to compute lots of permutation tuples +/// a permutation tuple is represented as a single vector together with +/// another vector describing the boundaries of the tuples. +/// Example: +/// data: 0,1,2, 3,4, 5,6 +/// starts: 0, 3, 5, (indices of starts of sections) +/// means a tuple of 3 permutations of 3, 2 and 2 points respectively +/// This function computes the next permutation tuple among the +/// lexicographically sorted list of all such tuples. It returns true +/// if it has successfully computed this and false if the tuple is already +/// the lexicographically largest one. If false is returned, the permutation +/// tuple is back to the beginning. 
+//////////////////////////////////////////////////////////////////////////////// + +static bool nextPermutationTuple (std::vector& data, + std::vector& starts) { + auto begin = data.begin(); // a random access iterator + for (size_t i = starts.size(); i-- != 0; ) { + std::vector::iterator from = begin + starts[i]; + std::vector::iterator to; + if (i == starts.size()-1) { + to = data.end(); + } + else { + to = begin + starts[i+1]; + } + if (std::next_permutation(from, to)) { + return true; + } + } + return false; +} + //////////////////////////////////////////////////////////////////////////////// /// @brief interchange adjacent EnumerateCollectionNodes in all possible ways //////////////////////////////////////////////////////////////////////////////// @@ -837,31 +871,110 @@ int triagens::aql::interchangeAdjacentEnumerations (Optimizer* opt, std::vector nodes = plan->findNodesOfType(triagens::aql::ExecutionNode::ENUMERATE_COLLECTION, true); - + std::unordered_set nodesSet; + for (auto n : nodes) { + TRI_ASSERT(nodesSet.find(n) == nodesSet.end()); + nodesSet.insert(n); + } + + std::vector nodesToPermute; + std::vector permTuple; + std::vector starts; + // We use that the order of the nodes is such that a node B that is among the // recursive dependencies of a node A is later in the vector. 
- for (size_t i = 0; i < nodes.size(); i++) { - ExecutionNode* n = nodes[i]; - std::vector nn; - nn.push_back(n); - // Now follow the dependencies as long as we see further such nodes: - while (true) { - auto deps = n->getDependencies(); - if (deps.size() == 0) { - break; - } - if (deps[0]->getType() != triagens::aql::ExecutionNode::ENUMERATE_COLLECTION) { - break; - } - n = deps[0]; + for (auto n : nodes) { + + if (nodesSet.find(n) != nodesSet.end()) { + std::vector nn; nn.push_back(n); - } - if (nn.size() > 1) { - // Now we want to compute all permutations of nn + nodesSet.erase(n); + + // Now follow the dependencies as long as we see further such nodes: + auto nwalker = n; + while (true) { + auto deps = nwalker->getDependencies(); + if (deps.size() == 0) { + break; + } + if (deps[0]->getType() != + triagens::aql::ExecutionNode::ENUMERATE_COLLECTION) { + break; + } + nwalker = deps[0]; + nn.push_back(nwalker); + nodesSet.erase(nwalker); + } + if (nn.size() > 1) { + // Move it into the permutation tuple: + starts.push_back(permTuple.size()); + for (auto nnn : nn) { + nodesToPermute.push_back(nnn); + permTuple.push_back(permTuple.size()); + } + } } } + // Now we have collected all the runs of EnumerateCollectionNodes in the + // plan, we need to compute all possible permutations of all of them, + // independently. This is why we need to compute all permutation tuples. + out.push_back(plan, level); + if (! 
starts.empty()) { + nextPermutationTuple(permTuple, starts); // will never return false + do { + // Clone the plan: + auto newPlan = plan->clone(); + + try { // get rid of plan if any of this fails + // Find the nodes in the new plan corresponding to the ones in the + // old plan that we want to permute: + std::vector newNodes; + for (size_t j = 0; j < nodesToPermute.size(); j++) { + newNodes.push_back(newPlan->getNodeById(nodesToPermute[j]->id())); + } + + // Now get going with the permutations: + for (size_t i = 0; i < starts.size(); i++) { + size_t lowBound = starts[i]; + size_t highBound = (i < starts.size()-1) + ? starts[i+1] + : permTuple.size(); + // We need to remove the nodes + // newNodes[lowBound..highBound-1] in newPlan and replace + // them by the same ones in a different order, given by + // permTuple[lowBound..highBound-1]. + auto parents = newNodes[lowBound]->getParents(); + TRI_ASSERT(parents.size() == 1); + auto parent = parents[0]; // needed for insertion later + + // Unlink all those nodes: + for (size_t j = lowBound; j < highBound; j++) { + newPlan->unlinkNode(newNodes[j]); + } + + // And insert them in the new order: + for (size_t j = highBound; j-- != lowBound; ) { + newPlan->insertDependency(parent, newNodes[permTuple[j]]); + } + } + + // OK, the new plan is ready, let's report it: + out.push_back(newPlan, level); + + // Stop if this gets out of hand: + if (out.size() > opt->maxNumberOfPlans) { + break; + } + } + catch (...) { + delete newPlan; + throw; + } + + } while(nextPermutationTuple(permTuple, starts)); + } return TRI_ERROR_NO_ERROR; }