From 8a402683ed3fbd2f998ba6b47874121d5971db2c Mon Sep 17 00:00:00 2001 From: Willi Goesgens Date: Fri, 22 Aug 2014 16:48:52 +0200 Subject: [PATCH] Optimizers: start implementing the sorting via indexes. --- arangod/Aql/AstNode.h | 8 ++ arangod/Aql/ExecutionPlan.cpp | 6 +- arangod/Aql/Expression.h | 9 ++ arangod/Aql/Optimizer.cpp | 4 + arangod/Aql/OptimizerRules.cpp | 166 +++++++++++++++++++++++++++++++++ arangod/Aql/OptimizerRules.h | 6 ++ arangod/Aql/Query.cpp | 2 +- 7 files changed, 197 insertions(+), 4 deletions(-) diff --git a/arangod/Aql/AstNode.h b/arangod/Aql/AstNode.h index 6738f5bb98..74819a1320 100644 --- a/arangod/Aql/AstNode.h +++ b/arangod/Aql/AstNode.h @@ -354,6 +354,14 @@ namespace triagens { value.type = type; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief check whether this node value is of expectedType +//////////////////////////////////////////////////////////////////////////////// + + inline bool isValueType (AstNodeValueType expectedType) const { + return value.type == expectedType; + } + //////////////////////////////////////////////////////////////////////////////// /// @brief return the bool value of a node //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Aql/ExecutionPlan.cpp b/arangod/Aql/ExecutionPlan.cpp index 46da2cb43f..d50d16cd21 100644 --- a/arangod/Aql/ExecutionPlan.cpp +++ b/arangod/Aql/ExecutionPlan.cpp @@ -88,13 +88,13 @@ ExecutionPlan* ExecutionPlan::instanciateFromAst (Ast* ast) { plan->_root = plan->fromNode(ast, root); plan->findVarUsage(); // just for debugging -/* - auto JsonPlan = plan->_root->toJson(); + auto JsonPlan = plan->_root->toJson(TRI_UNKNOWN_MEM_ZONE, false); auto JsonString = JsonPlan.toString(); std::cout << JsonString << "\n"; +/* auto otherPlan = ExecutionPlan::instanciateFromJson (ast, JsonPlan); - auto otherJsonString = otherPlan->_root->toJson().toString(); + auto otherJsonString = 
otherPlan->_root->toJson(TRI_UNKNOWN_MEM_ZONE, false).toString(); std::cout << otherJsonString << "\n"; TRI_ASSERT(otherJsonString == JsonString); return otherPlan; diff --git a/arangod/Aql/Expression.h b/arangod/Aql/Expression.h index 906437221b..13faa8cd01 100644 --- a/arangod/Aql/Expression.h +++ b/arangod/Aql/Expression.h @@ -141,6 +141,15 @@ namespace triagens { std::vector const&, std::vector const&); + +//////////////////////////////////////////////////////////////////////////////// +/// @brief check whether this is a simple expression. +//////////////////////////////////////////////////////////////////////////////// + + bool isSimple () const { + return _type == SIMPLE; + } + // ----------------------------------------------------------------------------- // --SECTION-- private functions // ----------------------------------------------------------------------------- diff --git a/arangod/Aql/Optimizer.cpp b/arangod/Aql/Optimizer.cpp index 2bd363991b..b5a029fcc9 100644 --- a/arangod/Aql/Optimizer.cpp +++ b/arangod/Aql/Optimizer.cpp @@ -41,6 +41,10 @@ using namespace triagens::aql; Optimizer::Optimizer () { // List all the rules in the system here: + // try to find sort blocks which are superseded by indexes + // registerRule (useIndexForSort, 888); + + // try to find a filter after an enumerate collection and find an index . . . 
registerRule(useIndexRange, 999);
 
diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp
index d34749f6f7..918b2688d6 100644
--- a/arangod/Aql/OptimizerRules.cpp
+++ b/arangod/Aql/OptimizerRules.cpp
@@ -531,6 +531,172 @@ int triagens::aql::useIndexRange (Optimizer* opt,
   return TRI_ERROR_NO_ERROR;
 }
 
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief try to match sorts to indices
+/// Walker started at a SORT node. It collects the CALCULATION nodes that set
+/// the sort variables; once all are found, an ENUMERATE_COLLECTION node whose
+/// variable the first sort expression accesses yields one alternative plan per
+/// skiplist or hash index, with the node replaced by an IndexRangeNode.
+/// NOTE(review): template arguments below are inferred from usage - confirm.
+////////////////////////////////////////////////////////////////////////////////
+
+class sortToIndexNode : public WalkerWorker<ExecutionNode> {
+  SortNode*                     _thisNode;
+  CalculationNode*              _thisSortNodesCalculationNode;
+  RangesInfo*                   _ranges;   // owned, released in destructor
+  ExecutionPlan*                _plan;
+  std::vector<Variable const*>  _vars;     // variables the sort node reads
+  std::vector<CalculationNode*> _myVars;   // _myVars[i] sets _vars[i]
+  Variable const*               _var;
+  Optimizer::PlanList&          _out;      // the caller's list, not a copy
+  ExecutionNode*                _prev;
+  size_t                        _executionNodesFound;
+
+  public:
+    sortToIndexNode (ExecutionPlan* plan,
+                     std::vector<Variable const*>& vars,
+                     Optimizer::PlanList& out)
+      : _thisNode(nullptr),
+        _thisSortNodesCalculationNode(nullptr),
+        _ranges(nullptr),
+        _plan(plan),
+        _vars(vars),
+        _myVars(vars.size(), nullptr),  // sized, so _myVars[i] is valid
+        _var(nullptr),
+        _out(out),
+        _prev(nullptr),
+        _executionNodesFound(0) {
+      _ranges = new RangesInfo();
+    }
+
+    ~sortToIndexNode () {
+      delete _ranges;
+    }
+
+    // owns _ranges, so copying would double-free it
+    sortToIndexNode (sortToIndexNode const&) = delete;
+    sortToIndexNode& operator= (sortToIndexNode const&) = delete;
+
+    void before (ExecutionNode* en) {
+      std::cout << "type:" << en->getTypeString() << "\n";
+      size_t const n = _vars.size();
+
+      if (_executionNodesFound < n) {
+        if (en->getType() != triagens::aql::ExecutionNode::CALCULATION) {
+          return;
+        }
+        // Look up whether this is one of the calculation nodes we reference.
+        auto outvar = en->getVariablesSetHere();
+        for (size_t i = 0; ! outvar.empty() && i < n; i++) {
+          if (_vars[i]->id == outvar[0]->id) {
+            _myVars[i] = static_cast<CalculationNode*>(en);
+            _executionNodesFound++;
+            break;
+          }
+        }
+        // TODO: once _executionNodesFound == n, study the calculations.
+        return;
+      }
+
+      // TODO: on FILTER check whether to ABORT; on a subsequent SORT check
+      // whether it's still needed; we should also match INDEX_RANGE later on.
+      if (en->getType() != triagens::aql::ExecutionNode::ENUMERATE_COLLECTION) {
+        return;
+      }
+      std::cout << "blub\n";
+
+      if (_myVars.empty() || _myVars[0] == nullptr) {
+        return;  // no sort expression to inspect
+      }
+
+      auto node = static_cast<EnumerateCollectionNode*>(en);
+      auto var = node->getVariablesSetHere()[0];  // should only be 1
+      auto exp = _myVars[0]->expression();
+
+      if (! exp->isSimple()) {
+        return;
+      }
+
+      // dig through nested attribute accesses:
+      std::vector<std::string> attrSet;
+      auto expNode = exp->node();
+      while (expNode->type == triagens::aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
+        attrSet.push_back(expNode->getStringValue());
+        expNode = expNode->getMember(0);
+      }
+
+      // we now should have the collection reference:
+      std::cout << var->name << " \n";
+      if (attrSet.empty() ||
+          expNode->type != triagens::aql::NODE_TYPE_REFERENCE ||
+          static_cast<Variable const*>(expNode->getData())->name != var->name) {
+        // the sort key is not an attribute of this collection's variable
+        return;
+      }
+      expNode = exp->node();
+
+      std::cout << expNode->getStringValue() << " -- " << var->name << " \n";
+      // look up indexes on the last attribute name collected above
+      std::vector<std::string> attrs;
+      attrs.push_back(attrSet.back());
+      auto idxs = node->getIndexes(attrs);
+
+      // make one new plan for every usable index, replacing the
+      // enumerate collection node with an IndexRangeNode . . .
+      std::vector<RangeInfo*> rangeInfo;
+      for (auto idx : idxs) {
+        if ((idx->_type != TRI_IDX_TYPE_SKIPLIST_INDEX) &&
+            (idx->_type != TRI_IDX_TYPE_HASH_INDEX)) {
+          continue;
+        }
+        std::cout << "FOUND INDEX!\n";
+
+        if (rangeInfo.empty()) {
+          // allocated lazily: nothing to leak when no index qualifies
+          rangeInfo.push_back(new RangeInfo(var->name, expNode->getStringValue(), nullptr, nullptr));
+        }
+        auto newPlan = _plan->clone();
+        ExecutionNode* newNode = nullptr;
+        try {
+          newNode = new IndexRangeNode(newPlan->nextId(), node->vocbase(),
+              node->collection(), node->outVariable(), idx, rangeInfo);
+          newPlan->registerNode(newNode);
+        }
+        catch (...) {
+          delete newNode;
+          delete newPlan;
+          throw;
+        }
+        newPlan->replaceNode(newPlan->getNodeById(node->id()), newNode);
+        auto JsonPlan = newPlan->toJson(TRI_UNKNOWN_MEM_ZONE, false);
+        auto JsonString = JsonPlan.toString();
+        std::cout <<"Added foo" << JsonString << "\n";
+
+        _out.push_back(newPlan);
+      }
+    }
+};
+
+// optimizer rule: offer index-based alternatives for every SORT node of plan
+int triagens::aql::useIndexForSort (Optimizer* opt,
+                                    ExecutionPlan* plan,
+                                    Optimizer::PlanList& out,
+                                    bool& keep) {
+  keep = true;
+  auto nodes = plan->findNodesOfType(triagens::aql::ExecutionNode::SORT, true);
+
+  for (auto n : nodes) {
+    auto oneNode = static_cast<SortNode*>(n);
+    auto invars = oneNode->getVariablesUsedHere();
+    // TODO: do we care about the invars? <- yes there may be more than one.
+    sortToIndexNode finder(plan, invars, out);
+    oneNode->walk(&finder);
+  }
+
+  return TRI_ERROR_NO_ERROR;
+}
+
 // Local Variables:
 // mode: outline-minor
 // outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)"
diff --git a/arangod/Aql/OptimizerRules.h b/arangod/Aql/OptimizerRules.h
index 0aea58eafb..1b158eac92 100644
--- a/arangod/Aql/OptimizerRules.h
+++ b/arangod/Aql/OptimizerRules.h
@@ -80,6 +80,12 @@ namespace triagens {
 
     int useIndexRange (Optimizer*, ExecutionPlan*, Optimizer::PlanList&, bool&);
 
+////////////////////////////////////////////////////////////////////////////////
+/// @brief try to use the index for sorting
+////////////////////////////////////////////////////////////////////////////////
+
+    int useIndexForSort (Optimizer*, ExecutionPlan*, Optimizer::PlanList&, bool&);
+
   }  // namespace aql
 }  // namespace triagens
 
diff --git a/arangod/Aql/Query.cpp b/arangod/Aql/Query.cpp
index f98f018e88..6b8a5396fe 100644
--- a/arangod/Aql/Query.cpp
+++ b/arangod/Aql/Query.cpp
@@ -202,7 +202,7 @@ QueryResult Query::execute () {
     parser.ast()->injectBindParameters(_bindParameters);
     // optimize the
ast parser.ast()->optimize(); - // std::cout << "AST: " << triagens::basics::JsonHelper::toString(parser.ast()->toJson(TRI_UNKNOWN_MEM_ZONE)) << "\n"; + // std::cout << "AST: " << triagens::basics::JsonHelper::toString(parser.ast()->toJson(TRI_UNKNOWN_MEM_ZONE, false)) << "\n"; } // create the transaction object, but do not start it yet