arangodb/arangod/Aql/OptimizerRules.cpp

////////////////////////////////////////////////////////////////////////////////
/// @brief rules for the query optimizer
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2010-2014 triagens GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is triAGENS GmbH, Cologne, Germany
///
/// @author Max Neunhoeffer
/// @author Jan Steemann
/// @author Copyright 2014, triagens GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////

#include "OptimizerRules.h"
#include "Aql/AggregateNode.h"
#include "Aql/AggregationOptions.h"
#include "Aql/ClusterNodes.h"
#include "Aql/ConditionFinder.h"
#include "Aql/ExecutionEngine.h"
#include "Aql/ExecutionNode.h"
#include "Aql/Function.h"
#include "Aql/Index.h"
#include "Aql/IndexNode.h"
#include "Aql/ModificationNodes.h"
#include "Aql/SortCondition.h"
#include "Aql/SortNode.h"
#include "Aql/TraversalConditionFinder.h"
#include "Aql/Variable.h"
#include "Aql/types.h"
#include "Basics/json-utilities.h"

using namespace triagens::aql;
using Json = triagens::basics::Json;
using EN   = triagens::aql::ExecutionNode;

// -----------------------------------------------------------------------------
// --SECTION--                                           rules for the optimizer
// -----------------------------------------------------------------------------

////////////////////////////////////////////////////////////////////////////////
/// @brief adds a SORT operation for IN right-hand side operands
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::sortInValuesRule (Optimizer* opt,
                                      ExecutionPlan* plan,
                                      Optimizer::Rule const* rule) {
  // Inspects every FILTER whose condition is an IN / NOT IN comparison with a
  // variable reference on its right-hand side. If that variable is produced by
  // a calculation or subquery outside of the FILTER's loop, a new calculation
  // wrapping the value in SORTED_UNIQUE(...) is inserted directly after the
  // producer, and the IN operator is re-pointed to the new variable.
  // The plan is modified in place and handed back to the optimizer at the end,
  // together with a flag telling whether anything was changed.
  bool modified = false;
  std::vector<ExecutionNode*> nodes(plan->findNodesOfType(EN::FILTER, true));

  for (auto const& n : nodes) {
    // filter nodes always have one input variable
    auto varsUsedHere = n->getVariablesUsedHere();
    TRI_ASSERT(varsUsedHere.size() == 1);

    // now check who introduced our variable
    auto variable = varsUsedHere[0];
    auto setter = plan->getVarSetBy(variable->id);

    if (setter == nullptr ||
        setter->getType() != EN::CALCULATION) {
      // filter variable was not introduced by a calculation.
      continue;
    }

    // filter variable was introduced by a CalculationNode. now check the expression
    auto s = static_cast<CalculationNode*>(setter);
    auto filterExpression = s->expression();
    // the node is fetched via nodeForModification() because it is patched in
    // place at the end of this loop iteration
    auto inNode = filterExpression->nodeForModification();

    TRI_ASSERT(inNode != nullptr);

    // check the filter condition
    if ((inNode->type != NODE_TYPE_OPERATOR_BINARY_IN && inNode->type != NODE_TYPE_OPERATOR_BINARY_NIN) ||
        inNode->canThrow() ||
        ! inNode->isDeterministic()) {
      // we better not tamper with this filter
      continue;
    }

    // member 1 is the right-hand side operand of the IN / NOT IN operator
    auto rhs = inNode->getMember(1);

    if (rhs->type != NODE_TYPE_REFERENCE) {
      // only right-hand sides that are plain variable references are handled
      continue;
    }

    auto loop = n->getLoop();

    if (loop == nullptr) {
      // FILTER is not used inside a loop. so it will be used at most once
      // no need to sort the IN values then
      continue;
    }

    // from here on, `variable` and `setter` describe the right-hand side
    // variable of the IN operator, not the FILTER's own input variable
    variable = static_cast<Variable const*>(rhs->getData());
    setter = plan->getVarSetBy(variable->id);

    if (setter == nullptr ||
        (setter->getType() != EN::CALCULATION && setter->getType() != EN::SUBQUERY)) {
      // variable itself was not introduced by a calculation or a subquery.
      continue;
    }

    if (loop == setter->getLoop()) {
      // the FILTER and its value calculation are contained in the same loop
      // this means the FILTER will be executed as many times as its value
      // calculation. sorting the IN values will not provide a benefit here
      continue;
    }

    // minimum number of values for which the rule is applied: constant arrays
    // with fewer members and subqueries with a lower item estimate are skipped
    static size_t const Threshold = 8;
    auto ast = plan->getAst();
    // the AST node that will become the single argument of SORTED_UNIQUE()
    AstNode const* originalArg = nullptr;

    if (setter->getType() == EN::CALCULATION) {
      AstNode const* originalNode = static_cast<CalculationNode*>(setter)->expression()->node();
      TRI_ASSERT(originalNode != nullptr);

      // testNode is only used for the checks below; the node that is finally
      // wrapped is always originalNode (i.e. including a NOOPT() call, if any)
      AstNode const* testNode = originalNode;

      if (originalNode->type == NODE_TYPE_FCALL &&
          static_cast<Function const*>(originalNode->getData())->externalName == "NOOPT") {
        // bypass NOOPT(...)
        TRI_ASSERT(originalNode->numMembers() == 1);
        auto args = originalNode->getMember(0);

        if (args->numMembers() > 0) {
          testNode = args->getMember(0);
        }
      }

      if (testNode->type == NODE_TYPE_VALUE ||
          testNode->type == NODE_TYPE_OBJECT) {
        // not really usable...
        continue;
      }

      if (testNode->type == NODE_TYPE_ARRAY &&
          testNode->numMembers() < Threshold) {
        // number of values is below threshold
        continue;
      }

      if (testNode->isSorted()) {
        // already sorted
        continue;
      }

      originalArg = originalNode;
    }
    else {
      TRI_ASSERT(setter->getType() == EN::SUBQUERY);
      auto sub = static_cast<SubqueryNode*>(setter);

      // estimate items in subquery
      // (the return value of getCost() is ignored here, only nrItems is used)
      size_t nrItems = 0;
      sub->getSubquery()->getCost(nrItems);

      if (nrItems < Threshold) {
        continue;
      }

      // for subqueries, the argument is a reference to the subquery's result
      originalArg = ast->createNodeReference(sub->outVariable());
    }

    TRI_ASSERT(originalArg != nullptr);

    // build the expression SORTED_UNIQUE(originalArg)
    auto args = ast->createNodeArray();
    args->addMember(originalArg);
    auto sorted = ast->createNodeFunctionCall("SORTED_UNIQUE", args);

    auto outVar = ast->variables()->createTemporaryVariable();
    ExecutionNode* calculationNode = nullptr;
    auto expression = new Expression(ast, sorted);
    try {
      calculationNode = new CalculationNode(plan, plan->nextId(), expression, outVar);
    }
    catch (...) {
      // the expression is still owned by us if the node could not be constructed
      delete expression;
      throw;
    }
    plan->registerNode(calculationNode);

    // make the new node a parent of the original calculation node
    // intended result: setter -> calculationNode -> (former first parent of setter)
    // NOTE(review): oldParents is a reference to setter's parent list, so the
    // order of the following statements matters - do not reorder them
    calculationNode->addDependency(setter);
    auto const& oldParents = setter->getParents();
    TRI_ASSERT(! oldParents.empty());
    calculationNode->addParent(oldParents[0]);

    oldParents[0]->removeDependencies();
    oldParents[0]->addDependency(calculationNode);
    setter->setParent(calculationNode);

    if (setter->getType() == EN::CALCULATION) {
      // mark the original node as being removable, even if it can throw
      // this is special as the optimizer will normally not remove any nodes
      // if they throw - even when fully unused otherwise
      static_cast<CalculationNode*>(setter)->canRemoveIfThrows(true);
    }

    // finally adjust the variable inside the IN calculation
    inNode->changeMember(1, ast->createNodeReference(outVar));
    // set sortedness bit for the IN operator
    inNode->setBoolValue(true);

    modified = true;
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief remove redundant sorts
/// this rule modifies the plan in place:
/// - sorts that are covered by earlier sorts will be removed
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeRedundantSortsRule (Optimizer* opt,
                                              ExecutionPlan* plan,
                                              Optimizer::Rule const* rule) {
  // For every SORT node of the plan, walks up its chain of dependencies and
  // compares it with each other SORT node found on the way. SORT nodes that
  // turn out to be superfluous are collected in toUnlink and removed from the
  // plan in one go at the end.
  std::vector<ExecutionNode*> nodes(plan->findNodesOfType(EN::SORT, true));

  if (nodes.empty()) {
    // quick exit
    opt->addPlan(plan, rule, false);
    return;
  }

  std::unordered_set<ExecutionNode*> toUnlink;

  // buffer that is handed to every getSortInformation() call below
  triagens::basics::StringBuffer buffer(TRI_UNKNOWN_MEM_ZONE);

  for (auto const& n : nodes) {
    if (toUnlink.find(n) != toUnlink.end()) {
      // encountered a sort node that we already deleted
      continue;
    }

    auto const sortNode = static_cast<SortNode*>(n);

    auto sortInfo = sortNode->getSortInformation(plan, &buffer);

    if (sortInfo.isValid && ! sortInfo.criteria.empty()) {
      // we found a sort that we can understand
      // now inspect the nodes it depends on, starting with its direct dependencies
      std::vector<ExecutionNode*> stack;

      sortNode->addDependencies(stack);

      // number of nodes seen so far (between this sort and the node currently
      // inspected) that prevent two sorts with different criteria from being
      // treated as directly adjacent
      int nodesRelyingOnSort = 0;

      while (! stack.empty()) {
        auto current = stack.back();
        stack.pop_back();

        if (current->getType() == EN::SORT) {
          // we found another sort. now check if they are compatible!

          auto other = static_cast<SortNode*>(current)->getSortInformation(plan, &buffer);

          // note: every `break` inside this switch only leaves the switch,
          // not the surrounding while loop. the traversal continues afterwards
          switch (sortInfo.isCoveredBy(other)) {
            case SortInformation::unequal: {
              // different sort criteria
              if (nodesRelyingOnSort == 0) {
                // a sort directly followed by another sort: now remove one of them

                if (other.canThrow || ! other.isDeterministic) {
                  // if the sort can throw or is non-deterministic, we must not remove it
                  break;
                }

                if (sortNode->isStable()) {
                  // we should not optimize predecessors of a stable sort (used in a COLLECT node)
                  // the stable sort is for a reason, and removing any predecessors sorts might
                  // change the result
                  break;
                }

                // remove sort that is a direct predecessor of a sort
                toUnlink.emplace(current);
              }
              break;
            }

            case SortInformation::otherLessAccurate: {
              // the other (earlier) sort is removed, the current sort node is kept
              toUnlink.emplace(current);
              break;
            }

            case SortInformation::ourselvesLessAccurate: {
              // the sort at the start of the pipeline makes the sort at the end
              // superfluous, so we'll remove it
              toUnlink.emplace(n);
              break;
            }

            case SortInformation::allEqual: {
              // the sort at the end of the pipeline makes the sort at the start
              // superfluous, so we'll remove it
              toUnlink.emplace(current);
              break;
            }
          }
        }
        else if (current->getType() == EN::FILTER) {
          // ok: a filter does not depend on sort order
        }
        else if (current->getType() == EN::CALCULATION) {
          // ok: a calculation does not depend on sort order, but only if it does not throw
          if (current->canThrow()) {
            ++nodesRelyingOnSort;
          }
        }
        else if (current->getType() == EN::ENUMERATE_LIST ||
                 current->getType() == EN::ENUMERATE_COLLECTION ||
                 current->getType() == EN::TRAVERSAL) {
          // ok, but we cannot remove two different sorts if one of these node types is between them
          // example: in the following query, the one sort will be optimized away:
          //   FOR i IN [ { a: 1 }, { a: 2 } , { a: 3 } ] SORT i.a ASC SORT i.a DESC RETURN i
          // but in the following query, the sorts will stay:
          //   FOR i IN [ { a: 1 }, { a: 2 } , { a: 3 } ] SORT i.a ASC LET a = i.a SORT i.a DESC RETURN i
          ++nodesRelyingOnSort;
        }
        else {
          // abort at all other type of nodes. we cannot remove a sort beyond them
          // this includes COLLECT and LIMIT
          break;
        }

        if (! current->hasDependency()) {
          // node either has no or more than one dependency. we don't know what to do and must abort
          // note: this will also handle Singleton nodes
          break;
        }

        // continue the walk with the dependencies of the current node
        current->addDependencies(stack);
      }

      // simplify() is only called if the sort node was not already marked
      // for removal by the traversal above
      if (toUnlink.find(n) == toUnlink.end() &&
          sortNode->simplify(plan)) {
        // sort node had only constant expressions. it will make no difference if we execute it or not
        // so we can remove it
        toUnlink.emplace(n);
      }
    }
  }

  if (! toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  // the plan counts as modified if at least one sort node was unlinked
  opt->addPlan(plan, rule, ! toUnlink.empty());
}

////////////////////////////////////////////////////////////////////////////////
/// @brief remove all unnecessary filters
/// this rule modifies the plan in place:
/// - filters that are always true are removed completely
/// - filters that are always false will be replaced by a NoResults node
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeUnnecessaryFiltersRule (Optimizer* opt,
                                                  ExecutionPlan* plan,
                                                  Optimizer::Rule const* rule) {
  // FILTER nodes whose condition is always true. they are only collected
  // inside the loop and unlinked from the plan in a single step afterwards
  std::unordered_set<ExecutionNode*> obsoleteFilters;
  bool planChanged = false;

  // should we enter subqueries??
  std::vector<ExecutionNode*> filterNodes(plan->findNodesOfType(EN::FILTER, true));

  for (auto const& filterNode : filterNodes) {
    // a FILTER node has exactly one input variable
    auto usedVariables = filterNode->getVariablesUsedHere();
    TRI_ASSERT(usedVariables.size() == 1);

    // find the node that produces the FILTER's input variable
    auto conditionVariable = usedVariables[0];
    auto producer = plan->getVarSetBy(conditionVariable->id);

    bool const producedByCalculation =
      (producer != nullptr && producer->getType() == EN::CALCULATION);

    if (! producedByCalculation) {
      // only conditions computed by a CalculationNode can be inspected
      continue;
    }

    auto calculation = static_cast<CalculationNode*>(producer);
    auto condition = calculation->expression()->node();

    TRI_ASSERT(condition != nullptr);

    bool const safeToInspect =
      (! condition->canThrow() && condition->isDeterministic());

    if (! safeToInspect) {
      // leave filters alone whose condition may throw or is non-deterministic
      continue;
    }

    // the condition cannot throw, so it is safe to look at its value
    TRI_ASSERT(! calculation->expression()->canThrow());

    if (condition->isTrue()) {
      // condition is always true: the FILTER has no effect and can go away
      obsoleteFilters.emplace(filterNode);
      planChanged = true;
      continue;
    }

    if (condition->isFalse()) {
      // condition is always false: put a NoResults node in place of the FILTER
      auto replacement = new NoResultsNode(plan, plan->nextId());
      plan->registerNode(replacement);
      plan->replaceNode(filterNode, replacement);
      planChanged = true;
    }
  }

  if (! obsoleteFilters.empty()) {
    plan->unlinkNodes(obsoleteFilters);
  }

  opt->addPlan(plan, rule, planChanged);
}

#if 0
// NOTE: this struct is compiled out by the surrounding #if 0 and is kept for
// reference only.
//
// Helper that walks the AST of an expression and checks how the out variable
// of a COLLECT is used in it. Two kinds of usage are recognized:
// - access to an attribute via an expansion (the attribute name is recorded
//   in attributeNames)
// - usage as the single argument of a call to LENGTH()
// Any other usage of the variable sets canUseOptimization to false.
struct CollectVariableFinder {
  // the COLLECT's out variable we are looking for
  Variable const* searchVariable;
  // attribute names found to be accessed on the variable (owned by the caller)
  std::unordered_set<std::string>& attributeNames;
  // the AST nodes on the path from the analyzed root down to the current node
  std::vector<AstNode const*> stack;
  // set to false as soon as an unsupported usage of the variable is found
  bool canUseOptimization;
  // set to true if the variable was found as the single argument of LENGTH()
  bool isArgumentToLength;

  CollectVariableFinder (AggregateNode const* collectNode,
                         std::unordered_set<std::string>& attributeNames)
    : searchVariable(collectNode->outVariable()),
      attributeNames(attributeNames),
      stack(),
      canUseOptimization(true),
      isArgumentToLength(false) {

    TRI_ASSERT(searchVariable != nullptr);
    stack.reserve(4);
  }

  // recursively analyzes an AST node. sub-nodes are visited first, the node
  // itself is checked afterwards
  void analyze (AstNode const* node) {
    TRI_ASSERT(node != nullptr);

    if (! canUseOptimization) {
      // we already know we cannot apply this optimization
      return;
    }

    // the node stays on the stack while its sub-nodes and itself are checked.
    // so stack[size - 1] is the node itself, stack[size - 2] its parent and
    // stack[size - 3] its grandparent
    stack.push_back(node);

    size_t const n = node->numMembers();
    for (size_t i = 0; i < n; ++i) {
      auto sub = node->getMember(i);
      if (sub != nullptr) {
        // recurse into subnodes
        analyze(sub);
      }
    }

    if (node->type == NODE_TYPE_REFERENCE) {
      auto variable = static_cast<Variable const*>(node->getData());

      TRI_ASSERT(variable != nullptr);

      if (variable->id == searchVariable->id) {
        // found a reference to our variable. now check if it is used in one
        // of the supported ways
        bool handled = false;
        auto const size = stack.size();

        if (size >= 3 &&
            stack[size - 3]->type == NODE_TYPE_EXPANSION) {
          // our variable is used in an expansion, e.g. g[*].attribute
          auto expandNode = stack[size - 3];
          TRI_ASSERT(expandNode->numMembers() == 2);
          TRI_ASSERT(expandNode->getMember(0)->type == NODE_TYPE_ITERATOR);

          auto expansion = expandNode->getMember(1);
          TRI_ASSERT(expansion != nullptr);
          // descend through nested attribute accesses and record the name of
          // the innermost one
          while (expansion->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
            // note which attribute is used with our variable
            if (expansion->getMember(0)->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
              expansion = expansion->getMember(0);
            }
            else {
              attributeNames.emplace(expansion->getStringValue());
              handled = true;
              break;
            }
          }
        }
        else if (size >= 3 &&
                 stack[size - 2]->type == NODE_TYPE_ARRAY &&
                 stack[size - 3]->type == NODE_TYPE_FCALL) {
          // our variable is a member of the argument array of a function call
          auto func = static_cast<Function const*>(stack[size - 3]->getData());

          if (func->externalName == "LENGTH" &&
              stack[size - 2]->numMembers() == 1) {
            // call to function LENGTH() with our variable as its single argument
            handled = true;
            isArgumentToLength = true;
          }
        }

        if (! handled) {
          // variable is used in a way we do not support
          canUseOptimization = false;
        }
      }
    }

    stack.pop_back();
  }

};
#endif

////////////////////////////////////////////////////////////////////////////////
/// @brief specialize the variables used in a COLLECT INTO
////////////////////////////////////////////////////////////////////////////////

#if 0
// NOTE: this rule is compiled out by the surrounding #if 0 and is kept for
// reference only.
//
// For each COLLECT ... INTO node, checks how the INTO variable is used by the
// nodes that follow. If only attributes of the variable are accessed, and each
// of them matches the name of a variable used by the COLLECT, a calculation
// producing an object with just these attributes is inserted before the
// COLLECT and registered as the COLLECT's expression variable.
void triagens::aql::specializeCollectVariables (Optimizer* opt,
                                                ExecutionPlan* plan,
                                                Optimizer::Rule const* rule) {
  bool modified = false;
  std::vector<ExecutionNode*> nodes = plan->findNodesOfType(EN::AGGREGATE, true);

  for (auto n : nodes) {
    auto collectNode = static_cast<AggregateNode*>(n);
    TRI_ASSERT(collectNode != nullptr);

    // only COLLECT nodes with exactly one dependency are handled
    auto deps = collectNode->getDependencies();
    if (deps.size() != 1) {
      continue;
    }

    if (! collectNode->hasOutVariable() ||
        collectNode->hasExpressionVariable() ||
        collectNode->count()) {
      // COLLECT without INTO or a COLLECT that already uses an
      // expression variable or a COLLECT that only counts
      continue;
    }

    auto outVariable = collectNode->outVariable();
    // must have an outVariable if we got here
    TRI_ASSERT(outVariable != nullptr);

    std::unordered_set<std::string> attributeNames;
    CollectVariableFinder finder(collectNode, attributeNames);

    // check all following nodes for usage of the out variable
    std::vector<ExecutionNode*> parents(n->getParents());

    while (! parents.empty() &&
           finder.canUseOptimization) {
      auto current = parents.back();
      parents.pop_back();

      // also visit the parents of the current node later
      for (auto it : current->getParents()) {
        parents.emplace_back(it);
      }

      // now check current node for usage of out variable
      auto const&& variablesUsed = current->getVariablesUsedHere();

      bool found = false;
      for (auto it : variablesUsed) {
        if (it == outVariable) {
          found = true;
          break;
        }
      }

      if (found) {
        // variable is used. now find out how it is used
        if (current->getType() != EN::CALCULATION) {
          // variable is used outside of a calculation... skip optimization
          // TODO
          // NOTE(review): this break leaves finder.canUseOptimization
          // untouched, so the code after the loop still runs - confirm that
          // this is intended before re-enabling the rule
          break;
        }

        auto calculationNode = static_cast<CalculationNode*>(current);
        auto expression = calculationNode->expression();
        TRI_ASSERT(expression != nullptr);

        finder.analyze(expression->node());
      }
    }

    if (finder.canUseOptimization) {
      // can use the optimization

      if (! finder.attributeNames.empty()) {
        // build an object with one member per accessed attribute. a member is
        // only added if the COLLECT uses a variable of the same name
        auto obj = plan->getAst()->createNodeObject();

        for (auto const& attributeName : finder.attributeNames) {
          for (auto it : collectNode->getVariablesUsedHere()) {
            if (it->name == attributeName) {
              auto refNode = plan->getAst()->createNodeReference(it);
              auto element = plan->getAst()->createNodeObjectElement(it->name.c_str(), refNode);
              obj->addMember(element);
            }
          }
        }

        // only apply the change if the number of object members equals the
        // number of accessed attributes
        if (obj->numMembers() == attributeNames.size()) {
          // insert the new calculation between the COLLECT and its dependency
          collectNode->removeDependency(deps[0]);
          auto calculationNode = plan->createTemporaryCalculation(obj);
          calculationNode->addDependency(deps[0]);
          collectNode->addDependency(calculationNode);

          collectNode->setExpressionVariable(calculationNode->outVariable());
          modified = true;
        }
      }
    }
  }

  opt->addPlan(plan, rule, modified);
}
#endif

////////////////////////////////////////////////////////////////////////////////
/// @brief remove INTO of a COLLECT if not used
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeCollectIntoRule (Optimizer* opt,
                                           ExecutionPlan* plan,
                                           Optimizer::Rule const* rule) {
  std::vector<ExecutionNode*> collectNodes(plan->findNodesOfType(EN::AGGREGATE, true));
  bool planChanged = false;

  for (auto const& candidate : collectNodes) {
    auto aggregate = static_cast<AggregateNode*>(candidate);
    TRI_ASSERT(aggregate != nullptr);

    auto intoVariable = aggregate->outVariable();

    if (intoVariable != nullptr) {
      // the COLLECT has an out variable. check if any later node still needs it
      auto usedLater = candidate->getVarsUsedLater();
      bool const stillNeeded = (usedLater.find(intoVariable) != usedLater.end());

      if (! stillNeeded) {
        // nobody reads the out variable, so the COLLECT need not produce it
        aggregate->clearOutVariable();
        planChanged = true;
      }
    }
    // else: COLLECT without an out variable, nothing to remove
  }

  opt->addPlan(plan, rule, planChanged);
}

// -----------------------------------------------------------------------------
// --SECTION--                  helper class for propagateConstantAttributesRule
// -----------------------------------------------------------------------------

class PropagateConstantAttributesHelper {

  public:

    PropagateConstantAttributesHelper ()
      : _constants(),
        _modified(false) {
    }

    bool modified () const {
      return _modified;
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief inspects a plan and propages constant values in expressions
////////////////////////////////////////////////////////////////////////////////

    void propagateConstants (ExecutionPlan* plan) {
      std::vector<ExecutionNode*> nodes(plan->findNodesOfType(EN::FILTER, true));

      for (auto const& node : nodes) {
        auto fn = static_cast<FilterNode*>(node);

        auto inVar = fn->getVariablesUsedHere();
        TRI_ASSERT(inVar.size() == 1);

        auto setter = plan->getVarSetBy(inVar[0]->id);
        if (setter != nullptr &&
            setter->getType() == EN::CALCULATION) {
          auto cn = static_cast<CalculationNode*>(setter);
          auto expression = cn->expression();

          if (expression != nullptr) {
            collectConstantAttributes(const_cast<AstNode*>(expression->node()));
          }
        }
      }

      if (! _constants.empty()) {
        for (auto const& node : nodes) {
          auto fn = static_cast<FilterNode*>(node);

          auto inVar = fn->getVariablesUsedHere();
          TRI_ASSERT(inVar.size() == 1);

          auto setter = plan->getVarSetBy(inVar[0]->id);
          if (setter != nullptr &&
              setter->getType() == EN::CALCULATION) {
            auto cn = static_cast<CalculationNode*>(setter);
            auto expression = cn->expression();

            if (expression != nullptr) {
              insertConstantAttributes(const_cast<AstNode*>(expression->node()));
            }
          }
        }
      }
    }

  private:

    AstNode const* getConstant (Variable const* variable,
                                std::string const& attribute) const {
      auto it = _constants.find(variable);

      if (it == _constants.end()) {
        return nullptr;
      }

      auto it2 = (*it).second.find(attribute);

      if (it2 == (*it).second.end()) {
        return nullptr;
      }

      return (*it2).second;
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief inspects an expression (recursively) and notes constant attribute
/// values so they can be propagated later
////////////////////////////////////////////////////////////////////////////////

    void collectConstantAttributes (AstNode* node) {
      if (node == nullptr) {
        return;
      }

      if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
        auto lhs = node->getMember(0);
        auto rhs = node->getMember(1);

        collectConstantAttributes(lhs);
        collectConstantAttributes(rhs);
      }
      else if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
        auto lhs = node->getMember(0);
        auto rhs = node->getMember(1);

        if (lhs->isConstant() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
          inspectConstantAttribute(rhs, lhs);
        }
        else if (rhs->isConstant() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
          inspectConstantAttribute(lhs, rhs);
        }
      }
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief traverses an AST part recursively and patches it by inserting
/// constant values
////////////////////////////////////////////////////////////////////////////////

    void insertConstantAttributes (AstNode* node) {
      if (node == nullptr) {
        return;
      }

      if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
        auto lhs = node->getMember(0);
        auto rhs = node->getMember(1);

        insertConstantAttributes(lhs);
        insertConstantAttributes(rhs);
      }
      else if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
        auto lhs = node->getMember(0);
        auto rhs = node->getMember(1);

        if (! lhs->isConstant() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
          insertConstantAttribute(node, 1);
        }
        if (! rhs->isConstant() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
          insertConstantAttribute(node, 0);
        }
      }
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief extract an attribute and its variable from an attribute access
/// (e.g. `a.b.c` will return variable `a` and attribute name `b.c.`.
////////////////////////////////////////////////////////////////////////////////

    bool getAttribute (AstNode const* attribute,
                       Variable const*& variable,
                       std::string& name) {
      TRI_ASSERT(attribute != nullptr &&
                 attribute->type == NODE_TYPE_ATTRIBUTE_ACCESS);
      TRI_ASSERT(name.empty());

      while (attribute->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
        name = std::string(".") + std::string(attribute->getStringValue(), attribute->getStringLength()) + name;
        attribute = attribute->getMember(0);
      }

      if (attribute->type != NODE_TYPE_REFERENCE) {
        return false;
      }

      variable = static_cast<Variable const*>(attribute->getData());
      TRI_ASSERT(variable != nullptr);

      return true;
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief inspect the constant value assigned to an attribute
/// the attribute value will be stored so it can be inserted for the attribute
/// later
////////////////////////////////////////////////////////////////////////////////

    void inspectConstantAttribute (AstNode const* attribute,
                                   AstNode const* value) {
      Variable const* variable = nullptr;
      std::string name;

      if (! getAttribute(attribute, variable, name)) {
        return;
      }

      auto it = _constants.find(variable);

      if (it == _constants.end()) {
        _constants.emplace(variable, std::unordered_map<std::string, AstNode const*>{ { name, value } });
        return;
      }

      auto it2 = (*it).second.find(name);

      if (it2 == (*it).second.end()) {
        // first value for the attribute
        (*it).second.emplace(name, value);
      }
      else {
        auto previous = (*it2).second;

        if (previous == nullptr) {
          // we have multiple different values for the attribute. better not use this attribute
          return;
        }

        if (TRI_CompareValuesJson(value->computeJson(), previous->computeJson(), true) != 0) {
          // different value found for an already tracked attribute. better not use this attribute
          (*it2).second = nullptr;
        }
      }
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief patches an AstNode by inserting a constant value into it
////////////////////////////////////////////////////////////////////////////////

    void insertConstantAttribute (AstNode* parentNode,
                                  size_t accessIndex) {
      Variable const* variable = nullptr;
      std::string name;

      if (! getAttribute(parentNode->getMember(accessIndex), variable, name)) {
        return;
      }

      auto constantValue = getConstant(variable, name);

      if (constantValue != nullptr) {
        parentNode->changeMember(accessIndex, const_cast<AstNode*>(constantValue));
        _modified = true;
      }
    }

    std::unordered_map<Variable const*, std::unordered_map<std::string, AstNode const*>> _constants;

    bool _modified;
};

////////////////////////////////////////////////////////////////////////////////
/// @brief propagate constant attributes in FILTERs
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::propagateConstantAttributesRule (Optimizer* opt,
                                                     ExecutionPlan* plan,
                                                     Optimizer::Rule const* rule) {
  // all of the work is done by the helper, which patches the plan's filter
  // expressions in place and remembers whether it changed anything
  PropagateConstantAttributesHelper propagator;
  propagator.propagateConstants(plan);

  bool const planChanged = propagator.modified();
  opt->addPlan(plan, rule, planChanged);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief remove SORT RAND() if appropriate
/// a SortNode whose single sort criterion is the result of a RAND() call
/// without arguments is unlinked from the plan if exactly one
/// EnumerateCollectionNode is found among its dependencies and no blocking
/// node is in between. that EnumerateCollectionNode is flagged for random
/// iteration instead
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeSortRandRule (Optimizer* opt,
                                        ExecutionPlan* plan,
                                        Optimizer::Rule const* rule) {
  // should we enter subqueries??
  std::vector<ExecutionNode*> sortNodes(plan->findNodesOfType(EN::SORT, true));
  bool modified = false;

  for (auto const& n : sortNodes) {
    auto const& sortElements = static_cast<SortNode*>(n)->getElements();

    if (sortElements.size() != 1) {
      // "SORT RAND()" has exactly one sort criterion
      continue;
    }

    auto const sortVariable = sortElements[0].first;
    TRI_ASSERT(sortVariable != nullptr);

    // the sort variable must be produced by a calculation
    auto producer = plan->getVarSetBy(sortVariable->id);

    if (producer == nullptr) {
      continue;
    }

    if (producer->getType() != EN::CALCULATION) {
      continue;
    }

    auto const expression = static_cast<CalculationNode*>(producer)->expression();

    if (expression == nullptr) {
      continue;
    }

    auto callNode = expression->node();

    if (callNode == nullptr ||
        callNode->type != NODE_TYPE_FCALL) {
      // the calculation is not a function call
      continue;
    }

    auto func = static_cast<Function const*>(callNode->getData());

    // "RAND()" is a function call with a single member: an empty parameters array
    bool const isRandCall = (func->externalName == "RAND" &&
                             callNode->numMembers() == 1 &&
                             callNode->getMember(0)->numMembers() == 0);

    if (! isRandCall) {
      continue;
    }

    // this is SORT RAND(). now inspect what the sort depends on
    if (! n->hasDependency()) {
      // note: this ends the processing of all remaining sort nodes, too
      break;
    }

    ExecutionNode* collectionNode = nullptr;
    auto current = n->getFirstDependency();

    while (current != nullptr) {
      if (current->canThrow()) {
        // we shouldn't bypass a node that can throw
        collectionNode = nullptr;
        break;
      }

      auto const currentType = current->getType();

      // another SortNode, an AggregateNode, a FilterNode, a SubqueryNode,
      // an EnumerateListNode, a TraversalNode or an IndexNode rules out
      // the optimization
      bool const isBlocker = (currentType == EN::SORT ||
                              currentType == EN::AGGREGATE ||
                              currentType == EN::FILTER ||
                              currentType == EN::SUBQUERY ||
                              currentType == EN::ENUMERATE_LIST ||
                              currentType == EN::TRAVERSAL ||
                              currentType == EN::INDEX);

      if (isBlocker) {
        collectionNode = nullptr;
        break;
      }

      if (currentType == EN::ENUMERATE_COLLECTION) {
        if (collectionNode != nullptr) {
          // a second collection node. the optimization must not be applied
          collectionNode = nullptr;
          break;
        }

        // first collection node found, remember it
        collectionNode = current;
      }

      // all other node types are ignored

      if (! current->hasDependency()) {
        break;
      }

      current = current->getFirstDependency();
    }

    if (collectionNode == nullptr) {
      // nothing to modify for this sort node
      continue;
    }

    TRI_ASSERT(collectionNode->getType() == EN::ENUMERATE_COLLECTION);
    // set the random iteration flag for the EnumerateCollectionNode
    static_cast<EnumerateCollectionNode*>(collectionNode)->setRandom();

    // remove the SortNode
    // note: the CalculationNode will be removed by "remove-unnecessary-calculations"
    // rule if not used
    plan->unlinkNode(n);
    modified = true;
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief move calculations up in the plan
/// this rule modifies the plan in place
/// it aims to move up calculations as far up in the plan as possible, to
/// avoid redundant calculations in inner loops
/// only calculations that cannot throw and that are deterministic are moved,
/// and a calculation is never moved beyond a node that sets one of the
/// variables it uses
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::moveCalculationsUpRule (Optimizer* opt,
                                            ExecutionPlan* plan,
                                            Optimizer::Rule const* rule) {
  bool modified = false;
  std::vector<ExecutionNode*> calculations(plan->findNodesOfType(EN::CALCULATION, true));

  for (auto const& n : calculations) {
    auto const expression = static_cast<CalculationNode*>(n)->expression();

    if (expression->canThrow() ||
        ! expression->isDeterministic()) {
      // such expressions stay where they are
      continue;
    }

    // the variables the calculation reads
    std::unordered_set<Variable const*> neededVars;
    n->getVariablesUsedHere(neededVars);

    // tells whether a node sets one of the variables the calculation reads
    auto setsNeededVariable = [&neededVars] (ExecutionNode* node) -> bool {
      for (auto const& produced : node->getVariablesSetHere()) {
        if (neededVars.count(produced) > 0) {
          return true;
        }
      }
      return false;
    };

    std::vector<ExecutionNode*> stack;
    n->addDependencies(stack);

    while (! stack.empty()) {
      auto current = stack.back();
      stack.pop_back();

      if (setsNeededVariable(current)) {
        // shared variable, the calculation cannot be moved up any further
        break;
      }

      if (! current->hasDependency()) {
        // node either has no or more than one dependency. we don't know what to do and must abort
        // note: this will also handle Singleton nodes
        break;
      }

      // collect the dependencies of the current node before the plan is changed
      current->addDependencies(stack);

      // take the calculation out of the plan ...
      plan->unlinkNode(n);
      // ... and put it back in as a dependency of the current node
      plan->insertDependency(current, n);
      modified = true;
    }
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief move calculations down in the plan
/// this rule modifies the plan in place
/// it aims to move calculations as far down in the plan as possible, beyond
/// FILTER and LIMIT operations
/// only calculations that cannot throw and that are deterministic are moved,
/// and a calculation is never moved beyond a node that uses its out variable
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::moveCalculationsDownRule (Optimizer* opt,
                                              ExecutionPlan* plan,
                                              Optimizer::Rule const* rule) {
  bool modified = false;
  std::vector<ExecutionNode*> calculations(plan->findNodesOfType(EN::CALCULATION, true));

  for (auto const& n : calculations) {
    auto calculation = static_cast<CalculationNode*>(n);

    if (calculation->expression()->canThrow() ||
        ! calculation->expression()->isDeterministic()) {
      // such expressions stay where they are
      continue;
    }

    // the variable that is set by the calculation
    auto variable = calculation->outVariable();

    // tells whether a node reads the variable that the calculation sets
    auto usesOutVariable = [variable] (ExecutionNode* node) -> bool {
      for (auto const& used : node->getVariablesUsedHere()) {
        if (used == variable) {
          return true;
        }
      }
      return false;
    };

    std::vector<ExecutionNode*> stack;
    n->addParents(stack);

    ExecutionNode* lastNode = nullptr;
    bool shouldMove = false;

    while (! stack.empty()) {
      auto current = stack.back();
      stack.pop_back();

      // remember the node, even if it turns out to be the one that stops us
      lastNode = current;

      if (usesOutVariable(current)) {
        // the node we're looking at needs the variable we're setting.
        // can't push further!
        break;
      }

      bool isBarrier = false;

      switch (current->getType()) {
        case EN::FILTER:
        case EN::SORT:
        case EN::LIMIT:
        case EN::SUBQUERY: {
          // passing one of these nodes justifies moving the calculation down
          shouldMove = true;
          break;
        }

        case EN::INDEX:
        case EN::ENUMERATE_COLLECTION:
        case EN::ENUMERATE_LIST:
        case EN::TRAVERSAL:
        case EN::AGGREGATE:
        case EN::NORESULTS: {
          // we will not push further down than such nodes
          shouldMove = false;
          isBarrier = true;
          break;
        }

        default: {
          // all other node types neither justify nor prevent the move
        }
      }

      if (isBarrier || ! current->hasParent()) {
        break;
      }

      current->addParents(stack);
    }

    if (! shouldMove || lastNode == nullptr) {
      continue;
    }

    // take the calculation out of the plan ...
    plan->unlinkNode(n);

    // ... and put it back in as a dependency of the last node visited
    plan->insertDependency(lastNode, n);
    modified = true;
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief fuse calculations in the plan
/// this rule modifies the plan in place
/// for each CalculationNode with a deterministic, non-throwing expression,
/// the rule walks up the chain of first dependencies and looks for other
/// CalculationNodes that set a variable which the calculation uses and which
/// is not used later in the plan. the expression of such a node is inlined
/// into the calculation, and the node is unlinked from the plan at the end.
/// variables that are used by any other node passed on the way are dropped
/// from the search
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::fuseCalculationsRule (Optimizer* opt,
                                          ExecutionPlan* plan,
                                          Optimizer::Rule const* rule) {
  std::vector<ExecutionNode*> nodes(plan->findNodesOfType(EN::CALCULATION, true));

  if (nodes.size() < 2) {
    // fusing requires at least two calculations
    opt->addPlan(plan, rule, false);
    return;
  }

  // calculations that have been inlined into other calculations
  std::unordered_set<ExecutionNode*> toUnlink;

  for (auto const& n : nodes) {
    auto nn = static_cast<CalculationNode*>(n);
    if (nn->expression()->canThrow() ||
        ! nn->expression()->isDeterministic()) {
      // we will only fuse calculations of expressions that cannot throw and that are deterministic
      continue;
    }

    if (toUnlink.find(n) != toUnlink.end()) {
      // do not process the same node twice
      continue;
    }

    // the variables whose defining calculations we are searching for:
    // used by this calculation, but not used later in the plan
    std::unordered_map<Variable const*, ExecutionNode*> toInsert;
    for (auto& it : nn->getVariablesUsedHere()) {
      if (! n->isVarUsedLater(it)) {
        toInsert.emplace(it, n);
      }
    }

    TRI_ASSERT(n->hasDependency());
    std::vector<ExecutionNode*> stack{ n->getFirstDependency() };

    while (! stack.empty()) {
      auto current = stack.back();
      stack.pop_back();

      bool handled = false;

      if (current->getType() == EN::CALCULATION) {
        auto otherExpression = static_cast<CalculationNode const*>(current)->expression();

        if (otherExpression->isDeterministic() &&
            ! otherExpression->canThrow() &&
            otherExpression->canRunOnDBServer() == nn->expression()->canRunOnDBServer()) {
          // found another calculation node
          auto varsSet = current->getVariablesSetHere();
          if (varsSet.size() == 1) {
            // check if it is a calculation for a variable that we are looking for
            auto it = toInsert.find(varsSet[0]);

            if (it != toInsert.end()) {
              // copy the key first: erase() invalidates the iterator, so it
              // must not be dereferenced afterwards
              auto const replacedVariable = (*it).first;

              // remove the variable from the list of search variables
              toInsert.erase(it);

              // replace the variable reference in the original expression with the expression for that variable
              auto expression = nn->expression();
              TRI_ASSERT(expression != nullptr);
              expression->replaceVariableReference(replacedVariable, otherExpression->node());

              toUnlink.emplace(current);

              // insert the calculations' own referenced variables into the list of search variables
              for (auto& it2 : current->getVariablesUsedHere()) {
                if (! n->isVarUsedLater(it2)) {
                  toInsert.emplace(it2, n);
                }
              }

              handled = true;
            }
          }
        }
      }

      if (! handled) {
        // remove all variables from our list that might be used elsewhere
        for (auto& it : current->getVariablesUsedHere()) {
          toInsert.erase(it);
        }
      }

      if (toInsert.empty()) {
        // done
        break;
      }

      if (! current->hasDependency()) {
        break;
      }

      stack.emplace_back(current->getFirstDependency());
    }
  }

  if (! toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(plan, rule, ! toUnlink.empty());
}

////////////////////////////////////////////////////////////////////////////////
/// @brief determine the "right" type of AggregateNode and
/// add a sort node for each COLLECT (note: the sort may be removed later)
/// this rule cannot be turned off (otherwise, the query result might be wrong!)
/// for each COLLECT that qualifies, a cloned plan using the hash method is
/// registered with the optimizer in addition; the original plan always gets
/// the sorted method
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::specializeCollectRule (Optimizer* opt,
                                           ExecutionPlan* plan,
                                           Optimizer::Rule const* rule) {
  bool modified = false;
  std::vector<ExecutionNode*> nodes(plan->findNodesOfType(EN::AGGREGATE, true));

  for (auto const& n : nodes) {
    auto collectNode = static_cast<AggregateNode*>(n);

    if (collectNode->isSpecialized()) {
      // this node was handled before
      continue;
    }

    auto const& aggregateVariables = collectNode->aggregateVariables();

    // the hash table variant of COLLECT is only an option under these conditions
    bool const canUseHashAggregation = (! aggregateVariables.empty() &&
                                        (! collectNode->hasOutVariable() || collectNode->count()) &&
                                        collectNode->getOptions().canUseHashMethod());

    if (canUseHashAggregation) {
      // the hash variant lives in a plan of its own
      std::unique_ptr<ExecutionPlan> hashPlan(plan->clone());

      // look up the clone of our COLLECT node in the new plan
      auto hashCollectNode = static_cast<AggregateNode*>(hashPlan->getNodeById(collectNode->id()));
      TRI_ASSERT(hashCollectNode != nullptr);

      // specialize the AggregateNode so it will become a HashAggregateBlock later
      hashCollectNode->aggregationMethod(AggregationOptions::AggregationMethod::AGGREGATION_METHOD_HASH);
      hashCollectNode->specialized();

      if (! collectNode->isDistinctCommand()) {
        // add a SortNode BEHIND the AggregateNode (to sort the final result)
        std::vector<std::pair<Variable const*, bool>> postSortElements;
        for (auto const& v : hashCollectNode->aggregateVariables()) {
          postSortElements.emplace_back(v.first, true);
        }

        auto postSort = new SortNode(hashPlan.get(), hashPlan->nextId(), postSortElements, false);
        hashPlan->registerNode(postSort);

        TRI_ASSERT(hashCollectNode->hasParent());
        auto parent = hashCollectNode->getParents()[0];

        postSort->addDependency(hashCollectNode);
        parent->replaceDependency(hashCollectNode, postSort);
      }

      hashPlan->findVarUsage();

      if (nodes.size() <= 1) {
        // no need to run this specific rule again on the cloned plan
        opt->addPlan(hashPlan.release(), rule, true);
      }
      else {
        // this will tell the optimizer to optimize the cloned plan with this specific rule again
        opt->addPlan(hashPlan.release(), rule, true, static_cast<int>(rule->level - 1));
      }
    }

    // mark node as specialized, so we do not process it again
    collectNode->specialized();

    // in the original plan, the AggregateNode will become a SortedAggregateBlock later
    collectNode->aggregationMethod(AggregationOptions::AggregationMethod::AGGREGATION_METHOD_SORTED);

    if (aggregateVariables.empty()) {
      // nothing to sort by
      continue;
    }

    // insert a SortNode IN FRONT OF the AggregateNode
    std::vector<std::pair<Variable const*, bool>> preSortElements;
    for (auto const& v : aggregateVariables) {
      preSortElements.emplace_back(v.second, true);
    }

    auto preSort = new SortNode(plan, plan->nextId(), preSortElements, true);
    plan->registerNode(preSort);

    TRI_ASSERT(collectNode->hasDependency());
    auto dependency = collectNode->getFirstDependency();
    preSort->addDependency(dependency);
    collectNode->replaceDependency(dependency, preSort);

    modified = true;
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief split and-combined filters and break them into smaller parts
/// a FILTER whose condition is a deterministic, non-throwing calculation with
/// a logical AND at the top is replaced by one calculation/FILTER pair per
/// operand of the AND
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::splitFiltersRule (Optimizer* opt,
                                      ExecutionPlan* plan,
                                      Optimizer::Rule const* rule) {
  bool modified = false;
  std::vector<ExecutionNode*> filters(plan->findNodesOfType(EN::FILTER, true));

  for (auto const& n : filters) {
    auto inVars = n->getVariablesUsedHere();
    TRI_ASSERT(inVars.size() == 1);

    // the filter condition must come from a calculation
    auto setter = plan->getVarSetBy(inVars[0]->id);

    if (setter == nullptr) {
      continue;
    }

    if (setter->getType() != EN::CALCULATION) {
      continue;
    }

    auto const condition = static_cast<CalculationNode*>(setter)->expression();

    bool const isSplittable = (! condition->canThrow() &&
                               condition->isDeterministic() &&
                               condition->node()->type == NODE_TYPE_OPERATOR_BINARY_AND);

    if (! isSplittable) {
      continue;
    }

    // descend through nested ANDs, everything else becomes a filter of its own
    std::vector<AstNode*> stack{ condition->nodeForModification() };

    while (! stack.empty()) {
      auto part = stack.back();
      stack.pop_back();

      if (part->type == NODE_TYPE_OPERATOR_BINARY_AND) {
        stack.emplace_back(part->getMember(0));
        stack.emplace_back(part->getMember(1));
        continue;
      }

      modified = true;

      auto outVar = plan->getAst()->variables()->createTemporaryVariable();

      // the guard frees the expression if creating the CalculationNode throws
      std::unique_ptr<Expression> partExpression(new Expression(plan->getAst(), part));
      ExecutionNode* calculationNode = new CalculationNode(plan, plan->nextId(), partExpression.get(), outVar);
      partExpression.release();

      plan->registerNode(calculationNode);
      plan->insertDependency(n, calculationNode);

      auto filterNode = new FilterNode(plan, plan->nextId(), outVar);
      plan->registerNode(filterNode);
      plan->insertDependency(n, filterNode);
    }

    if (modified) {
      // the original filter has been replaced by its parts
      plan->unlinkNode(n, false);
    }
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief move filters up in the plan
/// this rule modifies the plan in place
/// filters are moved as far up in the plan as possible to make result sets
/// as small as possible as early as possible
/// filters are not pushed beyond limits, beyond nodes that can throw,
/// beyond non-deterministic calculations, or beyond the node that sets the
/// filter's input variable
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::moveFiltersUpRule (Optimizer* opt,
                                       ExecutionPlan* plan,
                                       Optimizer::Rule const* rule) {
  bool modified = false;
  std::vector<ExecutionNode*> filters(plan->findNodesOfType(EN::FILTER, true));

  for (auto const& n : filters) {
    auto neededVars = n->getVariablesUsedHere();
    TRI_ASSERT(neededVars.size() == 1);

    // tells whether a node sets a variable (compared by id) that the filter reads
    auto setsNeededVariable = [&neededVars] (ExecutionNode* node) -> bool {
      for (auto const& produced : node->getVariablesSetHere()) {
        for (auto const& needed : neededVars) {
          if (needed->id == produced->id) {
            return true;
          }
        }
      }
      return false;
    };

    std::vector<ExecutionNode*> stack;
    n->addDependencies(stack);

    while (! stack.empty()) {
      auto current = stack.back();
      stack.pop_back();

      auto const currentType = current->getType();

      if (currentType == EN::LIMIT) {
        // cannot push a filter beyond a LIMIT node
        break;
      }

      if (current->canThrow()) {
        // must not move a filter beyond a node that can throw
        break;
      }

      if (currentType == EN::CALCULATION &&
          ! static_cast<CalculationNode const*>(current)->expression()->isDeterministic()) {
        // must not move a filter beyond a node with a non-deterministic result
        break;
      }

      if (setsNeededVariable(current)) {
        // shared variable, the filter cannot be moved up any further
        break;
      }

      if (! current->hasDependency()) {
        // node either has no or more than one dependency. we don't know what to do and must abort
        // note: this will also handle Singleton nodes
        break;
      }

      // collect the dependencies of the current node before the plan is changed
      current->addDependencies(stack);

      // take the filter out of the plan ...
      plan->unlinkNode(n);
      // ... and put it back in as a dependency of the current node
      plan->insertDependency(current, n);
      modified = true;
    }
  }

  opt->addPlan(plan, rule, modified);
}


// walker that rewrites the variables referenced by the nodes of a plan
// according to a map of variable id => replacement variable.
// the map is stored by reference, so it must outlive the replacer
class triagens::aql::RedundantCalculationsReplacer final : public WalkerWorker<ExecutionNode> {

  public:

    explicit RedundantCalculationsReplacer (std::unordered_map<VariableId, Variable const*> const& replacements)
      : _replacements(replacements) {
    }

    // replaces the input variable of a node of type T
    template<typename T>
    void replaceInVariable (ExecutionNode* en) {
      T* typed = static_cast<T*>(en);

      typed->_inVariable = Variable::replace(typed->_inVariable, _replacements);
    }

    // replaces variables inside the expression of a CalculationNode, but
    // only if the expression references at least one to-be-replaced variable
    void replaceInCalculation (ExecutionNode* en) {
      auto expression = static_cast<CalculationNode*>(en)->expression();

      std::unordered_set<Variable const*> referenced;
      expression->variables(referenced);

      bool usesReplacedVariable = false;

      for (auto const& v : referenced) {
        if (_replacements.count(v->id) > 0) {
          usesReplacedVariable = true;
          break;
        }
      }

      if (usesReplacedVariable) {
        expression->replaceVariables(_replacements);
      }
    }

    // called for each node of the plan. dispatches on the node type
    bool before (ExecutionNode* en) override final {
      switch (en->getType()) {
        case EN::ENUMERATE_LIST:
          replaceInVariable<EnumerateListNode>(en);
          break;

        case EN::RETURN:
          replaceInVariable<ReturnNode>(en);
          break;

        case EN::FILTER:
          replaceInVariable<FilterNode>(en);
          break;

        case EN::CALCULATION:
          replaceInCalculation(en);
          break;

        case EN::AGGREGATE: {
          // the second component of each aggregate variable pair is replaced
          auto aggregate = static_cast<AggregateNode*>(en);
          for (auto& pair : aggregate->_aggregateVariables) {
            pair.second = Variable::replace(pair.second, _replacements);
          }
          break;
        }

        case EN::SORT: {
          // the first component of each sort element is replaced
          auto sort = static_cast<SortNode*>(en);
          for (auto& element : sort->_elements) {
            element.first = Variable::replace(element.first, _replacements);
          }
          break;
        }

        default:
          // all other types of nodes are left untouched
          break;
      }

      // always continue
      return false;
    }

  private:

    std::unordered_map<VariableId, Variable const*> const& _replacements;
};

////////////////////////////////////////////////////////////////////////////////
/// @brief remove CalculationNode(s) that are repeatedly used in a query
/// (i.e. common expressions)
/// expressions are compared by their stringified form. for a calculation
/// whose expression equals that of a calculation further up in the plan, a
/// replacement "own out variable => other out variable" is recorded. all
/// recorded replacements are applied to the plan at the end
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeRedundantCalculationsRule (Optimizer* opt,
                                                     ExecutionPlan* plan,
                                                     Optimizer::Rule const* rule) {
  std::vector<ExecutionNode*> calculations(plan->findNodesOfType(EN::CALCULATION, true));

  if (calculations.size() < 2) {
    // a single calculation cannot be redundant
    opt->addPlan(plan, rule, false);
    return;
  }

  triagens::basics::StringBuffer buffer(TRI_UNKNOWN_MEM_ZONE);
  std::unordered_map<VariableId, Variable const*> replacements;

  // stringifies the expression of a CalculationNode into "out".
  // returns false if the expression could not be stringified (maybe because
  // not all node types are supported). this is not an error, the caller
  // just skips the optimization then. the buffer is always left reset
  auto stringifyExpression = [&buffer] (ExecutionNode* calculation, std::string& out) -> bool {
    try {
      static_cast<CalculationNode*>(calculation)->expression()->stringifyIfNotTooLong(&buffer);
    }
    catch (...) {
      buffer.reset();
      return false;
    }

    out.assign(buffer.c_str(), buffer.length());
    buffer.reset();
    return true;
  };

  for (auto const& n : calculations) {
    if (! static_cast<CalculationNode*>(n)->expression()->isDeterministic()) {
      // If this node is non-deterministic, we must not touch it!
      continue;
    }

    auto outvar = n->getVariablesSetHere();
    TRI_ASSERT(outvar.size() == 1);

    std::string referenceExpression;
    if (! stringifyExpression(n, referenceExpression)) {
      continue;
    }

    std::vector<ExecutionNode*> stack;
    n->addDependencies(stack);

    while (! stack.empty()) {
      auto current = stack.back();
      stack.pop_back();

      auto const currentType = current->getType();

      if (currentType == EN::CALCULATION) {
        std::string compareExpression;
        if (! stringifyExpression(current, compareExpression)) {
          // note: the dependencies of this node are not followed
          continue;
        }

        if (compareExpression == referenceExpression) {
          // expressions are identical
          auto outvars = current->getVariablesSetHere();
          TRI_ASSERT(outvars.size() == 1);

          // follow already registered replacements, starting at the target.
          // this covers the following case:
          // - replacements is set to B => C
          // - we're now inserting a replacement A => B
          // the goal now is to enter a replacement A => C instead of A => B
          auto target = outvars[0];
          while (target != nullptr) {
            auto known = replacements.find(target->id);

            if (known == replacements.end()) {
              break;
            }

            target = (*known).second;
          }
          replacements.emplace(outvar[0]->id, target);

          // redirect existing replacements that point to our own variable.
          // this covers the following case:
          // - replacements is set to A => B
          // - we have just inserted a replacement B => C
          // the goal now is to change the replacement A => B to A => C
          for (auto& entry : replacements) {
            if (entry.second == outvar[0]) {
              entry.second = target;
            }
          }
        }
      }
      else if (currentType == EN::AGGREGATE &&
               static_cast<AggregateNode*>(current)->hasOutVariable()) {
        // COLLECT ... INTO is evil (tm): it needs to keep all already defined variables
        // we need to abort optimization here
        break;
      }

      if (! current->hasDependency()) {
        // node either has no or more than one dependency. we don't know what to do and must abort
        // note: this will also handle Singleton nodes
        break;
      }

      current->addDependencies(stack);
    }
  }

  bool const hasReplacements = ! replacements.empty();

  if (hasReplacements) {
    // finally replace the variables
    RedundantCalculationsReplacer finder(replacements);
    plan->root()->walk(&finder);
  }

  opt->addPlan(plan, rule, hasReplacements);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief remove CalculationNodes and SubqueryNodes that are never needed
/// this modifies an existing plan in place
/// a node is removed if the variable it sets is not used later in the plan.
/// nodes that can throw (unless a calculation may be removed regardless) and
/// subqueries that modify data are kept
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeUnnecessaryCalculationsRule (Optimizer* opt,
                                                       ExecutionPlan* plan,
                                                       Optimizer::Rule const* rule) {
  std::vector<ExecutionNode::NodeType> const types = {
    EN::CALCULATION,
    EN::SUBQUERY
  };

  std::vector<ExecutionNode*> candidates(plan->findNodesOfType(types, true));
  std::unordered_set<ExecutionNode*> toUnlink;

  for (auto const& n : candidates) {
    bool mustKeep;

    if (n->getType() == EN::CALCULATION) {
      auto calculation = static_cast<CalculationNode*>(n);

      // If this node can throw, we must not optimize it away!
      mustKeep = (calculation->canThrow() && ! calculation->canRemoveIfThrows());
    }
    else {
      auto subquery = static_cast<SubqueryNode*>(n);

      // subqueries that can throw or that modify data must not be optimized away
      mustKeep = (subquery->canThrow() || subquery->isModificationQuery());
    }

    if (mustKeep) {
      continue;
    }

    auto outvar = n->getVariablesSetHere();
    TRI_ASSERT(outvar.size() == 1);

    auto const& varsUsedLater = n->getVarsUsedLater();

    if (varsUsedLater.count(outvar[0]) == 0) {
      // the variable set by this node is not used at all further down
      // the pipeline, so the whole node can go
      toUnlink.emplace(n);
    }
  }

  bool const hasRemovals = ! toUnlink.empty();

  if (hasRemovals) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(plan, rule, hasRemovals);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief useIndex, try to use an index for filtering
/// a ConditionFinder is run from every end node of the plan. the replacement
/// nodes it collects are then registered with the plan and swapped in for
/// the nodes they replace
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::useIndexesRule (Optimizer* opt,
                                    ExecutionPlan* plan,
                                    Optimizer::Rule const* rule) {

  // These are all the nodes where we start traversing (including all subqueries)
  std::vector<ExecutionNode*> endNodes(plan->findEndNodes(true));

  // id of node to replace => replacement node
  std::unordered_map<size_t, ExecutionNode*> changes;

  // frees all replacement nodes that are still stored in the map
  auto releasePendingChanges = [&changes] () -> void {
    for (auto& pending : changes) {
      delete pending.second;
    }
    changes.clear();
  };

  TRI_DEFER(releasePendingChanges());

  bool hasEmptyResult = false;

  for (auto const& n : endNodes) {
    ConditionFinder finder(plan, &changes, &hasEmptyResult);
    n->walk(&finder);
  }

  bool const hasChanges = ! changes.empty();

  for (auto& change : changes) {
    plan->registerNode(change.second);
    plan->replaceNode(plan->getNodeById(change.first), change.second);

    // prevent double deletion by releasePendingChanges()
    change.second = nullptr;
  }

  // without any changes, the plan only counts as modified if the finder
  // reported an empty result
  opt->addPlan(plan, rule, hasChanges || hasEmptyResult);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief plan walker that tries to make a SORT use (or rely on) an index
///
/// the walker is started on a SortNode. it remembers the sort criteria of
/// that node and the expressions of all CalculationNodes it passes, and then
/// stops at the first IndexNode or EnumerateCollectionNode it finds:
/// - for an EnumerateCollectionNode, it tries to replace the full collection
///   scan with an IndexNode on a sorted index covering the sort attributes
/// - for an IndexNode, it checks whether the index already delivers the
///   documents in the requested order
/// in both cases the SortNode is unlinked from the plan if it became
/// redundant. _modified tells the caller whether the plan was changed
////////////////////////////////////////////////////////////////////////////////

struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
  // the plan that is inspected (and modified in place)
  ExecutionPlan*                                 _plan;
  // the SortNode found by the walk (nullptr until one was seen)
  SortNode*                                      _sortNode;
  // sort criteria of _sortNode: variable id plus the direction flag as
  // stored in the SortNode's elements
  std::vector<std::pair<VariableId, bool>>       _sorts;
  // expressions of the CalculationNodes seen so far, by out variable id
  std::unordered_map<VariableId, AstNode const*> _variableDefinitions;
  // set to true as soon as the plan was changed
  bool                                           _modified;

  public:

    explicit SortToIndexNode (ExecutionPlan* plan)
      : _plan(plan),
        _sortNode(nullptr),
        _sorts(),
        _variableDefinitions(),
        _modified(false) {
    }

    // tries to replace a full collection scan with a scan over a sorted
    // index that covers (a prefix of) the sort condition.
    // always returns true, i.e. the walk is aborted here
    bool handleEnumerateCollectionNode (EnumerateCollectionNode* enumerateCollectionNode) {
      if (_sortNode == nullptr) {
        // no SORT seen, nothing to optimize
        return true;
      }

      if (enumerateCollectionNode->isInInnerLoop()) {
        // node is located in an inner loop. must not optimize away the sort!
        return true;
      }

      SortCondition sortCondition(_sorts, _variableDefinitions);

      if (! sortCondition.isEmpty() &&
          sortCondition.isOnlyAttributeAccess() &&
          sortCondition.isUnidirectional()) {
        // we have found a sort condition, which is unidirectional
        // now check if any of the collection's indexes covers it

        Variable const* outVariable = enumerateCollectionNode->outVariable();
        auto const& indexes = enumerateCollectionNode->collection()->getIndexes();
        triagens::aql::Index const* bestIndex = nullptr;
        double bestCost                       = 0.0;
        size_t bestNumCovered                 = 0;

        for (auto& index : indexes) {
          if (! index->isSorted() || index->sparse) {
            // can only use a sorted index
            // cannot use a sparse index for sorting
            continue;
          }

          auto numCovered = sortCondition.coveredAttributes(outVariable, index->fields);

          if (numCovered == 0) {
            continue;
          }

          double estimatedCost = 0.0;
          if (! index->supportsSortCondition(&sortCondition, outVariable, enumerateCollectionNode->collection()->count(), estimatedCost)) {
            // should never happen
            TRI_ASSERT(false);
            continue;
          }

          // pick the candidate with the lowest estimated cost
          if (bestIndex == nullptr || estimatedCost < bestCost) {
            bestIndex      = index;
            bestCost       = estimatedCost;
            bestNumCovered = numCovered;
          }
        }

        if (bestIndex != nullptr) {
          // the new IndexNode gets an empty (but normalized) condition
          auto condition = std::make_unique<Condition>(_plan->getAst());
          condition->normalize(_plan);

          std::unique_ptr<ExecutionNode> newNode(new IndexNode(
            _plan,
            _plan->nextId(),
            enumerateCollectionNode->vocbase(),
            enumerateCollectionNode->collection(),
            outVariable,
            std::vector<Index const*>({ bestIndex }),
            condition.get(),
            sortCondition.isDescending()
          ));

          // the condition was handed to the IndexNode, give up our guard
          condition.release();

          auto n = newNode.release();

          _plan->registerNode(n);
          _plan->replaceNode(enumerateCollectionNode, n);
          _modified = true;

          if (bestNumCovered == sortCondition.numAttributes()) {
            // if the index covers the complete sort condition, we can also remove the sort node
            _plan->unlinkNode(_plan->getNodeById(_sortNode->id()));
          }
        }
      }

      return true; // always abort further searching here
    }

    // checks whether an existing IndexNode already produces its documents
    // in the order requested by the SORT, and removes the SortNode if so.
    // always returns true, i.e. the walk is aborted here
    bool handleIndexNode (IndexNode* indexNode) {
      if (_sortNode == nullptr) {
        // no SORT seen, nothing to optimize
        return true;
      }

      if (indexNode->isInInnerLoop()) {
        // index node contained in an inner loop. must not optimize away the sort!
        return true;
      }

      auto const& indexes = indexNode->getIndexes();
      auto cond = indexNode->condition();

      if (indexes.empty()) {
        // no index to look at. bail out instead of accessing indexes[0] below
        return true;
      }

      if (indexes.size() != 1) {
        // can only use this index node if it uses exactly one index or multiple indexes on exactly the same attributes

        if (! cond->isSorted()) {
          // index conditions do not guarantee sortedness
          return true;
        }

        // attributes of the first index. all other indexes must use the same
        std::vector<std::vector<triagens::basics::AttributeName>> seen;

        for (auto& index : indexes) {
          if (index->sparse) {
            // cannot use a sparse index for sorting
            return true;
          }

          if (seen.empty()) {
            // first index: remember its attributes as the reference
            seen = index->fields;
          }
          else if (! triagens::basics::AttributeName::isIdentical(index->fields, seen, true)) {
            // different attributes
            return true;
          }
        }

        // all indexes use the same attributes and index conditions guarantee sorted output
      }

      // if we get here, we either have one index or multiple indexes on the same attributes
      auto index = indexes[0];
      bool handled = false;

      SortCondition sortCondition(_sorts, _variableDefinitions);

      bool const isOnlyAttributeAccess = (! sortCondition.isEmpty() && sortCondition.isOnlyAttributeAccess());

      if (isOnlyAttributeAccess &&
          index->isSorted() &&
          ! index->sparse &&
          sortCondition.isUnidirectional() &&
          sortCondition.isDescending() == indexNode->reverse()) {
        // we have found a sort condition, which is unidirectional and in the same
        // order as the IndexNode...
        // now check if the sort attributes match the ones of the index
        Variable const* outVariable = indexNode->outVariable();
        auto numCovered = sortCondition.coveredAttributes(outVariable, index->fields);

        if (numCovered == sortCondition.numAttributes()) {
          // sort condition is fully covered by index... now we can remove the sort node from the plan
          _plan->unlinkNode(_plan->getNodeById(_sortNode->id()));
          _modified = true;
          handled = true;
        }
      }

      if (! handled &&
          isOnlyAttributeAccess &&
          indexes.size() == 1) {
        // special case... the index cannot be used for sorting, but we only compare with equality
        // lookups. now check if the equality lookup attributes are the same as the index attributes
        auto root = cond->root();

        if (root != nullptr) {
          auto condNode = root->getMember(0);

          if (condNode->isOnlyEqualityMatch()) {
            // now check if the index fields are the same as the sort condition fields
            // e.g. FILTER c.value1 == 1 && c.value2 == 42 SORT c.value1, c.value2
            Variable const* outVariable = indexNode->outVariable();
            size_t coveredFields = sortCondition.coveredAttributes(outVariable, index->fields);

            if (coveredFields == sortCondition.numAttributes() &&
                (index->isSorted() || index->fields.size() == sortCondition.numAttributes())) {
              // no need to sort
              _plan->unlinkNode(_plan->getNodeById(_sortNode->id()));
              _modified = true;
            }
          }
        }
      }

      return true; // always abort after we found an IndexNode
    }

    // subqueries are never entered by this walker
    bool enterSubquery (ExecutionNode*, ExecutionNode*) override final {
      return false;
    }

    // called for each node of the walk. returning true aborts the walk,
    // returning false continues with the next node
    bool before (ExecutionNode* en) override final {
      switch (en->getType()) {
        case EN::TRAVERSAL:
        case EN::ENUMERATE_LIST:
        case EN::SUBQUERY:
        case EN::FILTER:
          return false;                           // skip. we don't care.

        case EN::CALCULATION: {
          // remember the expression that defines the calculation's out variable
          auto outvars = en->getVariablesSetHere();
          TRI_ASSERT(outvars.size() == 1);

          _variableDefinitions.emplace(outvars[0]->id, static_cast<CalculationNode const*>(en)->expression()->node());
          return false;
        }

        case EN::SINGLETON:
        case EN::AGGREGATE:
        case EN::INSERT:
        case EN::REMOVE:
        case EN::REPLACE:
        case EN::UPDATE:
        case EN::UPSERT:
        case EN::RETURN:
        case EN::NORESULTS:
        case EN::SCATTER:
        case EN::DISTRIBUTE:
        case EN::GATHER:
        case EN::REMOTE:
        case EN::ILLEGAL:
        case EN::LIMIT:                      // LIMIT is criterion to stop
          return true;  // abort.

        case EN::SORT:     // pulling two sorts together is done elsewhere.
          if (! _sorts.empty() || _sortNode != nullptr) {
            return true; // a different SORT node. abort
          }
          _sortNode = static_cast<SortNode*>(en);
          for (auto& it : _sortNode->getElements()) {
            _sorts.emplace_back((it.first)->id, it.second);
          }
          return false;

        case EN::INDEX:
          return handleIndexNode(static_cast<IndexNode*>(en));

        case EN::ENUMERATE_COLLECTION:
          return handleEnumerateCollectionNode(static_cast<EnumerateCollectionNode*>(en));
      }
      // unknown node type: abort
      return true;
    }
};

////////////////////////////////////////////////////////////////////////////////
/// @brief try to use an index for sorting
///
/// runs a SortToIndexNode walker on every SortNode of the plan (including
/// those in subqueries) and reports the plan as modified if any of the
/// walkers changed it
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::useIndexForSortRule (Optimizer* opt,
                                         ExecutionPlan* plan,
                                         Optimizer::Rule const* rule) {

  std::vector<ExecutionNode*> sortNodes(plan->findNodesOfType(EN::SORT, true));
  bool planChanged = false;

  for (auto const& candidate : sortNodes) {
    // each SORT gets its own walker, as the walker keeps per-SORT state
    SortToIndexNode walker(plan);
    static_cast<SortNode*>(candidate)->walk(&walker);

    planChanged = planChanged || walker._modified;
  }

  opt->addPlan(plan, rule, planChanged);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief try to remove filters which are covered by indexes
///
/// for each FILTER whose input is produced by a CalculationNode, the filter
/// expression is turned into a normalized condition. the rule then follows
/// the first dependencies of the FILTER until it finds an IndexNode using
/// exactly one index with a non-empty condition. the parts of the filter
/// condition that are covered by the index condition are removed: if nothing
/// is left, FILTER and calculation are unlinked, otherwise the calculation is
/// replaced with one for the remaining condition. the search is not
/// continued beyond a LIMIT
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeFiltersCoveredByIndexRule (Optimizer* opt,
                                                     ExecutionPlan* plan,
                                                     Optimizer::Rule const* rule) {
  bool modified = false;
  std::unordered_set<ExecutionNode*> toUnlink;
  std::vector<ExecutionNode*> filterNodes(plan->findNodesOfType(EN::FILTER, true));

  for (auto const& node : filterNodes) {
    // locate the node that calculates the filter expression
    auto filterNode = static_cast<FilterNode const*>(node);
    auto usedVars = filterNode->getVariablesUsedHere();
    TRI_ASSERT(usedVars.size() == 1);

    auto setter = plan->getVarSetBy(usedVars[0]->id);

    if (setter == nullptr || setter->getType() != EN::CALCULATION) {
      continue;
    }

    auto calculationNode = static_cast<CalculationNode*>(setter);

    // turn the filter expression into a normalized condition
    auto condition = std::make_unique<Condition>(plan->getAst());
    condition->andCombine(calculationNode->expression()->node());
    condition->normalize(plan);

    if (condition->root() == nullptr ||
        condition->root()->numMembers() != 1) {
      // either no condition or multiple ORed conditions...
      continue;
    }

    // walk along the first dependencies, starting at the FILTER itself
    for (auto current = node; current != nullptr; current = current->getFirstDependency()) {
      if (current->getType() == EN::INDEX) {
        auto indexNode = static_cast<IndexNode const*>(current);
        auto indexCondition = indexNode->condition();

        // we can only handle an index node with a non-empty condition
        // that uses a single index
        if (indexCondition != nullptr &&
            ! indexCondition->isEmpty() &&
            indexNode->getIndexes().size() == 1) {

          // check which part of the filter is not covered by the index
          auto remaining = condition->removeIndexCondition(indexNode->outVariable(), indexCondition->root());

          if (remaining == nullptr) {
            // no condition left...
            // FILTER node can be completely removed
            toUnlink.emplace(setter);
            toUnlink.emplace(node);
            modified = true;
            break;
          }

          if (remaining != condition->root()) {
            // some condition is left, but it is a different one than
            // the one from the FILTER node
            auto expr = std::make_unique<Expression>(plan->getAst(), remaining);
            CalculationNode* cn = new CalculationNode(plan, plan->nextId(), expr.get(), calculationNode->outVariable());
            expr.release();
            plan->registerNode(cn);
            plan->replaceNode(setter, cn);
            modified = true;
            break;
          }
        }
      }

      if (current->getType() == EN::LIMIT ||
          ! current->hasDependency()) {
        // do not look beyond a LIMIT, and stop at the end of the plan
        break;
      }
    }
  }

  if (! toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief helper to step through all tuples of permutations
/// a permutation tuple is stored as one flat vector plus a second vector
/// holding the start index of each section of the flat vector.
/// Example:
/// data:   0,1,2, 3,4, 5,6
/// starts: 0,     3,   5
/// is a tuple of 3 permutations of 3, 2 and 2 points respectively.
/// The function advances data to the next tuple in lexicographical order
/// and returns true. If data already is the lexicographically largest
/// tuple, false is returned, and all sections have wrapped around to their
/// first (sorted) permutation.
////////////////////////////////////////////////////////////////////////////////

static bool NextPermutationTuple (std::vector<size_t>& data,
                                  std::vector<size_t>& starts) {
  // the last section ends where the data ends, each other section ends
  // where its successor starts
  auto sectionEnd = data.end();

  // advance the sections from back to front, like the digits of a counter
  for (size_t remaining = starts.size(); remaining > 0; --remaining) {
    auto sectionBegin = data.begin() + starts[remaining - 1];

    if (std::next_permutation(sectionBegin, sectionEnd)) {
      // this section could be advanced, so we have a new tuple
      return true;
    }

    // this section has wrapped around, carry over to the one before it
    sectionEnd = sectionBegin;
  }

  return false;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief interchange adjacent EnumerateCollectionNodes in all possible ways
///
/// collects all runs of two or more directly adjacent
/// EnumerateCollectionNodes. the original plan is reported unmodified, and
/// then one cloned plan is reported for every other combination of
/// orderings of these runs, until the optimizer refuses further plans
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::interchangeAdjacentEnumerationsRule (Optimizer* opt,
                                                         ExecutionPlan* plan,
                                                         Optimizer::Rule const* rule) {
  std::vector<ExecutionNode*> enumNodes(plan->findNodesOfType(EN::ENUMERATE_COLLECTION, true));

  // the nodes that are not yet part of any run
  std::unordered_set<ExecutionNode*> unvisited;
  for (auto const& n : enumNodes) {
    TRI_ASSERT(unvisited.find(n) == unvisited.end());
    unvisited.emplace(n);
  }

  std::vector<ExecutionNode*> nodesToPermute;   // all runs, concatenated
  std::vector<size_t> permTuple;                // current permutation tuple
  std::vector<size_t> starts;                   // start index of each run

  // We use that the order of the nodes is such that a node B that is among the
  // recursive dependencies of a node A is later in the vector.
  for (auto const& n : enumNodes) {
    if (unvisited.find(n) == unvisited.end()) {
      // already part of a run that started at an earlier node
      continue;
    }

    std::vector<ExecutionNode*> run{ n };
    unvisited.erase(n);

    // Now follow the dependencies as long as we see further such nodes:
    auto walker = n;

    while (walker->hasDependency()) {
      auto dep = walker->getFirstDependency();

      if (dep->getType() != EN::ENUMERATE_COLLECTION) {
        break;
      }

      walker = dep;
      run.emplace_back(walker);
      unvisited.erase(walker);
    }

    if (run.size() > 1) {
      // Move it into the permutation tuple:
      starts.emplace_back(permTuple.size());

      for (auto const& member : run) {
        nodesToPermute.emplace_back(member);
        permTuple.emplace_back(permTuple.size());
      }
    }
  }

  // Now we have collected all the runs of EnumerateCollectionNodes in the
  // plan, we need to compute all possible permutations of all of them,
  // independently. This is why we need to compute all permutation tuples.

  opt->addPlan(plan, rule, false);

  if (starts.empty()) {
    // nothing to permute
    return;
  }

  // skip the initial tuple, it is the original plan that was already added.
  // will never return false, as every run has at least two members
  NextPermutationTuple(permTuple, starts);

  do {
    // Clone the plan:
    auto newPlan = plan->clone();

    try {   // get rid of plan if any of this fails
      // Find the nodes in the new plan corresponding to the ones in the
      // old plan that we want to permute:
      std::vector<ExecutionNode*> newNodes;
      for (auto const& original : nodesToPermute) {
        newNodes.emplace_back(newPlan->getNodeById(original->id()));
      }

      // Now get going with the permutations:
      size_t const numRuns = starts.size();

      for (size_t i = 0; i < numRuns; i++) {
        size_t const lowBound = starts[i];
        size_t const highBound = (i + 1 < numRuns)
                               ? starts[i + 1]
                               : permTuple.size();
        // We need to remove the nodes
        // newNodes[lowBound..highBound-1] in newPlan and replace
        // them by the same ones in a different order, given by
        // permTuple[lowBound..highBound-1].
        auto const& parents = newNodes[lowBound]->getParents();

        TRI_ASSERT(parents.size() == 1);
        auto parent = parents[0];  // needed for insertion later

        // Unlink all those nodes:
        for (size_t j = lowBound; j < highBound; j++) {
          newPlan->unlinkNode(newNodes[j]);
        }

        // And insert them in the new order, last one first:
        for (size_t j = highBound; j > lowBound; --j) {
          newPlan->insertDependency(parent, newNodes[permTuple[j - 1]]);
        }
      }

      // OK, the new plan is ready, let's report it:
      if (! opt->addPlan(newPlan, rule, true)) {
        // have enough plans. stop permutations
        break;
      }
    }
    catch (...) {
      delete newPlan;
      throw;
    }

  }
  while (NextPermutationTuple(permTuple, starts));
}

////////////////////////////////////////////////////////////////////////////////
/// @brief scatter operations in cluster
/// on a coordinator, this rule wraps every collection access
/// (EnumerateCollectionNode, IndexNode, data-modification nodes) like this:
///   dependency -> SCATTER -> REMOTE -> node -> REMOTE -> GATHER -> parent
/// so operations on sharded collections actually work.
/// nodes that are already fed by a DISTRIBUTE/REMOTE pair are left alone.
/// it will change plans in place
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::scatterInClusterRule (Optimizer* opt,
                                          ExecutionPlan* plan,
                                          Optimizer::Rule const* rule) {
  if (! triagens::arango::ServerState::instance()->isCoordinator()) {
    // nothing to do if we are not a coordinator
    opt->addPlan(plan, rule, false);
    return;
  }

  bool wasModified = false;

  // map from the last node of each subquery to its SubqueryNode
  std::unordered_map<ExecutionNode*, ExecutionNode*> subqueries;

  for (auto& sq : plan->findNodesOfType(ExecutionNode::SUBQUERY, true)) {
    subqueries.emplace(static_cast<SubqueryNode const*>(sq)->getSubquery(), sq);
  }

  // we are a coordinator. now look in the plan for nodes of type
  // EnumerateCollectionNode, IndexNode and modification nodes
  std::vector<ExecutionNode::NodeType> const types = {
    ExecutionNode::ENUMERATE_COLLECTION,
    ExecutionNode::INDEX,
    ExecutionNode::INSERT,
    ExecutionNode::UPDATE,
    ExecutionNode::REPLACE,
    ExecutionNode::REMOVE,
    ExecutionNode::UPSERT // TODO: check if ok here
  };

  std::vector<ExecutionNode*> nodes(plan->findNodesOfType(types, true));

  for (auto& node: nodes) {
    // found a node we need to replace in the plan

    // NOTE(review): parents and deps are still read after unlinkNode()
    // below. this presumably relies on getParents()/getDependencies()
    // handing out copies - confirm
    auto const& parents = node->getParents();
    auto const& deps = node->getDependencies();
    TRI_ASSERT(deps.size() == 1);

    // don't do this if we are already distributing!
    if (deps[0]->getType() == ExecutionNode::REMOTE &&
        deps[0]->getFirstDependency()->getType() == ExecutionNode::DISTRIBUTE) {
      continue;
    }

    bool const isRootNode = plan->isRoot(node);
    plan->unlinkNode(node, true);

    // extract database and collection from plan node
    TRI_vocbase_t* vocbase = nullptr;
    Collection const* collection = nullptr;

    auto const nodeType = node->getType();

    switch (nodeType) {
      case ExecutionNode::ENUMERATE_COLLECTION: {
        auto* enumNode = static_cast<EnumerateCollectionNode*>(node);
        vocbase = enumNode->vocbase();
        collection = enumNode->collection();
        break;
      }

      case ExecutionNode::INDEX: {
        auto* indexNode = static_cast<IndexNode*>(node);
        vocbase = indexNode->vocbase();
        collection = indexNode->collection();
        break;
      }

      case ExecutionNode::INSERT:
      case ExecutionNode::UPDATE:
      case ExecutionNode::REPLACE:
      case ExecutionNode::REMOVE:
      case ExecutionNode::UPSERT: {
        auto* modNode = static_cast<ModificationNode*>(node);
        vocbase = modNode->vocbase();
        collection = modNode->collection();

        if (nodeType == ExecutionNode::REMOVE ||
            nodeType == ExecutionNode::UPDATE) {
          // Note that in the REPLACE or UPSERT case we are not getting here, since
          // the distributeInClusterRule fires and a DistributionNode is
          // used.
          modNode->getOptions().ignoreDocumentNotFound = true;
        }
        break;
      }

      default: {
        // findNodesOfType() must only return nodes of the types listed above
        TRI_ASSERT(false);
        break;
      }
    }

    // insert a scatter node
    ExecutionNode* scatterNode = new ScatterNode(plan, plan->nextId(),
        vocbase, collection);
    plan->registerNode(scatterNode);
    scatterNode->addDependency(deps[0]);

    // insert a remote node
    ExecutionNode* remoteNode = new RemoteNode(plan, plan->nextId(), vocbase,
        collection, "", "", "");
    plan->registerNode(remoteNode);
    remoteNode->addDependency(scatterNode);

    // re-link with the remote node
    node->addDependency(remoteNode);

    // insert another remote node
    remoteNode = new RemoteNode(plan, plan->nextId(), vocbase, collection, "", "", "");
    plan->registerNode(remoteNode);
    remoteNode->addDependency(node);

    // insert a gather node
    ExecutionNode* gatherNode = new GatherNode(plan, plan->nextId(), vocbase,
        collection);
    plan->registerNode(gatherNode);
    gatherNode->addDependency(remoteNode);

    // and now link the gather node with the rest of the plan
    if (parents.size() == 1) {
      parents[0]->replaceDependency(deps[0], gatherNode);
    }

    // check if the node that we modified was at the end of a subquery
    auto found = subqueries.find(node);

    if (found != subqueries.end()) {
      static_cast<SubqueryNode*>(found->second)->setSubquery(gatherNode, true);
    }

    if (isRootNode) {
      // if we replaced the root node, set a new root node
      plan->root(gatherNode);
    }
    wasModified = true;
  }

  opt->addPlan(plan, rule, wasModified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief distribute operations in cluster
///
/// this rule inserts distribute, remote nodes so operations on sharded
/// collections actually work, this differs from scatterInCluster in that every
/// incoming row is only sent to one shard and not all as in scatterInCluster
///
/// on a coordinator, the rule looks for the first data-modification node when
/// following the first dependencies from the plan's root, and wraps it as
///   dependency -> DISTRIBUTE -> REMOTE -> node -> REMOTE -> GATHER -> parent
/// REMOVE and UPDATE on collections without default sharding are not
/// handled here (a ScatterNode has to be used for them)
///
/// it will change plans in place
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::distributeInClusterRule (Optimizer* opt,
                                             ExecutionPlan* plan,
                                             Optimizer::Rule const* rule) {
  bool wasModified = false;

  if (triagens::arango::ServerState::instance()->isCoordinator()) {
    // we are a coordinator, we replace the root if it is a modification node

    // only replace if it is the last node in the plan
    auto node = plan->root();
    TRI_ASSERT(node != nullptr);

    while (node != nullptr) {
      // loop until we find a modification node or the end of the plan
      auto nodeType = node->getType();

      if (nodeType == ExecutionNode::INSERT  ||
          nodeType == ExecutionNode::REMOVE  ||
          nodeType == ExecutionNode::UPDATE ||
          nodeType == ExecutionNode::REPLACE ||
          nodeType == ExecutionNode::UPSERT) {
        // found a node!
        break;
      }

      if (! node->hasDependency()) {
        // reached the end
        opt->addPlan(plan, rule, wasModified);
        return;
      }

      node = node->getFirstDependency();
    }

    TRI_ASSERT(node != nullptr);

    if (node == nullptr) {
      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error");
    }

    // remember the parent of the modification node (if any), so we can
    // re-link the new GatherNode to it later
    ExecutionNode* originalParent = nullptr;
    {
      if (node->hasParent()) {
        auto const& parents = node->getParents();
        originalParent = parents[0];
        TRI_ASSERT(originalParent != nullptr);
        TRI_ASSERT(node != plan->root());
      }
      else {
        TRI_ASSERT(node == plan->root());
      }
    }

    // when we get here, we have found a matching data-modification node!
    auto const nodeType = node->getType();

    TRI_ASSERT(nodeType == ExecutionNode::INSERT  ||
               nodeType == ExecutionNode::REMOVE  ||
               nodeType == ExecutionNode::UPDATE  ||
               nodeType == ExecutionNode::REPLACE ||
               nodeType == ExecutionNode::UPSERT);

    Collection const* collection = static_cast<ModificationNode*>(node)->collection();

    bool const defaultSharding = collection->usesDefaultSharding();

    if (nodeType == ExecutionNode::REMOVE ||
        nodeType == ExecutionNode::UPDATE) {
      if (! defaultSharding) {
        // We have to use a ScatterNode.
        opt->addPlan(plan, rule, wasModified);
        return;
      }
    }


    // In the INSERT and REPLACE cases we use a DistributeNode...

    TRI_ASSERT(node->hasDependency());
    // NOTE(review): deps is still read after unlinkNode() below. this
    // presumably relies on getDependencies() handing out a copy - confirm
    auto const& deps = node->getDependencies();

    if (originalParent != nullptr) {
      originalParent->removeDependency(node);
      // unlink the node
      auto root = plan->root();
      plan->unlinkNode(node, true);
      plan->root(root, true); // fix root node
    }
    else {
      // unlink the node
      plan->unlinkNode(node, true);
      plan->root(deps[0], true); // fix root node
    }


    // extract database from plan node
    TRI_vocbase_t* vocbase = static_cast<ModificationNode*>(node)->vocbase();

    // insert a distribute node
    ExecutionNode* distNode = nullptr;
    Variable const* inputVariable;
    if (nodeType == ExecutionNode::INSERT ||
        nodeType == ExecutionNode::REMOVE) {
      TRI_ASSERT(node->getVariablesUsedHere().size() == 1);

      // in case of an INSERT, the DistributeNode is responsible for generating keys
      // if none present
      bool const createKeys = (nodeType == ExecutionNode::INSERT);
      inputVariable = node->getVariablesUsedHere()[0];
      distNode = new DistributeNode(plan, plan->nextId(),
          vocbase, collection, inputVariable->id, createKeys, true);
    }
    else if (nodeType == ExecutionNode::REPLACE) {
      std::vector<Variable const*> v = node->getVariablesUsedHere();
      if (defaultSharding && v.size() > 1) {
        // We only look into _inKeyVariable
        inputVariable = v[1];
      }
      else {
        // We only look into _inDocVariable
        inputVariable = v[0];
      }
      distNode = new DistributeNode(plan, plan->nextId(),
            vocbase, collection, inputVariable->id, false, v.size() > 1);
    }
    else if (nodeType == ExecutionNode::UPDATE) {
      std::vector<Variable const*> v = node->getVariablesUsedHere();
      if (v.size() > 1) {
        // If there is a key variable:
        inputVariable = v[1];
        // This is the _inKeyVariable! This works, since we use a ScatterNode
        // for non-default-sharding attributes.
      }
      else {
        // was only UPDATE <doc> IN <collection>
        inputVariable = v[0];
      }
      distNode = new DistributeNode(plan, plan->nextId(),
          vocbase, collection, inputVariable->id, false, v.size() > 1);
    }
    else if (nodeType == ExecutionNode::UPSERT) {
      // the DistributeNode for an UPSERT is built from the first and the
      // third input variable of the node, so we need at least three of them
      std::vector<Variable const*> v(node->getVariablesUsedHere());
      TRI_ASSERT(v.size() >= 3);

      if (v.size() < 3) {
        // do not read beyond the end of the vector in non-assertion builds
        THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error");
      }

      distNode = new DistributeNode(plan, plan->nextId(),
          vocbase, collection, v[0]->id, v[2]->id, false, true);
    }
    else {
      TRI_ASSERT(false);
      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error");
    }

    TRI_ASSERT(distNode != nullptr);

    plan->registerNode(distNode);
    distNode->addDependency(deps[0]);

    // insert a remote node
    ExecutionNode* remoteNode = new RemoteNode(plan, plan->nextId(), vocbase,
        collection, "", "", "");
    plan->registerNode(remoteNode);
    remoteNode->addDependency(distNode);

    // re-link with the remote node
    node->addDependency(remoteNode);

    // insert another remote node
    remoteNode = new RemoteNode(plan, plan->nextId(), vocbase, collection, "", "", "");
    plan->registerNode(remoteNode);
    remoteNode->addDependency(node);

    // insert a gather node
    ExecutionNode* gatherNode = new GatherNode(plan, plan->nextId(), vocbase, collection);
    plan->registerNode(gatherNode);
    gatherNode->addDependency(remoteNode);

    if (originalParent != nullptr) {
      // we did not replace the root node
      originalParent->addDependency(gatherNode);
    }
    else {
      // we replaced the root node, set a new root node
      plan->root(gatherNode, true);
    }
    wasModified = true;
  }

  opt->addPlan(plan, rule, wasModified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief move filters up into the cluster distribution part of the plan
/// this rule modifies the plan in place
/// filters are moved as far up in the plan as possible to make result sets
/// as small as possible as early as possible
///
/// for each GatherNode that has a parent, the rule inspects the chain of
/// parents. FilterNodes and CalculationNodes (if their expression can run on
/// a DB server) are unlinked and re-inserted in front of the GatherNode's
/// first dependency, unless they use a variable that is set by one of the
/// nodes that were skipped on the way
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::distributeFilternCalcToClusterRule (Optimizer* opt,
                                                        ExecutionPlan* plan,
                                                        Optimizer::Rule const* rule) {
  bool modified = false;

  std::vector<ExecutionNode*> nodes(plan->findNodesOfType(EN::GATHER, true));

  for (auto& n : nodes) {
    auto const& remoteNodeList = n->getDependencies();
    TRI_ASSERT(remoteNodeList.size() > 0);
    // the node in front of which filters/calculations will be inserted
    auto rn = remoteNodeList[0];

    if (! n->hasParent()) {
      continue;
    }

    // variables set by the nodes we have skipped so far. nodes using any of
    // them must not be moved
    std::unordered_set<Variable const*> varsSetHere;
    auto parents = n->getParents();

    while (true) {
      if (parents.empty()) {
        // the node inspected last has no parent, so there is nothing left
        // to look at. must not access parents[0] in this case
        break;
      }

      bool stopSearching = false;
      auto inspectNode = parents[0];

      switch (inspectNode->getType()) {
        case EN::ENUMERATE_LIST:
        case EN::SINGLETON:
        case EN::INSERT:
        case EN::REMOVE:
        case EN::REPLACE:
        case EN::UPDATE:
        case EN::UPSERT: {
          // skip over these nodes, but remember the variables they set
          for (auto& v : inspectNode->getVariablesSetHere()) {
            varsSetHere.emplace(v);
          }
          parents = inspectNode->getParents();
          continue;
        }

        case EN::AGGREGATE:
        case EN::SUBQUERY:
        case EN::RETURN:
        case EN::NORESULTS:
        case EN::SCATTER:
        case EN::DISTRIBUTE:
        case EN::GATHER:
        case EN::ILLEGAL:
        case EN::REMOTE:
        case EN::LIMIT:
        case EN::SORT:
        case EN::INDEX:
        case EN::ENUMERATE_COLLECTION:
        case EN::TRAVERSAL:
          //do break
          stopSearching = true;
          break;

        case EN::CALCULATION: {
          auto calc = static_cast<CalculationNode const*>(inspectNode);
          // check if the expression can be executed on a DB server safely
          if (! calc->expression()->canRunOnDBServer()) {
            stopSearching = true;
            break;
          }
          // intentionally fall through here
        }
        case EN::FILTER:
          for (auto& v : inspectNode->getVariablesUsedHere()) {
            if (varsSetHere.find(v) != varsSetHere.end()) {
              // do not move over the definition of variables that we need
              stopSearching = true;
              break;
            }
          }

          if (! stopSearching) {
            // remember our cursor...
            parents = inspectNode->getParents();
            // then unlink the filter/calculator from the plan
            plan->unlinkNode(inspectNode);
            // and re-insert into plan in front of the remoteNode
            plan->insertDependency(rn, inspectNode);

            modified = true;
            //ready to rumble!
          }
          break;
      }

      if (stopSearching) {
        break;
      }
    }
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief move sorts up into the cluster distribution part of the plan
/// this rule modifies the plan in place
/// sorts are moved as far up in the plan as possible to make result sets
/// as small as possible as early as possible
///
/// sorts are not pushed beyond limits
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::distributeSortToClusterRule (Optimizer* opt,
                                                 ExecutionPlan* plan,
                                                 Optimizer::Rule const* rule) {
  // For every GatherNode, look at the nodes that consume its output (its
  // parents). As long as the directly following node is a SortNode, that
  // SortNode is unlinked and re-inserted in front of the gather's first
  // dependency (named `rn` below), and the GatherNode is given the same sort
  // elements. Any other node type ends the search for this GatherNode.
  bool modified = false;

  std::vector<ExecutionNode*> nodes(plan->findNodesOfType(EN::GATHER, true));

  for (auto& n : nodes) {
    auto const& remoteNodeList = n->getDependencies();
    auto gatherNode = static_cast<GatherNode*>(n);
    TRI_ASSERT(remoteNodeList.size() > 0);
    auto rn = remoteNodeList[0];

    if (! n->hasParent()) {
      continue;
    }

    auto parents = n->getParents();

    while (1) {
      if (parents.empty()) {
        // the node we moved last had no parent: nothing left to inspect
        break;
      }

      bool stopSearching = false;

      auto inspectNode = parents[0];

      switch (inspectNode->getType()) {
        case EN::ENUMERATE_LIST:
        case EN::SINGLETON:
        case EN::AGGREGATE:
        case EN::INSERT:
        case EN::REMOVE:
        case EN::REPLACE:
        case EN::UPDATE:
        case EN::UPSERT:
        case EN::CALCULATION:
        case EN::FILTER:
        case EN::SUBQUERY:
        case EN::RETURN:
        case EN::NORESULTS:
        case EN::SCATTER:
        case EN::DISTRIBUTE:
        case EN::GATHER:
        case EN::ILLEGAL:
        case EN::REMOTE:
        case EN::LIMIT:
        case EN::INDEX:
        case EN::TRAVERSAL:
        case EN::ENUMERATE_COLLECTION:
          // For all these, we do not want to pull a SortNode further down
          // out to the DBservers, note that potential FilterNodes and
          // CalculationNodes that can be moved to the DBservers have
          // already been moved over by the distribute-filtercalc-to-cluster
          // rule which is done first.
          stopSearching = true;
          break;

        case EN::SORT: {
          // the braces give the local below its own scope, so that further
          // case labels can be added after this one without jumping over
          // an initialization
          auto thisSortNode = static_cast<SortNode*>(inspectNode);

          // remember our cursor: continue with whatever followed the sort
          parents = inspectNode->getParents();
          // then unlink the sort node from the plan
          plan->unlinkNode(inspectNode);
          // and re-insert it into the plan in front of the remote node
          plan->insertDependency(rn, inspectNode);
          // the gather node gets the same sort elements as the moved sort
          gatherNode->setElements(thisSortNode->getElements());
          modified = true;
          break;
        }
      }

      if (stopSearching) {
        break;
      }
    }
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief try to get rid of a RemoteNode->ScatterNode combination which has
/// only a SingletonNode and possibly some CalculationNodes as dependencies
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeUnnecessaryRemoteScatterRule (Optimizer* opt,
                                                        ExecutionPlan* plan,
                                                        Optimizer::Rule const* rule) {
  // Follows the single-dependency chain above `start` and reports whether it
  // consists solely of SingletonNodes and CalculationNodes whose expressions
  // can run on a DB server. The walk ends successfully as soon as a node does
  // not have exactly one dependency.
  auto upstreamIsTrivial = [] (ExecutionNode* start) -> bool {
    ExecutionNode* current = start;

    for (;;) {
      auto const& deps = current->getDependencies();

      if (deps.size() != 1) {
        // end of the linear chain, nothing disqualifying was found
        return true;
      }

      current = deps[0];
      auto const type = current->getType();

      if (type == EN::CALCULATION) {
        auto calc = static_cast<CalculationNode const*>(current);
        // the expression must be safe to execute on a DB server
        if (! calc->expression()->canRunOnDBServer()) {
          return false;
        }
      }
      else if (type != EN::SINGLETON) {
        // any other node type disqualifies the optimization
        return false;
      }
    }
  };

  std::unordered_set<ExecutionNode*> toUnlink;
  std::vector<ExecutionNode*> remoteNodes(plan->findNodesOfType(EN::REMOTE, true));

  for (auto& remoteNode : remoteNodes) {
    // a candidate is a remote node that is directly preceded by a scatter node
    if (remoteNode->hasDependency()) {
      auto const scatterCandidate = remoteNode->getFirstDependency();

      if (scatterCandidate->getType() == EN::SCATTER &&
          upstreamIsTrivial(scatterCandidate)) {
        // both the remote and the scatter node can go
        toUnlink.emplace(remoteNode);
        toUnlink.emplace(scatterCandidate);
      }
    }
  }

  if (! toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(plan, rule, ! toUnlink.empty());
}

////////////////////////////////////////////////////////////////////////////////
/// WalkerWorker for undistributeRemoveAfterEnumColl
////////////////////////////////////////////////////////////////////////////////

// Walker that starts at a RemoveNode and inspects the nodes it is handed one
// by one, looking for the pattern
//   REMOVE - [REMOTE/SCATTER/DISTRIBUTE/GATHER/FILTER/CALCULATION]* - ENUMERATE_COLLECTION
// where the removed variable is produced by that EnumerateCollectionNode
// (directly, or via a CalculationNode doing an attribute access on it) and both
// nodes use the same collection.
// While walking, the remote, scatter/distribute and gather nodes that were met
// are collected in the caller-provided set `toUnlink`. Whenever the pattern is
// violated, that set is cleared again.
class RemoveToEnumCollFinder final : public WalkerWorker<ExecutionNode> {
  ExecutionPlan* _plan;
  std::unordered_set<ExecutionNode*>& _toUnlink;  // result set owned by the caller
  bool _remove;                        // a RemoveNode has been accepted
  bool _scatter;                       // a scatter/distribute node has been seen
  bool _gather;                        // a gather node has been seen
  EnumerateCollectionNode* _enumColl;  // node expected at the end of the walk
  ExecutionNode* _setter;              // node that sets the REMOVE input variable
  const Variable* _variable;           // variable produced by _enumColl
  ExecutionNode* _lastNode;            // last node that was accepted

  public:

    RemoveToEnumCollFinder (ExecutionPlan* plan,
                            std::unordered_set<ExecutionNode*>& toUnlink)
      : _plan(plan),
        _toUnlink(toUnlink),
        _remove(false),
        _scatter(false),
        _gather(false),
        _enumColl(nullptr),
        _setter(nullptr),
        _variable(nullptr),
        _lastNode(nullptr) {
    }

    ~RemoveToEnumCollFinder () {
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief inspects a single node of the walk
/// returns false to keep walking. returns true when the expected
/// EnumerateCollectionNode was reached, and also when the pattern was
/// rejected; in the latter case the collected nodes are discarded first
////////////////////////////////////////////////////////////////////////////////

    bool before (ExecutionNode* en) override final {
      switch (en->getType()) {
        case EN::REMOVE: {
          if (_remove) {
            // second RemoveNode on the way: abort
            break;
          }

          // find the variable we are removing . . .
          auto rn = static_cast<RemoveNode*>(en);
          auto varsToRemove = rn->getVariablesUsedHere();

          // remove nodes always have one input variable
          TRI_ASSERT(varsToRemove.size() == 1);

          _setter = _plan->getVarSetBy(varsToRemove[0]->id);
          TRI_ASSERT(_setter != nullptr);
          auto enumColl = _setter;

          if (_setter->getType() == EN::CALCULATION) {
            // this should be an attribute access for _key
            auto cn = static_cast<CalculationNode*>(_setter);
            if (! cn->expression()->isAttributeAccess()) {
              break; // abort . . .
            }
            // check the variable is the same as the remove variable
            auto vars = cn->getVariablesSetHere();
            if (vars.size() != 1 || vars[0]->id != varsToRemove[0]->id) {
              break; // abort . . .
            }
            // check the remove node's collection is sharded over _key
            std::vector<std::string> shardKeys = rn->collection()->shardKeys();
            if (shardKeys.size() != 1 || shardKeys[0] != TRI_VOC_ATTRIBUTE_KEY) {
              break; // abort . . .
            }

            // set the varsToRemove to the variable in the expression of this
            // node and also define enumColl
            varsToRemove = cn->getVariablesUsedHere();
            TRI_ASSERT(varsToRemove.size() == 1);
            enumColl = _plan->getVarSetBy(varsToRemove[0]->id);
            // enumColl is dereferenced right below, so this is the pointer
            // that needs to be checked here
            TRI_ASSERT(enumColl != nullptr);
          }

          if (enumColl->getType() != EN::ENUMERATE_COLLECTION) {
            break; // abort . . .
          }

          _enumColl = static_cast<EnumerateCollectionNode*>(enumColl);

          if (_enumColl->collection() != rn->collection()) {
            break; // abort . . .
          }

          _variable = varsToRemove[0];    // the variable we'll remove
          _remove = true;
          _lastNode = en;
          return false; // continue . . .
        }
        case EN::REMOTE: {
          _toUnlink.emplace(en);
          _lastNode = en;
          return false; // continue . . .
        }
        case EN::DISTRIBUTE:
        case EN::SCATTER: {
          if (_scatter) { // met more than one scatter node
            break;        // abort . . .
          }
          _scatter = true;
          _toUnlink.emplace(en);
          _lastNode = en;
          return false; // continue . . .
        }
        case EN::GATHER: {
          if (_gather) { // met more than one gather node
            break;       // abort . . .
          }
          _gather = true;
          _toUnlink.emplace(en);
          _lastNode = en;
          return false; // continue . . .
        }
        case EN::FILTER: {
          // remembered so that the CalculationNode feeding it can be matched
          _lastNode = en;
          return false; // continue . . .
        }
        case EN::CALCULATION: {
          TRI_ASSERT(_setter != nullptr);
          if (_setter->getType() == EN::CALCULATION && _setter->id() == en->id()) {
            // this is the calculation that produces the REMOVE input
            _lastNode = en;
            return false; // continue . . .
          }
          if (_lastNode == nullptr || _lastNode->getType() != EN::FILTER) {
            // doesn't match the last filter node
            break; // abort . . .
          }
          auto cn = static_cast<CalculationNode*>(en);
          auto fn = static_cast<FilterNode*>(_lastNode);

          // check these are a Calc-Filter pair
          if (cn->getVariablesSetHere()[0]->id != fn->getVariablesUsedHere()[0]->id) {
            break; // abort . . .
          }

          // check that we are filtering/calculating something with the variable
          // we are to remove
          auto varsUsedHere = cn->getVariablesUsedHere();

          if (varsUsedHere.size() != 1) {
            break; //abort . . .
          }
          if (varsUsedHere[0]->id != _variable->id) {
            break; // abort . . .
          }
          _lastNode = en;
          return false; // continue . . .
        }
        case EN::ENUMERATE_COLLECTION: {
          // check that we are enumerating the variable we are to remove
          // and that we have already seen a remove node
          TRI_ASSERT(_enumColl != nullptr);
          if (en->id() != _enumColl->id()) {
            break; // abort . . .
          }
          return true; // reached the end!
        }
        case EN::SINGLETON:
        case EN::ENUMERATE_LIST:
        case EN::SUBQUERY:
        case EN::AGGREGATE:
        case EN::INSERT:
        case EN::REPLACE:
        case EN::UPDATE:
        case EN::UPSERT:
        case EN::RETURN:
        case EN::NORESULTS:
        case EN::ILLEGAL:
        case EN::LIMIT:
        case EN::SORT:
        case EN::TRAVERSAL:
        case EN::INDEX: {
          // if we meet any of the above, then we abort . . .
          break;
        }
      }

      // every abort path ends up here: discard whatever was collected
      _toUnlink.clear();
      return true;
    }
};

////////////////////////////////////////////////////////////////////////////////
/// @brief recognizes that a RemoveNode can be moved to the shards.
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::undistributeRemoveAfterEnumCollRule (Optimizer* opt,
                                                         ExecutionPlan* plan,
                                                         Optimizer::Rule const* rule) {
  // nodes the finders mark as superfluous; all finders write into this one
  // set, and a finder that rejects its chain empties it again
  std::unordered_set<ExecutionNode*> toUnlink;

  std::vector<ExecutionNode*> const removeNodes(plan->findNodesOfType(EN::REMOVE, true));

  for (auto const& removeNode : removeNodes) {
    // start a fresh walk at every RemoveNode (including those in subqueries)
    RemoveToEnumCollFinder finder(plan, toUnlink);
    removeNode->walk(&finder);
  }

  bool const modified = ! toUnlink.empty();

  if (modified) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief auxiliary struct for finding common nodes in OR conditions
////////////////////////////////////////////////////////////////////////////////

struct CommonNodeFinder {
  std::vector<AstNode const*> possibleNodes;

  bool find (AstNode const*  node,
             AstNodeType     condition,
             AstNode const*& commonNode,
             std::string&    commonName) {

    if (node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
      return (find(node->getMember(0), condition, commonNode, commonName)
           && find(node->getMember(1), condition, commonNode, commonName));
    }

    if (node->type == NODE_TYPE_VALUE) {
      possibleNodes.clear();
      return true;
    }

    if (node->type == condition
        || (condition != NODE_TYPE_OPERATOR_BINARY_EQ
            && ( node->type == NODE_TYPE_OPERATOR_BINARY_LE
              || node->type == NODE_TYPE_OPERATOR_BINARY_LT
              || node->type == NODE_TYPE_OPERATOR_BINARY_GE
              || node->type == NODE_TYPE_OPERATOR_BINARY_GT
              || node->type == NODE_TYPE_OPERATOR_BINARY_IN))) {

      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      bool const isIn = (node->type == NODE_TYPE_OPERATOR_BINARY_IN && rhs->isArray());

      if (node->type == NODE_TYPE_OPERATOR_BINARY_IN &&
          rhs->type == NODE_TYPE_EXPANSION) {
        // ooh, cannot optimize this (yet)
        possibleNodes.clear();
        return false;
      }

      if (! isIn && lhs->isConstant()) {
        commonNode = rhs;
        commonName = commonNode->toString();
        possibleNodes.clear();
        return true;
      }

      if (rhs->isConstant()) {
        commonNode = lhs;
        commonName = commonNode->toString();
        possibleNodes.clear();
        return true;
      }

      if (rhs->type == NODE_TYPE_FCALL ||
          rhs->type == NODE_TYPE_FCALL_USER ||
          rhs->type == NODE_TYPE_REFERENCE) {
        commonNode = lhs;
        commonName = commonNode->toString();
        possibleNodes.clear();
        return true;
      }

      if (! isIn &&
          (lhs->type == NODE_TYPE_FCALL ||
           lhs->type == NODE_TYPE_FCALL_USER ||
           lhs->type == NODE_TYPE_REFERENCE)) {
        commonNode = rhs;
        commonName = commonNode->toString();
        possibleNodes.clear();
        return true;
      }

      if (! isIn &&
          (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
           lhs->type == NODE_TYPE_INDEXED_ACCESS)) {
        if (possibleNodes.size() == 2) {
          for (size_t i = 0; i < 2; i++) {
            if (lhs->toString() == possibleNodes[i]->toString()) {
              commonNode = possibleNodes[i];
              commonName = commonNode->toString();
              possibleNodes.clear();
              return true;
            }
          }
          // don't return, must consider the other side of the condition
        }
        else {
          possibleNodes.emplace_back(lhs);
        }
      }
      if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
          rhs->type == NODE_TYPE_INDEXED_ACCESS) {
        if (possibleNodes.size() == 2) {
          for (size_t i = 0; i < 2; i++) {
            if (rhs->toString() == possibleNodes[i]->toString()) {
              commonNode = possibleNodes[i];
              commonName = commonNode->toString();
              possibleNodes.clear();
              return true;
            }
          }
          return false;
        }
        else {
          possibleNodes.emplace_back(rhs);
          return true;
        }
      }
    }
    possibleNodes.clear();
    return (! commonName.empty());
  }
};

////////////////////////////////////////////////////////////////////////////////
/// @brief auxiliary struct for the OR-to-IN conversion
////////////////////////////////////////////////////////////////////////////////

struct OrToInConverter {
  std::vector<AstNode const*> valueNodes;   // values collected for the IN list
  CommonNodeFinder            finder;
  AstNode const*              commonNode = nullptr;
  std::string                 commonName;

  // builds `<commonNode> IN [ <collected values> ]`
  AstNode* buildInExpression (Ast* ast) {
    auto values = ast->createNodeArray();

    for (auto const& value : valueNodes) {
      values->addMember(value);
    }

    auto operand = commonNode->clone(ast);

    return ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_IN,
                                         operand,
                                         values);
  }

  // determines the common operand (first for ==, then for IN) and, if there
  // is one, checks the whole expression and collects the values
  bool canConvertExpression (AstNode const* node) {
    if (finder.find(node, NODE_TYPE_OPERATOR_BINARY_EQ, commonNode, commonName) ||
        finder.find(node, NODE_TYPE_OPERATOR_BINARY_IN, commonNode, commonName)) {
      return canConvertExpressionWalker(node);
    }
    return false;
  }

  bool canConvertExpressionWalker (AstNode const* node) {
    switch (node->type) {
      case NODE_TYPE_OPERATOR_BINARY_OR: {
        return (canConvertExpressionWalker(node->getMember(0)) &&
                canConvertExpressionWalker(node->getMember(1)));
      }

      case NODE_TYPE_OPERATOR_BINARY_EQ: {
        auto left  = node->getMember(0);
        auto right = node->getMember(1);

        // exactly one side must be the common operand, the other side is
        // then the value to collect
        if (canConvertExpressionWalker(right) && ! canConvertExpressionWalker(left)) {
          valueNodes.emplace_back(left);
          return true;
        }

        if (canConvertExpressionWalker(left) && ! canConvertExpressionWalker(right)) {
          valueNodes.emplace_back(right);
          return true;
        }

        // if both sides match, the equality is of the form x == x:
        // not convertible
        return false;
      }

      case NODE_TYPE_OPERATOR_BINARY_IN: {
        auto left  = node->getMember(0);
        auto right = node->getMember(1);

        if (canConvertExpressionWalker(left) &&
            ! canConvertExpressionWalker(right) &&
            right->isArray()) {
          // take over all members of the array
          size_t const count = right->numMembers();

          for (size_t pos = 0; pos < count; ++pos) {
            valueNodes.emplace_back(right->getMemberUnchecked(pos));
          }
          return true;
        }
        return false;
      }

      case NODE_TYPE_REFERENCE:
      case NODE_TYPE_ATTRIBUTE_ACCESS:
      case NODE_TYPE_INDEXED_ACCESS: {
        // compare by string representation
        return (node->toString() == commonName);
      }

      default: {
        return false;
      }
    }
  }
};

////////////////////////////////////////////////////////////////////////////////
/// @brief this rule replaces expressions of the type:
///   x.val == 1 || x.val == 2 || x.val == 3
///  with
///    x.val IN [1,2,3]
///  when the OR conditions are present in the same FILTER node, and refer to the
///  same (single) attribute.
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::replaceOrWithInRule (Optimizer* opt,
                                         ExecutionPlan* plan,
                                         Optimizer::Rule const* rule) {
  bool modified = false;

  std::vector<ExecutionNode*> filterNodes(plan->findNodesOfType(EN::FILTER, true));

  for (auto const& filterNode : filterNodes) {
    TRI_ASSERT(filterNode->hasDependency());

    // only filters that are directly fed by a calculation are of interest
    auto const producer = filterNode->getFirstDependency();

    if (producer->getType() != EN::CALCULATION) {
      continue;
    }

    auto filter = static_cast<FilterNode*>(filterNode);
    auto usedVars = filter->getVariablesUsedHere();

    auto calculation = static_cast<CalculationNode*>(producer);
    auto setVars = calculation->getVariablesSetHere();

    // the calculation must produce exactly the variable the filter tests
    bool const feedsFilter = (setVars.size() == 1 &&
                              setVars[0]->id == usedVars[0]->id);
    if (! feedsFilter) {
      continue;
    }

    // and its expression must be an OR at the top level
    auto const root = calculation->expression()->node();

    if (root->type != NODE_TYPE_OPERATOR_BINARY_OR) {
      continue;
    }

    OrToInConverter converter;

    if (! converter.canConvertExpression(root)) {
      continue;
    }

    auto inNode = converter.buildInExpression(plan->getAst());
    Expression* expr = new Expression(plan->getAst(), inNode);
    ExecutionNode* replacement = nullptr;

    try {
      TRI_IF_FAILURE("OptimizerRules::replaceOrWithInRuleOom") {
        THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
      }

      replacement = new CalculationNode(plan, plan->nextId(), expr, setVars[0]);
    }
    catch (...) {
      // the expression is still ours here, so free it
      delete expr;
      throw;
    }

    // swap the OR calculation for the IN calculation
    plan->registerNode(replacement);
    plan->replaceNode(calculation, replacement);
    modified = true;
  }

  opt->addPlan(plan, rule, modified);
}

// Helper for removeRedundantOrRule: checks whether all branches of an OR
// compare the same operand against constants using compatible range operators
// (all "<"/"<=" or all ">"/">="), and keeps track of the single bound that
// covers all branches.
struct RemoveRedundantOr {
  AstNode const*    bestValue = nullptr;      // constant of the covering bound
  // operator of the covering bound; only meaningful once isComparisonSet is
  // true, the initial value is just a placeholder
  AstNodeType       comparison = NODE_TYPE_OPERATOR_BINARY_LT;
  bool              inclusive = false;        // not read anywhere in this struct
  bool              isComparisonSet = false;
  CommonNodeFinder  finder;
  AstNode const*    commonNode = nullptr;     // operand shared by all branches
  std::string       commonName;               // its string representation

  // builds `<commonNode> <comparison> <bestValue>`; requires a successful
  // hasRedundantCondition() call beforehand
  AstNode* createReplacementNode (Ast* ast) {
    TRI_ASSERT(commonNode != nullptr);
    TRI_ASSERT(bestValue != nullptr);
    TRI_ASSERT(isComparisonSet == true);
    return ast->createNodeBinaryOperator(comparison, commonNode->clone(ast),
        bestValue);
  }

  bool isInclusiveBound (AstNodeType type) {
    return (type == NODE_TYPE_OPERATOR_BINARY_GE || type == NODE_TYPE_OPERATOR_BINARY_LE);
  }

  // returns -1 if both the stored comparison and `type` are high bounds
  // (< or <=), 1 if both are low bounds (> or >=), and 0 if they cannot be
  // combined. `value` is not used
  int isCompatibleBound (AstNodeType type, AstNode const* value) {
    if ((comparison == NODE_TYPE_OPERATOR_BINARY_LE
          || comparison == NODE_TYPE_OPERATOR_BINARY_LT) &&
        (type == NODE_TYPE_OPERATOR_BINARY_LE
         || type == NODE_TYPE_OPERATOR_BINARY_LT)) {
      return -1; //high bound
    }
    else if ((comparison == NODE_TYPE_OPERATOR_BINARY_GE
          || comparison == NODE_TYPE_OPERATOR_BINARY_GT) &&
        (type == NODE_TYPE_OPERATOR_BINARY_GE
         || type == NODE_TYPE_OPERATOR_BINARY_GT)) {
      return 1; //low bound
    }
    return 0; //incompatible bounds
  }

  // returns false if the existing value is better and true if the input value is
  // better. lowhigh must be the non-zero result of isCompatibleBound()
  bool compareBounds (AstNodeType type, AstNode const* value, int lowhigh) {
    int cmp = CompareAstNodes(bestValue, value, true);

    if (cmp == 0 && (isInclusiveBound(comparison) != isInclusiveBound(type))) {
      // same constant: the inclusive variant covers the exclusive one
      return isInclusiveBound(type);
    }

    // only the sign of cmp is looked at, so this does not depend on the
    // comparison function returning exactly -1 or 1
    return (lowhigh > 0) ? (cmp > 0) : (cmp < 0);
  }

  bool hasRedundantCondition (AstNode const* node) {
    if (finder.find(node, NODE_TYPE_OPERATOR_BINARY_LT, commonNode, commonName)) {
      return hasRedundantConditionWalker(node);
    }
    return false;
  }

  bool hasRedundantConditionWalker (AstNode const* node) {
    AstNodeType type = node->type;

    if (type == NODE_TYPE_OPERATOR_BINARY_OR) {
      return (hasRedundantConditionWalker(node->getMember(0)) &&
              hasRedundantConditionWalker(node->getMember(1)));
    }

    if (type == NODE_TYPE_OPERATOR_BINARY_LE
     || type == NODE_TYPE_OPERATOR_BINARY_LT
     || type == NODE_TYPE_OPERATOR_BINARY_GE
     || type == NODE_TYPE_OPERATOR_BINARY_GT) {

      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      // constant on the left, common operand on the right: the operator is
      // reversed so that the bound reads `<operand> <op> <constant>`
      if (hasRedundantConditionWalker(rhs)
          && ! hasRedundantConditionWalker(lhs)
          && lhs->isConstant()) {

        if (! isComparisonSet) {
          comparison = Ast::ReverseOperator(type);
          bestValue = lhs;
          isComparisonSet = true;
          return true;
        }

        int lowhigh = isCompatibleBound(Ast::ReverseOperator(type), lhs);
        if (lowhigh == 0) {
          return false;
        }

        if (compareBounds(type, lhs, lowhigh)) {
          comparison = Ast::ReverseOperator(type);
          bestValue = lhs;
        }
        return true;
      }

      // common operand on the left, constant on the right
      if (hasRedundantConditionWalker(lhs)
          && ! hasRedundantConditionWalker(rhs)
          && rhs->isConstant()) {
        if (! isComparisonSet) {
          comparison = type;
          bestValue = rhs;
          isComparisonSet = true;
          return true;
        }

        int lowhigh = isCompatibleBound(type, rhs);
        if (lowhigh == 0) {
          return false;
        }

        if (compareBounds(type, rhs, lowhigh)) {
          comparison = type;
          bestValue = rhs;
        }
        return true;
      }
      // if hasRedundantConditionWalker(lhs) and
      // hasRedundantConditionWalker(rhs), then one of the conditions in the OR
      // statement is of the form x == x fall-through intentional
    }
    else if (type == NODE_TYPE_REFERENCE ||
             type == NODE_TYPE_ATTRIBUTE_ACCESS ||
             type == NODE_TYPE_INDEXED_ACCESS) {
      // get a string representation of the node for comparisons
      return (node->toString() == commonName);
    }

    return false;
  }
};

void triagens::aql::removeRedundantOrRule (Optimizer* opt,
                                           ExecutionPlan* plan,
                                           Optimizer::Rule const* rule) {
  // Replaces the OR expression feeding a FILTER by a single comparison if
  // RemoveRedundantOr finds that one bound covers all branches.
  std::vector<ExecutionNode*> candidates(plan->findNodesOfType(EN::FILTER, true));
  bool modified = false;

  for (auto const& candidate : candidates) {
    TRI_ASSERT(candidate->hasDependency());

    auto const input = candidate->getFirstDependency();

    if (input->getType() != EN::CALCULATION) {
      // the filter condition is not computed right in front of the filter
      continue;
    }

    auto filter = static_cast<FilterNode*>(candidate);
    auto filterVars = filter->getVariablesUsedHere();

    auto calc = static_cast<CalculationNode*>(input);
    auto calcVars = calc->getVariablesSetHere();

    if (calcVars.size() != 1 || calcVars[0]->id != filterVars[0]->id) {
      // the calculation does not produce the filter's variable
      continue;
    }

    auto const condition = calc->expression()->node();

    if (condition->type != NODE_TYPE_OPERATOR_BINARY_OR) {
      continue;
    }

    RemoveRedundantOr remover;

    if (! remover.hasRedundantCondition(condition)) {
      continue;
    }

    auto astNode = remover.createReplacementNode(plan->getAst());
    Expression* expr = new Expression(plan->getAst(), astNode);
    ExecutionNode* replacement = nullptr;

    try {
      replacement = new CalculationNode(plan, plan->nextId(), expr, calcVars[0]);
    }
    catch (...) {
      // node creation failed, the expression must be freed here
      delete expr;
      throw;
    }

    plan->registerNode(replacement);
    plan->replaceNode(calc, replacement);
    modified = true;
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief remove $OLD and $NEW variables from data-modification statements
/// if not required
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::removeDataModificationOutVariablesRule (Optimizer* opt,
                                                            ExecutionPlan* plan,
                                                            Optimizer::Rule const* rule) {
  // all data-modification node types
  std::vector<ExecutionNode::NodeType> const modificationTypes = {
    EN::REMOVE,
    EN::INSERT,
    EN::UPDATE,
    EN::REPLACE,
    EN::UPSERT
  };

  std::vector<ExecutionNode*> candidates(plan->findNodesOfType(modificationTypes, true));
  bool modified = false;

  for (auto const& candidate : candidates) {
    auto modification = static_cast<ModificationNode*>(candidate);
    TRI_ASSERT(modification != nullptr);

    auto usedLater = candidate->getVarsUsedLater();

    // drop "$OLD" if nothing further down the plan reads it
    if (usedLater.count(modification->getOutVariableOld()) == 0) {
      modification->clearOutVariableOld();
      modified = true;
    }

    // drop "$NEW" if nothing further down the plan reads it
    if (usedLater.count(modification->getOutVariableNew()) == 0) {
      modification->clearOutVariableNew();
      modified = true;
    }
  }

  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief patch UPDATE statement on single collection that iterates over the
/// entire collection to operate in batches
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::patchUpdateStatementsRule (Optimizer* opt,
                                               ExecutionPlan* plan,
                                               Optimizer::Rule const* rule) {
  // For every top-level UPDATE node that still has `readCompleteInput` set,
  // the dependency chain is inspected. The flag is switched off if the chain
  // contains an EnumerateCollectionNode on the collection being updated and
  // nothing that makes the patch unsuitable (list enumeration, index,
  // subquery, traversal, or enumeration of a different collection).
  bool modified = false;

  // no need to dive into subqueries here, as UPDATE needs to be on the top level
  std::vector<ExecutionNode*> nodes(plan->findNodesOfType(EN::UPDATE, false));

  for (auto const& n : nodes) {
    // we should only get through here a single time
    auto node = static_cast<ModificationNode*>(n);
    TRI_ASSERT(node != nullptr);

    auto& options = node->getOptions();
    if (! options.readCompleteInput) {
      // already ok
      continue;
    }

    auto const collection = node->collection();

    // the decision is kept per node, so that the result for one UPDATE node
    // can neither leak into nor be reset by the inspection of another one
    bool canPatch = false;

    auto dep = n->getFirstDependency();

    while (dep != nullptr) {
      auto const type = dep->getType();

      if (type == EN::ENUMERATE_LIST ||
          type == EN::INDEX ||
          type == EN::SUBQUERY) {
        // not suitable
        canPatch = false;
        break;
      }

      if (type == EN::ENUMERATE_COLLECTION) {
        auto collectionNode = static_cast<EnumerateCollectionNode const*>(dep);

        if (collectionNode->collection() != collection) {
          // different collection, not suitable
          canPatch = false;
          break;
        }

        // same collection is read and updated
        canPatch = true;
      }

      if (type == EN::TRAVERSAL) {
        // unclear what will be read by the traversal
        canPatch = false;
        break;
      }

      dep = dep->getFirstDependency();
    }

    if (canPatch) {
      options.readCompleteInput = false;
      modified = true;
    }
  }

  // always re-add the original plan, be it modified or not
  // only a flag in the plan will be modified
  opt->addPlan(plan, rule, modified);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief merges filter nodes into graph traversal nodes
////////////////////////////////////////////////////////////////////////////////

void triagens::aql::mergeFilterIntoTraversalRule (Optimizer* opt,
                                                  ExecutionPlan* plan,
                                                  Optimizer::Rule const* rule) {
  bool planAltered = false;

  // the plan only needs to be walked if it contains a traversal at all
  if (! plan->findNodesOfType(EN::TRAVERSAL, true).empty()) {
    // every end node is a starting point for a walk
    std::vector<ExecutionNode*> endNodes(plan->findEndNodes(true));

    for (auto const& endNode : endNodes) {
      // the finder reports changes through planAltered
      TraversalConditionFinder finder(plan, &planAltered);
      endNode->walk(&finder);
    }
  }

  opt->addPlan(plan, rule, planAltered);
}

// Local Variables:
// mode: outline-minor
// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)"
// End: