mirror of https://gitee.com/bigwinds/arangodb
4845 lines
157 KiB
C++
4845 lines
157 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief rules for the query optimizer
|
|
///
|
|
/// @file arangod/Aql/OptimizerRules.cpp
|
|
///
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2010-2014 triagens GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// @author Max Neunhoeffer
|
|
/// @author Copyright 2014, triagens GmbH, Cologne, Germany
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "Aql/OptimizerRules.h"
|
|
#include "Aql/AggregationOptions.h"
|
|
#include "Aql/ExecutionEngine.h"
|
|
#include "Aql/ExecutionNode.h"
|
|
#include "Aql/Function.h"
|
|
#include "Aql/Variable.h"
|
|
#include "Aql/types.h"
|
|
|
|
using namespace triagens::aql;
|
|
using Json = triagens::basics::Json;
|
|
using EN = triagens::aql::ExecutionNode;
|
|
|
|
// Debugging aid: ENTER_BLOCK / LEAVE_BLOCK are meant to bracket a function
// body. When the first branch is enabled (change `#if 0` to `#if 1`),
// ENTER_BLOCK opens a try block and LEAVE_BLOCK closes it with a catch-all
// handler that prints the function name, file and line to std::cout and then
// rethrows the exception. With `#if 0` (the current setting) both macros
// expand to nothing.
#if 0
|
|
// opens the try block (the `(void) 0;` is a no-op statement)
#define ENTER_BLOCK try { (void) 0;
|
|
// closes the try block: log where the exception passed through, then rethrow
#define LEAVE_BLOCK } catch (...) { std::cout << "caught an exception in " << __FUNCTION__ << ", " << __FILE__ << ":" << __LINE__ << "!\n"; throw; }
|
|
#else
|
|
// disabled variant: both macros expand to nothing
#define ENTER_BLOCK
|
|
#define LEAVE_BLOCK
|
|
#endif
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- rules for the optimizer
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief remove redundant sorts
|
|
/// this rule modifies the plan in place:
|
|
/// - sorts that are covered by earlier sorts will be removed
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeRedundantSortsRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::SORT, true);
|
|
|
|
if (nodes.empty()) {
|
|
// quick exit
|
|
opt->addPlan(plan, rule, false);
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
std::unordered_set<ExecutionNode*> toUnlink;
|
|
|
|
triagens::basics::StringBuffer buffer(TRI_UNKNOWN_MEM_ZONE);
|
|
|
|
for (auto const& n : nodes) {
|
|
if (toUnlink.find(n) != toUnlink.end()) {
|
|
// encountered a sort node that we already deleted
|
|
continue;
|
|
}
|
|
|
|
auto const sortNode = static_cast<SortNode*>(n);
|
|
|
|
auto sortInfo = sortNode->getSortInformation(plan, &buffer);
|
|
|
|
if (sortInfo.isValid && ! sortInfo.criteria.empty()) {
|
|
// we found a sort that we can understand
|
|
std::vector<ExecutionNode*> stack;
|
|
|
|
sortNode->addDependencies(stack);
|
|
|
|
int nodesRelyingOnSort = 0;
|
|
|
|
while (! stack.empty()) {
|
|
auto current = stack.back();
|
|
stack.pop_back();
|
|
|
|
if (current->getType() == EN::SORT) {
|
|
// we found another sort. now check if they are compatible!
|
|
|
|
auto other = static_cast<SortNode*>(current)->getSortInformation(plan, &buffer);
|
|
|
|
switch (sortInfo.isCoveredBy(other)) {
|
|
case SortInformation::unequal: {
|
|
// different sort criteria
|
|
if (nodesRelyingOnSort == 0) {
|
|
// a sort directly followed by another sort: now remove one of them
|
|
|
|
if (other.canThrow || ! other.isDeterministic) {
|
|
// if the sort can throw or is non-deterministic, we must not remove it
|
|
break;
|
|
}
|
|
|
|
if (sortNode->isStable()) {
|
|
// we should not optimize predecessors of a stable sort (used in a COLLECT node)
|
|
// the stable sort is for a reason, and removing any predecessors sorts might
|
|
// change the result
|
|
break;
|
|
}
|
|
|
|
// remove sort that is a direct predecessor of a sort
|
|
toUnlink.emplace(current);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case SortInformation::otherLessAccurate: {
|
|
toUnlink.emplace(current);
|
|
break;
|
|
}
|
|
|
|
case SortInformation::ourselvesLessAccurate: {
|
|
// the sort at the start of the pipeline makes the sort at the end
|
|
// superfluous, so we'll remove it
|
|
toUnlink.emplace(n);
|
|
break;
|
|
}
|
|
|
|
case SortInformation::allEqual: {
|
|
// the sort at the end of the pipeline makes the sort at the start
|
|
// superfluous, so we'll remove it
|
|
toUnlink.emplace(current);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else if (current->getType() == EN::FILTER) {
|
|
// ok: a filter does not depend on sort order
|
|
}
|
|
else if (current->getType() == EN::CALCULATION) {
|
|
// ok: a filter does not depend on sort order only if it does not throw
|
|
if (current->canThrow()) {
|
|
++nodesRelyingOnSort;
|
|
}
|
|
}
|
|
else if (current->getType() == EN::ENUMERATE_LIST ||
|
|
current->getType() == EN::ENUMERATE_COLLECTION) {
|
|
// ok, but we cannot remove two different sorts if one of these node types is between them
|
|
// example: in the following query, the one sort will be optimized away:
|
|
// FOR i IN [ { a: 1 }, { a: 2 } , { a: 3 } ] SORT i.a ASC SORT i.a DESC RETURN i
|
|
// but in the following query, the sorts will stay:
|
|
// FOR i IN [ { a: 1 }, { a: 2 } , { a: 3 } ] SORT i.a ASC LET a = i.a SORT i.a DESC RETURN i
|
|
++nodesRelyingOnSort;
|
|
}
|
|
else {
|
|
// abort at all other type of nodes. we cannot remove a sort beyond them
|
|
// this includes COLLECT and LIMIT
|
|
break;
|
|
}
|
|
|
|
if (! current->hasDependency()) {
|
|
// node either has no or more than one dependency. we don't know what to do and must abort
|
|
// note: this will also handle Singleton nodes
|
|
break;
|
|
}
|
|
|
|
current->addDependencies(stack);
|
|
}
|
|
|
|
if (toUnlink.find(n) == toUnlink.end() &&
|
|
sortNode->simplify(plan)) {
|
|
// sort node had only constant expressions. it will make no difference if we execute it or not
|
|
// so we can remove it
|
|
toUnlink.emplace(n);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (! toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, ! toUnlink.empty());
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief remove all unnecessary filters
|
|
/// this rule modifies the plan in place:
|
|
/// - filters that are always true are removed completely
|
|
/// - filters that are always false will be replaced by a NoResults node
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeUnnecessaryFiltersRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
bool modified = false;
|
|
std::unordered_set<ExecutionNode*> toUnlink;
|
|
// should we enter subqueries??
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::FILTER, true);
|
|
|
|
for (auto const& n : nodes) {
|
|
// filter nodes always have one input variable
|
|
auto varsUsedHere = n->getVariablesUsedHere();
|
|
TRI_ASSERT(varsUsedHere.size() == 1);
|
|
|
|
// now check who introduced our variable
|
|
auto variable = varsUsedHere[0];
|
|
auto setter = plan->getVarSetBy(variable->id);
|
|
|
|
if (setter == nullptr ||
|
|
setter->getType() != EN::CALCULATION) {
|
|
// filter variable was not introduced by a calculation.
|
|
continue;
|
|
}
|
|
|
|
// filter variable was introduced a CalculationNode. now check the expression
|
|
auto s = static_cast<CalculationNode*>(setter);
|
|
auto root = s->expression()->node();
|
|
|
|
TRI_ASSERT(root != nullptr);
|
|
|
|
if (root->canThrow() || ! root->isDeterministic()) {
|
|
// we better not tamper with this filter
|
|
continue;
|
|
}
|
|
|
|
// filter expression is constant and thus cannot throw
|
|
// we can now evaluate it safely
|
|
TRI_ASSERT(! s->expression()->canThrow());
|
|
|
|
if (root->isTrue()) {
|
|
// filter is always true
|
|
// remove filter node and merge with following node
|
|
toUnlink.emplace(n);
|
|
modified = true;
|
|
}
|
|
else if (root->isFalse()) {
|
|
// filter is always false
|
|
// now insert a NoResults node below it
|
|
auto noResults = new NoResultsNode(plan, plan->nextId());
|
|
plan->registerNode(noResults);
|
|
plan->replaceNode(n, noResults);
|
|
modified = true;
|
|
}
|
|
}
|
|
|
|
if (! toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
#if 0
// NOTE: this helper is currently compiled out (`#if 0`).
//
// Walks an expression AST and inspects every reference to the out variable of
// a COLLECT node. Two kinds of usage are accepted:
// - use inside an expansion (e.g. `g[*].attribute`); the accessed attribute
//   name is recorded in `attributeNames`
// - use as the single argument of a call to `LENGTH()`
// Any other usage clears `canUseOptimization`.
struct CollectVariableFinder {
  Variable const* searchVariable;                    // out variable of the COLLECT
  std::unordered_set<std::string>& attributeNames;   // caller-owned result set
  std::vector<AstNode const*> stack;                 // path from the root to the current node
  bool canUseOptimization;                           // false once an unsupported usage was seen
  bool isArgumentToLength;                           // true once a LENGTH(variable) call was seen

  CollectVariableFinder (AggregateNode const* collectNode,
                         std::unordered_set<std::string>& attributeNames)
    : searchVariable(collectNode->outVariable()),
      attributeNames(attributeNames),
      stack(),
      canUseOptimization(true),
      isArgumentToLength(false) {

    TRI_ASSERT(searchVariable != nullptr);
    stack.reserve(4);
  }

  // Classifies the usage of the reference node currently on top of `stack`.
  // Returns true if the usage is one of the two supported forms; records the
  // attribute name / sets `isArgumentToLength` as a side effect.
  bool referenceIsSupported () {
    auto const depth = stack.size();

    if (depth < 3) {
      // both supported forms need a grandparent node
      return false;
    }

    auto const grandParent = stack[depth - 3];

    if (grandParent->type == NODE_TYPE_EXPANSION) {
      // our variable is used in an expansion, e.g. g[*].attribute
      TRI_ASSERT(grandParent->numMembers() == 2);
      TRI_ASSERT(grandParent->getMember(0)->type == NODE_TYPE_ITERATOR);

      auto access = grandParent->getMember(1);
      TRI_ASSERT(access != nullptr);

      // descend to the innermost attribute access and note its name
      while (access->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
        if (access->getMember(0)->type != NODE_TYPE_ATTRIBUTE_ACCESS) {
          attributeNames.emplace(access->getStringValue());
          return true;
        }
        access = access->getMember(0);
      }

      return false;
    }

    if (stack[depth - 2]->type == NODE_TYPE_ARRAY &&
        grandParent->type == NODE_TYPE_FCALL) {
      auto func = static_cast<Function const*>(grandParent->getData());

      if (func->externalName == "LENGTH" &&
          stack[depth - 2]->numMembers() == 1) {
        // call to function LENGTH() with our variable as its single argument
        isArgumentToLength = true;
        return true;
      }
    }

    return false;
  }

  // Recursively inspects `node` and all of its members.
  void analyze (AstNode const* node) {
    TRI_ASSERT(node != nullptr);

    if (! canUseOptimization) {
      // we already know we cannot apply this optimization
      return;
    }

    stack.push_back(node);

    // children first
    for (size_t i = 0, n = node->numMembers(); i < n; ++i) {
      auto child = node->getMember(i);

      if (child != nullptr) {
        analyze(child);
      }
    }

    if (node->type == NODE_TYPE_REFERENCE) {
      auto variable = static_cast<Variable const*>(node->getData());

      TRI_ASSERT(variable != nullptr);

      if (variable->id == searchVariable->id &&
          ! referenceIsSupported()) {
        canUseOptimization = false;
      }
    }

    stack.pop_back();
  }

};
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief specialize the variables used in a COLLECT INTO
/// NOTE: this rule is currently compiled out (`#if 0`).
/// for every COLLECT ... INTO node the nodes following it are checked for
/// usage of the INTO variable. if the variable is only used in calculations,
/// and only in the forms accepted by CollectVariableFinder, a temporary
/// calculation producing an object with just the needed attributes is
/// inserted before the COLLECT and set as its expression variable
////////////////////////////////////////////////////////////////////////////////

#if 0
int triagens::aql::specializeCollectVariables (Optimizer* opt,
                                               ExecutionPlan* plan,
                                               Optimizer::Rule const* rule) {
  bool modified = false;
  std::vector<ExecutionNode*> nodes = plan->findNodesOfType(EN::AGGREGATE, true);

  for (auto n : nodes) {
    auto collectNode = static_cast<AggregateNode*>(n);
    TRI_ASSERT(collectNode != nullptr);

    auto const&& deps = collectNode->getDependencies();
    if (deps.size() != 1) {
      continue;
    }

    if (! collectNode->hasOutVariable() ||
        collectNode->hasExpressionVariable() ||
        collectNode->count()) {
      // COLLECT without INTO or a COLLECT that already uses an
      // expression variable or a COLLECT that only counts
      continue;
    }

    auto outVariable = collectNode->outVariable();
    // must have an outVariable if we got here
    TRI_ASSERT(outVariable != nullptr);

    std::unordered_set<std::string> attributeNames;
    CollectVariableFinder finder(collectNode, attributeNames);

    // check all following nodes for usage of the out variable
    std::vector<ExecutionNode*> parents(n->getParents());

    while (! parents.empty() &&
           finder.canUseOptimization) {
      auto current = parents.back();
      parents.pop_back();

      for (auto it : current->getParents()) {
        parents.emplace_back(it);
      }

      // now check current node for usage of out variable
      auto const&& variablesUsed = current->getVariablesUsedHere();

      bool found = false;
      for (auto it : variablesUsed) {
        if (it == outVariable) {
          found = true;
          break;
        }
      }

      if (! found) {
        continue;
      }

      // variable is used. now find out how it is used
      if (current->getType() != EN::CALCULATION) {
        // variable is used outside of a calculation... skip optimization.
        // the flag must be cleared here: a plain `break` would leave it set,
        // and the COLLECT would then be rewritten below although this node
        // needs the complete INTO variable
        // TODO
        finder.canUseOptimization = false;
        break;
      }

      auto calculationNode = static_cast<CalculationNode*>(current);
      auto expression = calculationNode->expression();
      TRI_ASSERT(expression != nullptr);

      finder.analyze(expression->node());
    }

    if (! finder.canUseOptimization ||
        finder.attributeNames.empty()) {
      // unsupported usage found, or no attribute usage recorded
      continue;
    }

    // build an object with one member per recorded attribute name, taking
    // the value from the equally-named variable used by the COLLECT
    auto obj = plan->getAst()->createNodeObject();

    for (auto const& attributeName : finder.attributeNames) {
      for (auto it : collectNode->getVariablesUsedHere()) {
        if (it->name == attributeName) {
          auto refNode = plan->getAst()->createNodeReference(it);
          auto element = plan->getAst()->createNodeObjectElement(it->name.c_str(), refNode);
          obj->addMember(element);
        }
      }
    }

    if (obj->numMembers() == attributeNames.size()) {
      // every attribute could be mapped to a variable: splice the temporary
      // calculation in between the COLLECT and its single dependency
      collectNode->removeDependency(deps[0]);
      auto calculationNode = plan->createTemporaryCalculation(obj);
      calculationNode->addDependency(deps[0]);
      collectNode->addDependency(calculationNode);

      collectNode->setExpressionVariable(calculationNode->outVariable());
      modified = true;
    }
  }

  if (modified) {
    plan->findVarUsage();
  }

  opt->addPlan(plan, rule, modified);

  return TRI_ERROR_NO_ERROR;
}
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief remove INTO of a COLLECT if not used
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeCollectIntoRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
bool modified = false;
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::AGGREGATE, true);
|
|
|
|
for (auto const& n : nodes) {
|
|
auto collectNode = static_cast<AggregateNode*>(n);
|
|
TRI_ASSERT(collectNode != nullptr);
|
|
|
|
auto outVariable = collectNode->outVariable();
|
|
|
|
if (outVariable == nullptr) {
|
|
// no out variable. nothing to do
|
|
continue;
|
|
}
|
|
|
|
auto varsUsedLater = n->getVarsUsedLater();
|
|
if (varsUsedLater.find(outVariable) != varsUsedLater.end()) {
|
|
// outVariable is used later
|
|
continue;
|
|
}
|
|
|
|
// outVariable is not used later. remove it!
|
|
collectNode->clearOutVariable();
|
|
modified = true;
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- helper class for propagateConstantAttributesRule
|
|
// -----------------------------------------------------------------------------
|
|
|
|
class PropagateConstantAttributesHelper {
|
|
|
|
public:
|
|
|
|
PropagateConstantAttributesHelper ()
|
|
: _constants(),
|
|
_modified(false) {
|
|
}
|
|
|
|
bool modified () const {
|
|
return _modified;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief inspects a plan and propages constant values in expressions
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
void propagateConstants (ExecutionPlan* plan) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::FILTER, true);
|
|
|
|
for (auto const& node : nodes) {
|
|
auto fn = static_cast<FilterNode*>(node);
|
|
|
|
auto inVar = fn->getVariablesUsedHere();
|
|
TRI_ASSERT(inVar.size() == 1);
|
|
|
|
auto setter = plan->getVarSetBy(inVar[0]->id);
|
|
if (setter != nullptr &&
|
|
setter->getType() == EN::CALCULATION) {
|
|
auto cn = static_cast<CalculationNode*>(setter);
|
|
auto expression = cn->expression();
|
|
|
|
if (expression != nullptr) {
|
|
collectConstantAttributes(const_cast<AstNode*>(expression->node()));
|
|
}
|
|
}
|
|
}
|
|
|
|
if (! _constants.empty()) {
|
|
for (auto const& node : nodes) {
|
|
auto fn = static_cast<FilterNode*>(node);
|
|
|
|
auto inVar = fn->getVariablesUsedHere();
|
|
TRI_ASSERT(inVar.size() == 1);
|
|
|
|
auto setter = plan->getVarSetBy(inVar[0]->id);
|
|
if (setter != nullptr &&
|
|
setter->getType() == EN::CALCULATION) {
|
|
auto cn = static_cast<CalculationNode*>(setter);
|
|
auto expression = cn->expression();
|
|
|
|
if (expression != nullptr) {
|
|
insertConstantAttributes(const_cast<AstNode*>(expression->node()));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
private:
|
|
|
|
AstNode const* getConstant (Variable const* variable,
|
|
std::string const& attribute) const {
|
|
auto it = _constants.find(variable);
|
|
|
|
if (it == _constants.end()) {
|
|
return nullptr;
|
|
}
|
|
|
|
auto it2 = (*it).second.find(attribute);
|
|
|
|
if (it2 == (*it).second.end()) {
|
|
return nullptr;
|
|
}
|
|
|
|
return (*it2).second;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief inspects an expression (recursively) and notes constant attribute
|
|
/// values so they can be propagated later
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
void collectConstantAttributes (AstNode* node) {
|
|
if (node == nullptr) {
|
|
return;
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
collectConstantAttributes(lhs);
|
|
collectConstantAttributes(rhs);
|
|
}
|
|
else if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
if (lhs->isConstant() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
inspectConstantAttribute(rhs, lhs);
|
|
}
|
|
else if (rhs->isConstant() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
inspectConstantAttribute(lhs, rhs);
|
|
}
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief traverses an AST part recursively and patches it by inserting
|
|
/// constant values
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
void insertConstantAttributes (AstNode* node) {
|
|
if (node == nullptr) {
|
|
return;
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
insertConstantAttributes(lhs);
|
|
insertConstantAttributes(rhs);
|
|
}
|
|
else if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
if (! lhs->isConstant() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
insertConstantAttribute(node, 1);
|
|
}
|
|
if (! rhs->isConstant() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
insertConstantAttribute(node, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief extract an attribute and its variable from an attribute access
|
|
/// (e.g. `a.b.c` will return variable `a` and attribute name `b.c.`.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
bool getAttribute (AstNode const* attribute,
|
|
Variable const*& variable,
|
|
std::string& name) {
|
|
TRI_ASSERT(attribute != nullptr &&
|
|
attribute->type == NODE_TYPE_ATTRIBUTE_ACCESS);
|
|
TRI_ASSERT(name.empty());
|
|
|
|
while (attribute->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
name = std::string(".") + std::string(attribute->getStringValue(), attribute->getStringLength()) + name;
|
|
attribute = attribute->getMember(0);
|
|
}
|
|
|
|
if (attribute->type != NODE_TYPE_REFERENCE) {
|
|
return false;
|
|
}
|
|
|
|
variable = static_cast<Variable const*>(attribute->getData());
|
|
TRI_ASSERT(variable != nullptr);
|
|
|
|
return true;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief inspect the constant value assigned to an attribute
|
|
/// the attribute value will be stored so it can be inserted for the attribute
|
|
/// later
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
void inspectConstantAttribute (AstNode const* attribute,
|
|
AstNode const* value) {
|
|
Variable const* variable = nullptr;
|
|
std::string name;
|
|
|
|
if (! getAttribute(attribute, variable, name)) {
|
|
return;
|
|
}
|
|
|
|
auto it = _constants.find(variable);
|
|
|
|
if (it == _constants.end()) {
|
|
_constants.emplace(std::make_pair(variable, std::unordered_map<std::string, AstNode const*>{ { name, value } }));
|
|
return;
|
|
}
|
|
|
|
auto it2 = (*it).second.find(name);
|
|
|
|
if (it2 == (*it).second.end()) {
|
|
// first value for the attribute
|
|
(*it).second.emplace(std::make_pair(name, value));
|
|
}
|
|
else {
|
|
auto previous = (*it2).second;
|
|
|
|
if (previous == nullptr) {
|
|
// we have multiple different values for the attribute. better not use this attribute
|
|
return;
|
|
}
|
|
|
|
if (TRI_CompareValuesJson(value->computeJson(), previous->computeJson(), true) != 0) {
|
|
// different value found for an already tracked attribute. better not use this attribute
|
|
(*it2).second = nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief patches an AstNode by inserting a constant value into it
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
void insertConstantAttribute (AstNode* parentNode,
|
|
size_t accessIndex) {
|
|
Variable const* variable = nullptr;
|
|
std::string name;
|
|
|
|
if (! getAttribute(parentNode->getMember(accessIndex), variable, name)) {
|
|
return;
|
|
}
|
|
|
|
auto constantValue = getConstant(variable, name);
|
|
|
|
if (constantValue != nullptr) {
|
|
parentNode->changeMember(accessIndex, const_cast<AstNode*>(constantValue));
|
|
_modified = true;
|
|
}
|
|
}
|
|
|
|
std::unordered_map<Variable const*, std::unordered_map<std::string, AstNode const*>> _constants;
|
|
|
|
bool _modified;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief propagate constant attributes in FILTERs
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::propagateConstantAttributesRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
PropagateConstantAttributesHelper helper;
|
|
helper.propagateConstants(plan);
|
|
|
|
bool const modified = helper.modified();
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief remove SORT RAND() if appropriate
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeSortRandRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
bool modified = false;
|
|
// should we enter subqueries??
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::SORT, true);
|
|
|
|
for (auto const& n : nodes) {
|
|
auto node = static_cast<SortNode*>(n);
|
|
auto const& elements = node->getElements();
|
|
if (elements.size() != 1) {
|
|
// we're looking for "SORT RAND()", which has just one sort criterion
|
|
continue;
|
|
}
|
|
|
|
auto const variable = elements[0].first;
|
|
TRI_ASSERT(variable != nullptr);
|
|
|
|
auto setter = plan->getVarSetBy(variable->id);
|
|
|
|
if (setter == nullptr ||
|
|
setter->getType() != EN::CALCULATION) {
|
|
continue;
|
|
}
|
|
|
|
auto cn = static_cast<CalculationNode*>(setter);
|
|
auto const expression = cn->expression();
|
|
|
|
if (expression == nullptr ||
|
|
expression->node() == nullptr ||
|
|
expression->node()->type != NODE_TYPE_FCALL) {
|
|
// not the right type of node
|
|
continue;
|
|
}
|
|
|
|
auto funcNode = expression->node();
|
|
auto func = static_cast<Function const*>(funcNode->getData());
|
|
|
|
// we're looking for "RAND()", which is a function call
|
|
// with an empty parameters array
|
|
if (func->externalName != "RAND" ||
|
|
funcNode->numMembers() != 1 ||
|
|
funcNode->getMember(0)->numMembers() != 0) {
|
|
continue;
|
|
}
|
|
|
|
// now we're sure we got SORT RAND() !
|
|
|
|
// we found what we were looking for!
|
|
// now check if the dependencies qualify
|
|
if (! n->hasDependency()) {
|
|
break;
|
|
}
|
|
|
|
auto current = n->getFirstDependency();
|
|
ExecutionNode* collectionNode = nullptr;
|
|
|
|
while (current != nullptr) {
|
|
if (current->canThrow()) {
|
|
// we shouldn't bypass a node that can throw
|
|
collectionNode = nullptr;
|
|
break;
|
|
}
|
|
|
|
switch (current->getType()) {
|
|
case EN::SORT:
|
|
case EN::AGGREGATE:
|
|
case EN::FILTER:
|
|
case EN::SUBQUERY:
|
|
case EN::ENUMERATE_LIST:
|
|
case EN::INDEX_RANGE: {
|
|
// if we found another SortNode, an AggregateNode, FilterNode, a SubqueryNode,
|
|
// an EnumerateListNode or an IndexRangeNode
|
|
// this means we cannot apply our optimization
|
|
collectionNode = nullptr;
|
|
current = nullptr;
|
|
continue; // this will exit the while loop
|
|
}
|
|
|
|
case EN::ENUMERATE_COLLECTION: {
|
|
if (collectionNode == nullptr) {
|
|
// note this node
|
|
collectionNode = current;
|
|
break;
|
|
}
|
|
else {
|
|
// we already found another collection node before. this means we
|
|
// should not apply our optimization
|
|
collectionNode = nullptr;
|
|
current = nullptr;
|
|
continue; // this will exit the while loop
|
|
}
|
|
// cannot get here
|
|
TRI_ASSERT(false);
|
|
}
|
|
|
|
default: {
|
|
// ignore all other nodes
|
|
}
|
|
}
|
|
|
|
if (! current->hasDependency()) {
|
|
break;
|
|
}
|
|
|
|
current = current->getFirstDependency();
|
|
}
|
|
|
|
if (collectionNode != nullptr) {
|
|
// we found a node to modify!
|
|
TRI_ASSERT(collectionNode->getType() == EN::ENUMERATE_COLLECTION);
|
|
// set the random iteration flag for the EnumerateCollectionNode
|
|
static_cast<EnumerateCollectionNode*>(collectionNode)->setRandom();
|
|
|
|
// remove the SortNode
|
|
// note: the CalculationNode will be removed by "remove-unnecessary-calculations"
|
|
// rule if not used
|
|
|
|
plan->unlinkNode(n);
|
|
modified = true;
|
|
}
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief move calculations up in the plan
|
|
/// this rule modifies the plan in place
|
|
/// it aims to move up calculations as far up in the plan as possible, to
|
|
/// avoid redundant calculations in inner loops
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::moveCalculationsUpRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::CALCULATION, true);
|
|
bool modified = false;
|
|
|
|
for (auto const& n : nodes) {
|
|
auto nn = static_cast<CalculationNode*>(n);
|
|
|
|
if (nn->expression()->canThrow() ||
|
|
! nn->expression()->isDeterministic()) {
|
|
// we will only move expressions up that cannot throw and that are deterministic
|
|
continue;
|
|
}
|
|
|
|
std::unordered_set<Variable const*> neededVars;
|
|
n->getVariablesUsedHere(neededVars);
|
|
|
|
std::vector<ExecutionNode*> stack;
|
|
|
|
n->addDependencies(stack);
|
|
|
|
while (! stack.empty()) {
|
|
auto current = stack.back();
|
|
stack.pop_back();
|
|
|
|
bool found = false;
|
|
|
|
for (auto const& v : current->getVariablesSetHere()) {
|
|
if (neededVars.find(v) != neededVars.end()) {
|
|
// shared variable, cannot move up any more
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (found) {
|
|
// done with optimizing this calculation node
|
|
break;
|
|
}
|
|
|
|
|
|
if (! current->hasDependency()) {
|
|
// node either has no or more than one dependency. we don't know what to do and must abort
|
|
// note: this will also handle Singleton nodes
|
|
break;
|
|
}
|
|
|
|
current->addDependencies(stack);
|
|
|
|
// first, unlink the calculation from the plan
|
|
plan->unlinkNode(n);
|
|
// and re-insert into before the current node
|
|
plan->insertDependency(current, n);
|
|
modified = true;
|
|
}
|
|
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief move calculations down in the plan
|
|
/// this rule modifies the plan in place
|
|
/// it aims to move calculations as far down in the plan as possible, beyond
|
|
/// FILTER and LIMIT operations
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::moveCalculationsDownRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::CALCULATION, true);
|
|
bool modified = false;
|
|
|
|
for (auto const& n : nodes) {
|
|
auto nn = static_cast<CalculationNode*>(n);
|
|
if (nn->expression()->canThrow() ||
|
|
! nn->expression()->isDeterministic()) {
|
|
// we will only move expressions down that cannot throw and that are deterministic
|
|
continue;
|
|
}
|
|
|
|
// this is the variable that the calculation will set
|
|
auto variable = nn->outVariable();
|
|
|
|
std::vector<ExecutionNode*> stack;
|
|
n->addParents(stack);
|
|
|
|
bool shouldMove = false;
|
|
ExecutionNode* lastNode = nullptr;
|
|
|
|
while (! stack.empty()) {
|
|
auto current = stack.back();
|
|
stack.pop_back();
|
|
|
|
lastNode = current;
|
|
bool done = false;
|
|
|
|
auto&& varsUsed = current->getVariablesUsedHere();
|
|
|
|
for (auto const& v : varsUsed) {
|
|
if (v == variable) {
|
|
// the node we're looking at needs the variable we're setting.
|
|
// can't push further!
|
|
done = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (done) {
|
|
// done with optimizing this calculation node
|
|
break;
|
|
}
|
|
|
|
auto const currentType = current->getType();
|
|
|
|
if (currentType == EN::FILTER ||
|
|
currentType == EN::SORT ||
|
|
currentType == EN::LIMIT ||
|
|
currentType == EN::SUBQUERY) {
|
|
// we found something interesting that justifies moving our node down
|
|
shouldMove = true;
|
|
}
|
|
else if (currentType == EN::INDEX_RANGE ||
|
|
currentType == EN::ENUMERATE_COLLECTION ||
|
|
currentType == EN::ENUMERATE_LIST ||
|
|
currentType == EN::AGGREGATE ||
|
|
currentType == EN::NORESULTS) {
|
|
// we will not push further down than such nodes
|
|
shouldMove = false;
|
|
break;
|
|
}
|
|
|
|
if (! current->hasParent()) {
|
|
break;
|
|
}
|
|
|
|
current->addParents(stack);
|
|
}
|
|
|
|
if (shouldMove && lastNode != nullptr) {
|
|
// first, unlink the calculation from the plan
|
|
plan->unlinkNode(n);
|
|
|
|
// and re-insert into before the current node
|
|
plan->insertDependency(lastNode, n);
|
|
modified = true;
|
|
}
|
|
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief fuse calculations in the plan
|
|
/// this rule modifies the plan in place
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::fuseCalculationsRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::CALCULATION, true);
|
|
|
|
if (nodes.size() < 2) {
|
|
opt->addPlan(plan, rule, false);
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
std::unordered_set<ExecutionNode*> toUnlink;
|
|
|
|
for (auto const& n : nodes) {
|
|
auto nn = static_cast<CalculationNode*>(n);
|
|
if (nn->expression()->canThrow() ||
|
|
! nn->expression()->isDeterministic()) {
|
|
// we will only fuse calculations of expressions that cannot throw and that are deterministic
|
|
continue;
|
|
}
|
|
|
|
if (toUnlink.find(n) != toUnlink.end()) {
|
|
// do not process the same node twice
|
|
continue;
|
|
}
|
|
|
|
std::unordered_map<Variable const*, ExecutionNode*> toInsert;
|
|
for (auto&& it : nn->getVariablesUsedHere()) {
|
|
if (! n->isVarUsedLater(it)) {
|
|
toInsert.emplace(it, n);
|
|
}
|
|
}
|
|
|
|
TRI_ASSERT(n->hasDependency());
|
|
std::vector<ExecutionNode*> stack{ n->getFirstDependency() };
|
|
|
|
while (! stack.empty()) {
|
|
auto current = stack.back();
|
|
stack.pop_back();
|
|
|
|
bool handled = false;
|
|
|
|
if (current->getType() == EN::CALCULATION) {
|
|
auto otherExpression = static_cast<CalculationNode const*>(current)->expression();
|
|
|
|
if (otherExpression->isDeterministic() &&
|
|
! otherExpression->canThrow() &&
|
|
otherExpression->canRunOnDBServer() == nn->expression()->canRunOnDBServer()) {
|
|
// found another calculation node
|
|
auto&& varsSet = current->getVariablesSetHere();
|
|
if (varsSet.size() == 1) {
|
|
// check if it is a calculation for a variable that we are looking for
|
|
auto it = toInsert.find(varsSet[0]);
|
|
|
|
if (it != toInsert.end()) {
|
|
// remove the variable from the list of search variables
|
|
toInsert.erase(it);
|
|
|
|
// replace the variable reference in the original expression with the expression for that variable
|
|
auto expression = nn->expression();
|
|
TRI_ASSERT(expression != nullptr);
|
|
expression->replaceVariableReference((*it).first, otherExpression->node());
|
|
|
|
toUnlink.emplace(current);
|
|
|
|
// insert the calculations' own referenced variables into the list of search variables
|
|
for (auto&& it2 : current->getVariablesUsedHere()) {
|
|
if (! n->isVarUsedLater(it2)) {
|
|
toInsert.emplace(it2, n);
|
|
}
|
|
}
|
|
|
|
handled = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (! handled) {
|
|
// remove all variables from our list that might be used elsewhere
|
|
for (auto&& it : current->getVariablesUsedHere()) {
|
|
toInsert.erase(it);
|
|
}
|
|
}
|
|
|
|
if (toInsert.empty()) {
|
|
// done
|
|
break;
|
|
}
|
|
|
|
if (! current->hasDependency()) {
|
|
break;
|
|
}
|
|
|
|
stack.emplace_back(current->getFirstDependency());
|
|
}
|
|
}
|
|
|
|
if (! toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, ! toUnlink.empty());
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief determine the "right" type of AggregateNode and
|
|
/// add a sort node for each COLLECT (note: the sort may be removed later)
|
|
/// this rule cannot be turned off (otherwise, the query result might be wrong!)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::specializeCollectRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::AGGREGATE, true);
|
|
bool modified = false;
|
|
|
|
for (auto const& n : nodes) {
|
|
auto collectNode = static_cast<AggregateNode*>(n);
|
|
|
|
if (collectNode->isSpecialized()) {
|
|
// already specialized this node
|
|
continue;
|
|
}
|
|
|
|
auto const& aggregateVariables = collectNode->aggregateVariables();
|
|
|
|
// test if we can use an alternative version of COLLECT with a hash table
|
|
bool const canUseHashAggregation = (! aggregateVariables.empty() &&
|
|
(! collectNode->hasOutVariable() || collectNode->count()) &&
|
|
collectNode->getOptions().canUseHashMethod());
|
|
|
|
if (canUseHashAggregation) {
|
|
// create a new plan with the adjusted COLLECT node
|
|
std::unique_ptr<ExecutionPlan> newPlan(plan->clone());
|
|
|
|
// use the cloned COLLECT node
|
|
auto newCollectNode = static_cast<AggregateNode*>(newPlan->getNodeById(collectNode->id()));
|
|
TRI_ASSERT(newCollectNode != nullptr);
|
|
|
|
// specialize the AggregateNode so it will become a HashAggregateBlock later
|
|
// additionally, add a SortNode BEHIND the AggregateNode (to sort the final result)
|
|
newCollectNode->aggregationMethod(AggregationOptions::AggregationMethod::AGGREGATION_METHOD_HASH);
|
|
newCollectNode->specialized();
|
|
|
|
if (! collectNode->isDistinctCommand()) {
|
|
// add the post-SORT
|
|
std::vector<std::pair<Variable const*, bool>> sortElements;
|
|
for (auto const& v : newCollectNode->aggregateVariables()) {
|
|
sortElements.emplace_back(std::make_pair(v.first, true));
|
|
}
|
|
|
|
auto sortNode = new SortNode(newPlan.get(), newPlan->nextId(), sortElements, false);
|
|
newPlan->registerNode(sortNode);
|
|
|
|
TRI_ASSERT(newCollectNode->hasParent());
|
|
auto const& parents = newCollectNode->getParents();
|
|
auto parent = parents[0];
|
|
|
|
sortNode->addDependency(newCollectNode);
|
|
parent->replaceDependency(newCollectNode, sortNode);
|
|
}
|
|
newPlan->findVarUsage();
|
|
|
|
if (nodes.size() > 1) {
|
|
// this will tell the optimizer to optimize the cloned plan with this specific rule again
|
|
opt->addPlan(newPlan.release(), rule, true, static_cast<int>(rule->level - 1));
|
|
}
|
|
else {
|
|
// no need to run this specific rule again on the cloned plan
|
|
opt->addPlan(newPlan.release(), rule, true);
|
|
}
|
|
}
|
|
|
|
// mark node as specialized, so we do not process it again
|
|
collectNode->specialized();
|
|
|
|
// finally, adjust the original plan and create a sorted version of COLLECT
|
|
|
|
// specialize the AggregateNode so it will become a SortedAggregateBlock later
|
|
collectNode->aggregationMethod(AggregationOptions::AggregationMethod::AGGREGATION_METHOD_SORTED);
|
|
|
|
// insert a SortNode IN FRONT OF the AggregateNode
|
|
if (! aggregateVariables.empty()) {
|
|
std::vector<std::pair<Variable const*, bool>> sortElements;
|
|
for (auto const& v : aggregateVariables) {
|
|
sortElements.emplace_back(std::make_pair(v.second, true));
|
|
}
|
|
|
|
auto sortNode = new SortNode(plan, plan->nextId(), sortElements, true);
|
|
plan->registerNode(sortNode);
|
|
|
|
TRI_ASSERT(collectNode->hasDependency());
|
|
auto dep = collectNode->getFirstDependency();
|
|
sortNode->addDependency(dep);
|
|
collectNode->replaceDependency(dep, sortNode);
|
|
|
|
modified = true;
|
|
}
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief split and-combined filters and break them into smaller parts
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::splitFiltersRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::FILTER, true);
|
|
bool modified = false;
|
|
|
|
for (auto const& n : nodes) {
|
|
auto const&& inVar = n->getVariablesUsedHere();
|
|
TRI_ASSERT(inVar.size() == 1);
|
|
auto setter = plan->getVarSetBy(inVar[0]->id);
|
|
|
|
if (setter == nullptr || setter->getType() != EN::CALCULATION) {
|
|
continue;
|
|
}
|
|
|
|
auto cn = static_cast<CalculationNode*>(setter);
|
|
auto const expression = cn->expression();
|
|
|
|
if (expression->canThrow() ||
|
|
! expression->isDeterministic() ||
|
|
expression->node()->type != NODE_TYPE_OPERATOR_BINARY_AND) {
|
|
continue;
|
|
}
|
|
|
|
std::vector<AstNode const*> stack{ expression->node() };
|
|
|
|
while (! stack.empty()) {
|
|
auto current = stack.back();
|
|
stack.pop_back();
|
|
|
|
if (current->type == NODE_TYPE_OPERATOR_BINARY_AND) {
|
|
stack.emplace_back(current->getMember(0));
|
|
stack.emplace_back(current->getMember(1));
|
|
}
|
|
else {
|
|
modified = true;
|
|
|
|
ExecutionNode* calculationNode = nullptr;
|
|
auto outVar = plan->getAst()->variables()->createTemporaryVariable();
|
|
auto expression = new Expression(plan->getAst(), current);
|
|
try {
|
|
calculationNode = new CalculationNode(plan, plan->nextId(), expression, outVar);
|
|
}
|
|
catch (...) {
|
|
delete expression;
|
|
throw;
|
|
}
|
|
plan->registerNode(calculationNode);
|
|
|
|
plan->insertDependency(n, calculationNode);
|
|
|
|
auto filterNode = new FilterNode(plan, plan->nextId(), outVar);
|
|
plan->registerNode(filterNode);
|
|
|
|
plan->insertDependency(n, filterNode);
|
|
}
|
|
}
|
|
|
|
if (modified) {
|
|
plan->unlinkNode(n, false);
|
|
}
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief move filters up in the plan
|
|
/// this rule modifies the plan in place
|
|
/// filters are moved as far up in the plan as possible to make result sets
|
|
/// as small as possible as early as possible
|
|
/// filters are not pushed beyond limits
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::moveFiltersUpRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::FILTER, true);
|
|
bool modified = false;
|
|
|
|
for (auto const& n : nodes) {
|
|
auto neededVars = n->getVariablesUsedHere();
|
|
TRI_ASSERT(neededVars.size() == 1);
|
|
|
|
std::vector<ExecutionNode*> stack;
|
|
n->addDependencies(stack);
|
|
|
|
while (! stack.empty()) {
|
|
auto current = stack.back();
|
|
stack.pop_back();
|
|
|
|
if (current->getType() == EN::LIMIT) {
|
|
// cannot push a filter beyond a LIMIT node
|
|
break;
|
|
}
|
|
|
|
if (current->canThrow()) {
|
|
// must not move a filter beyond a node that can throw
|
|
break;
|
|
}
|
|
|
|
if (current->getType() == EN::CALCULATION) {
|
|
// must not move a filter beyond a node with a non-deterministic result
|
|
auto calculation = static_cast<CalculationNode const*>(current);
|
|
if (! calculation->expression()->isDeterministic()) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool found = false;
|
|
|
|
auto&& varsSet = current->getVariablesSetHere();
|
|
for (auto const& v : varsSet) {
|
|
for (auto it = neededVars.begin(); it != neededVars.end(); ++it) {
|
|
if ((*it)->id == v->id) {
|
|
// shared variable, cannot move up any more
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (found) {
|
|
// done with optimizing this calculation node
|
|
break;
|
|
}
|
|
|
|
if (! current->hasDependency()) {
|
|
// node either has no or more than one dependency. we don't know what to do and must abort
|
|
// note: this will also handle Singleton nodes
|
|
break;
|
|
}
|
|
|
|
current->addDependencies(stack);
|
|
|
|
// first, unlink the filter from the plan
|
|
plan->unlinkNode(n);
|
|
// and re-insert into plan in front of the current node
|
|
plan->insertDependency(current, n);
|
|
modified = true;
|
|
}
|
|
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief plan walker that replaces variables in the nodes it visits
///
/// the walker is handed a map from variable id to replacement variable. for
/// each visited node of a supported type (ENUMERATE_LIST, RETURN, CALCULATION,
/// FILTER, AGGREGATE, SORT) the variables read by the node are exchanged via
/// Variable::replace(). all other node types are left untouched
////////////////////////////////////////////////////////////////////////////////

class triagens::aql::RedundantCalculationsReplacer final : public WalkerWorker<ExecutionNode> {

  public:

    /// @brief create the walker. the map is stored by reference, so it must
    /// outlive the walker
    RedundantCalculationsReplacer (std::unordered_map<VariableId, Variable const*> const& replacements)
      : _replacements(replacements) {
    }

    /// @brief replace the input variable of a node of type T (T must expose
    /// an _inVariable member)
    template<typename T>
    void replaceInVariable (ExecutionNode* en) {
      auto node = static_cast<T*>(en);

      node->_inVariable = Variable::replace(node->_inVariable, _replacements);
    }

    /// @brief replace variables inside the expression of a CalculationNode,
    /// but only if the expression references a to-be-replaced variable at all
    void replaceInCalculation (ExecutionNode* en) {
      auto node = static_cast<CalculationNode*>(en);
      std::unordered_set<Variable const*> variables;
      node->expression()->variables(variables);

      // check if the calculation uses any of the variables that we want to replace
      for (auto const& it : variables) {
        if (_replacements.find(it->id) != _replacements.end()) {
          // calculation uses a to-be-replaced variable
          node->expression()->replaceVariables(_replacements);
          return;
        }
      }
    }

    /// @brief called for each node of the plan; dispatches on the node type
    /// and always returns false
    bool before (ExecutionNode* en) override final {
      switch (en->getType()) {
        case EN::ENUMERATE_LIST: {
          replaceInVariable<EnumerateListNode>(en);
          break;
        }

        case EN::RETURN: {
          replaceInVariable<ReturnNode>(en);
          break;
        }

        case EN::CALCULATION: {
          replaceInCalculation(en);
          break;
        }

        case EN::FILTER: {
          replaceInVariable<FilterNode>(en);
          break;
        }

        case EN::AGGREGATE: {
          auto node = static_cast<AggregateNode*>(en);
          // iterate by reference: the replacement must be written into the
          // node's own container, not into a temporary copy of the element
          for (auto& variable : node->_aggregateVariables) {
            variable.second = Variable::replace(variable.second, _replacements);
          }
          break;
        }

        case EN::SORT: {
          auto node = static_cast<SortNode*>(en);
          // iterate by reference, see AGGREGATE above
          for (auto& variable : node->_elements) {
            variable.first = Variable::replace(variable.first, _replacements);
          }
          break;
        }

        default: {
          // ignore all other types of nodes
        }
      }

      // always continue
      return false;
    }

  private:

    /// @brief variable id => replacement variable (not owned)
    std::unordered_map<VariableId, Variable const*> const& _replacements;
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief remove CalculationNode(s) that are repeatedly used in a query
|
|
/// (i.e. common expressions)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeRedundantCalculationsRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::CALCULATION, true);
|
|
|
|
if (nodes.size() < 2) {
|
|
// quick exit
|
|
opt->addPlan(plan, rule, false);
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
triagens::basics::StringBuffer buffer(TRI_UNKNOWN_MEM_ZONE);
|
|
std::unordered_map<VariableId, Variable const*> replacements;
|
|
|
|
|
|
for (auto const& n : nodes) {
|
|
auto nn = static_cast<CalculationNode*>(n);
|
|
|
|
if (! nn->expression()->isDeterministic()) {
|
|
// If this node is non-deterministic, we must not touch it!
|
|
continue;
|
|
}
|
|
|
|
auto outvar = n->getVariablesSetHere();
|
|
TRI_ASSERT(outvar.size() == 1);
|
|
|
|
try {
|
|
nn->expression()->stringifyIfNotTooLong(&buffer);
|
|
}
|
|
catch (...) {
|
|
// expression could not be stringified (maybe because not all node types
|
|
// are supported). this is not an error, we just skip the optimization
|
|
buffer.reset();
|
|
continue;
|
|
}
|
|
|
|
std::string const referenceExpression(buffer.c_str(), buffer.length());
|
|
buffer.reset();
|
|
|
|
std::vector<ExecutionNode*> stack;
|
|
n->addDependencies(stack);
|
|
|
|
while (! stack.empty()) {
|
|
auto current = stack.back();
|
|
stack.pop_back();
|
|
|
|
if (current->getType() == EN::CALCULATION) {
|
|
try {
|
|
static_cast<CalculationNode*>(current)->expression()->stringifyIfNotTooLong(&buffer);
|
|
}
|
|
catch (...) {
|
|
// expression could not be stringified (maybe because not all node types
|
|
// are supported). this is not an error, we just skip the optimization
|
|
buffer.reset();
|
|
continue;
|
|
}
|
|
|
|
std::string const compareExpression(buffer.c_str(), buffer.length());
|
|
buffer.reset();
|
|
|
|
if (compareExpression == referenceExpression) {
|
|
// expressions are identical
|
|
auto outvars = current->getVariablesSetHere();
|
|
TRI_ASSERT(outvars.size() == 1);
|
|
|
|
// check if target variable is already registered as a replacement
|
|
// this covers the following case:
|
|
// - replacements is set to B => C
|
|
// - we're now inserting a replacement A => B
|
|
// the goal now is to enter a replacement A => C instead of A => B
|
|
auto target = outvars[0];
|
|
while (target != nullptr) {
|
|
auto it = replacements.find(target->id);
|
|
|
|
if (it != replacements.end()) {
|
|
target = (*it).second;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
replacements.emplace(std::make_pair(outvar[0]->id, target));
|
|
|
|
// also check if the insertion enables further shortcuts
|
|
// this covers the following case:
|
|
// - replacements is set to A => B
|
|
// - we have just inserted a replacement B => C
|
|
// the goal now is to change the replacement A => B to A => C
|
|
for (auto it = replacements.begin(); it != replacements.end(); ++it) {
|
|
if ((*it).second == outvar[0]) {
|
|
(*it).second = target;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (current->getType() == EN::AGGREGATE) {
|
|
if (static_cast<AggregateNode*>(current)->hasOutVariable()) {
|
|
// COLLECT ... INTO is evil (tm): it needs to keep all already defined variables
|
|
// we need to abort optimization here
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (! current->hasDependency()) {
|
|
// node either has no or more than one dependency. we don't know what to do and must abort
|
|
// note: this will also handle Singleton nodes
|
|
break;
|
|
}
|
|
|
|
current->addDependencies(stack);
|
|
}
|
|
}
|
|
|
|
if (! replacements.empty()) {
|
|
// finally replace the variables
|
|
RedundantCalculationsReplacer finder(replacements);
|
|
plan->root()->walk(&finder);
|
|
plan->findVarUsage();
|
|
|
|
opt->addPlan(plan, rule, true);
|
|
}
|
|
else {
|
|
// no changes
|
|
opt->addPlan(plan, rule, false);
|
|
}
|
|
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief remove CalculationNodes and SubqueryNodes that are never needed
|
|
/// this modifies an existing plan in place
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeUnnecessaryCalculationsRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode::NodeType> const types = {
|
|
EN::CALCULATION,
|
|
EN::SUBQUERY
|
|
};
|
|
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(types, true);
|
|
std::unordered_set<ExecutionNode*> toUnlink;
|
|
|
|
for (auto const& n : nodes) {
|
|
if (n->getType() == EN::CALCULATION) {
|
|
auto nn = static_cast<CalculationNode*>(n);
|
|
|
|
if (nn->canThrow()) {
|
|
// If this node can throw, we must not optimize it away!
|
|
continue;
|
|
}
|
|
}
|
|
else {
|
|
auto nn = static_cast<SubqueryNode*>(n);
|
|
if (nn->canThrow()) {
|
|
// subqueries that can throw must not be optimized away
|
|
continue;
|
|
}
|
|
}
|
|
|
|
auto outvar = n->getVariablesSetHere();
|
|
TRI_ASSERT(outvar.size() == 1);
|
|
auto varsUsedLater = n->getVarsUsedLater();
|
|
|
|
if (varsUsedLater.find(outvar[0]) == varsUsedLater.end()) {
|
|
// The variable whose value is calculated here is not used at
|
|
// all further down the pipeline! We remove the whole
|
|
// calculation node,
|
|
toUnlink.emplace(n);
|
|
}
|
|
}
|
|
|
|
if (! toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, ! toUnlink.empty());
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief helper function to find variable and attribute names from a node (if any)
|
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static void FindVarAndAttr (ExecutionPlan const* plan,
|
|
AstNode const* node,
|
|
Variable const*& enumCollVar,
|
|
std::string& attr) {
|
|
if (node->type == NODE_TYPE_REFERENCE) {
|
|
auto x = static_cast<Variable*>(node->getData());
|
|
auto setter = plan->getVarSetBy(x->id);
|
|
if (setter != nullptr &&
|
|
setter->getType() == EN::ENUMERATE_COLLECTION) {
|
|
enumCollVar = x;
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
FindVarAndAttr(plan, node->getMember(0), enumCollVar, attr);
|
|
|
|
if (enumCollVar != nullptr) {
|
|
attr.append(node->getStringValue(), node->getStringLength());
|
|
attr.push_back('.');
|
|
}
|
|
return;
|
|
}
|
|
|
|
attr.clear();
|
|
enumCollVar = nullptr;
|
|
return;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief builds a range info from the expression node
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
static RangeInfoMapVec* BuildRangeInfo (ExecutionPlan* plan,
|
|
AstNode const* node,
|
|
Variable const*& enumCollVar,
|
|
std::string& attr,
|
|
bool& mustNotUseRanges,
|
|
AstNodeType combineType = NODE_TYPE_OPERATOR_BINARY_AND) {
|
|
TRI_ASSERT(combineType == NODE_TYPE_OPERATOR_BINARY_AND ||
|
|
combineType == NODE_TYPE_OPERATOR_BINARY_OR);
|
|
|
|
bool foundSomething = false;
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
std::unique_ptr<RangeInfoMap> rim(new RangeInfoMap());
|
|
|
|
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
FindVarAndAttr(plan, rhs, enumCollVar, attr);
|
|
|
|
if (enumCollVar != nullptr) {
|
|
std::unordered_set<Variable const*> varsUsed;
|
|
Ast::getReferencedVariables(lhs, varsUsed);
|
|
|
|
if (varsUsed.find(enumCollVar) == varsUsed.end()) {
|
|
// Found a multiple attribute access of a variable and an
|
|
// expression which does not involve that variable:
|
|
foundSomething = true;
|
|
rim->insert(enumCollVar->name,
|
|
attr.substr(0, attr.size() - 1),
|
|
RangeInfoBound(lhs, true));
|
|
|
|
enumCollVar = nullptr;
|
|
attr.clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
FindVarAndAttr(plan, lhs, enumCollVar, attr);
|
|
|
|
if (enumCollVar != nullptr) {
|
|
std::unordered_set<Variable const*> varsUsed;
|
|
Ast::getReferencedVariables(rhs, varsUsed);
|
|
|
|
if (varsUsed.find(enumCollVar) == varsUsed.end()) {
|
|
// Found a multiple attribute access of a variable and an
|
|
// expression which does not involve that variable:
|
|
foundSomething = true;
|
|
rim->insert(enumCollVar->name,
|
|
attr.substr(0, attr.size() - 1),
|
|
RangeInfoBound(rhs, true));
|
|
|
|
enumCollVar = nullptr;
|
|
attr.clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
if (combineType == NODE_TYPE_OPERATOR_BINARY_OR && ! foundSomething) {
|
|
// disable the use of the range because we may have found something like this,
|
|
// which makes using an index for a.x invalid:
|
|
// a.x == 1 || RAND() > 0
|
|
mustNotUseRanges = true;
|
|
}
|
|
|
|
return new RangeInfoMapVec(rim.release());
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_LT ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_GT ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_LE ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_GE) {
|
|
|
|
std::unique_ptr<RangeInfoMap> rim(new RangeInfoMap());
|
|
bool include = (node->type == NODE_TYPE_OPERATOR_BINARY_LE ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_GE);
|
|
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
// Attribute access on the right:
|
|
// First find out whether there is a multiple attribute access
|
|
// of a variable on the right:
|
|
FindVarAndAttr(plan, rhs, enumCollVar, attr);
|
|
|
|
if (enumCollVar != nullptr) {
|
|
foundSomething = true;
|
|
RangeInfoBound low;
|
|
RangeInfoBound high;
|
|
|
|
// Constant value on the left, so insert a constant condition:
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_GE ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_GT) {
|
|
high.assign(lhs, include);
|
|
}
|
|
else {
|
|
low.assign(lhs, include);
|
|
}
|
|
|
|
rim->insert(enumCollVar->name,
|
|
attr.substr(0, attr.size() - 1),
|
|
low,
|
|
high,
|
|
false);
|
|
|
|
enumCollVar = nullptr;
|
|
attr.clear();
|
|
}
|
|
}
|
|
|
|
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
// Attribute access on the left:
|
|
// First find out whether there is a multiple attribute access
|
|
// of a variable on the left:
|
|
FindVarAndAttr(plan, lhs, enumCollVar, attr);
|
|
|
|
if (enumCollVar != nullptr) {
|
|
foundSomething = true;
|
|
RangeInfoBound low;
|
|
RangeInfoBound high;
|
|
|
|
// Constant value on the right, so insert a constant condition:
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_GE ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_GT) {
|
|
low.assign(rhs, include);
|
|
}
|
|
else {
|
|
high.assign(rhs, include);
|
|
}
|
|
|
|
rim->insert(enumCollVar->name,
|
|
attr.substr(0, attr.size() - 1),
|
|
low,
|
|
high,
|
|
false);
|
|
|
|
enumCollVar = nullptr;
|
|
attr.clear();
|
|
}
|
|
}
|
|
|
|
if (combineType == NODE_TYPE_OPERATOR_BINARY_OR && ! foundSomething) {
|
|
// disable the use of the range because we may have found something like this,
|
|
// which makes using an index for a.x invalid:
|
|
// a.x == 1 || RAND() > 0
|
|
mustNotUseRanges = true;
|
|
}
|
|
|
|
return new RangeInfoMapVec(rim.release());
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
|
|
auto lhs = BuildRangeInfo(plan, node->getMember(0), enumCollVar, attr, mustNotUseRanges, node->type);
|
|
auto rhs = BuildRangeInfo(plan, node->getMember(1), enumCollVar, attr, mustNotUseRanges, node->type);
|
|
|
|
mustNotUseRanges = false;
|
|
|
|
// distribute AND into OR
|
|
return andCombineRangeInfoMapVecsIgnoreEmpty(lhs, rhs);
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_IN) {
|
|
auto lhs = node->getMember(0); // enumCollVar
|
|
auto rhs = node->getMember(1); // value
|
|
|
|
std::unique_ptr<RangeInfoMapVec> rimv(new RangeInfoMapVec());
|
|
|
|
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
FindVarAndAttr(plan, lhs, enumCollVar, attr);
|
|
|
|
if (enumCollVar != nullptr) {
|
|
std::unordered_set<Variable const*> varsUsed;
|
|
Ast::getReferencedVariables(rhs, varsUsed);
|
|
|
|
if (varsUsed.find(enumCollVar) == varsUsed.end()) {
|
|
// Found a multiple attribute access of a variable and an
|
|
// expression which does not involve that variable:
|
|
foundSomething = true;
|
|
|
|
if (rhs->type == NODE_TYPE_ARRAY) {
|
|
size_t const n = rhs->numMembers();
|
|
rimv->reserve(n);
|
|
|
|
std::string const attrName(attr.substr(0, attr.size() - 1));
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
RangeInfo ri(enumCollVar->name,
|
|
attrName,
|
|
RangeInfoBound(rhs->getMember(i), true));
|
|
// the following does not seem to be necessary here, but will slow things down
|
|
// considerably if the array is very big
|
|
// rimv->differenceRangeInfo(ri);
|
|
if (ri.isValid()) {
|
|
std::unique_ptr<RangeInfoMap> temp(new RangeInfoMap(ri));
|
|
rimv->emplace_back(temp.get());
|
|
temp.release();
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
RangeInfo ri(enumCollVar->name,
|
|
attr.substr(0, attr.size() - 1),
|
|
RangeInfoBound(rhs, true));
|
|
rimv->differenceRangeInfo(ri);
|
|
if (ri.isValid()) {
|
|
std::unique_ptr<RangeInfoMap> temp(new RangeInfoMap(ri));
|
|
rimv->emplace_back(temp.get());
|
|
temp.release();
|
|
}
|
|
}
|
|
enumCollVar = nullptr;
|
|
attr.clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
if (combineType == NODE_TYPE_OPERATOR_BINARY_OR && ! foundSomething) {
|
|
// disable the use of the range because we may have found something like this,
|
|
// which makes using an index for a.x invalid:
|
|
// a.x == 1 || RAND() > 0
|
|
mustNotUseRanges = true;
|
|
}
|
|
|
|
return rimv.release();
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
|
|
bool lhsMustNotUseRange = false;
|
|
bool rhsMustNotUseRange = false;
|
|
|
|
auto lhs = BuildRangeInfo(plan, node->getMember(0), enumCollVar, attr, lhsMustNotUseRange, node->type);
|
|
auto rhs = BuildRangeInfo(plan, node->getMember(1), enumCollVar, attr, rhsMustNotUseRange, node->type);
|
|
|
|
if (lhsMustNotUseRange || rhsMustNotUseRange) {
|
|
mustNotUseRanges = true;
|
|
}
|
|
|
|
return orCombineRangeInfoMapVecs(lhs, rhs);
|
|
}
|
|
|
|
if (combineType == NODE_TYPE_OPERATOR_BINARY_AND) {
|
|
attr.clear();
|
|
enumCollVar = nullptr;
|
|
return nullptr;
|
|
}
|
|
|
|
// default case
|
|
mustNotUseRanges = true;
|
|
attr.clear();
|
|
enumCollVar = nullptr;
|
|
return nullptr;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief prefer IndexRange nodes over EnumerateCollection nodes
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Walker that is started at a FILTER node and walks over the nodes reachable
// from it. It collects the range conditions produced by the CalculationNodes
// that feed the FILTERs it passes and, when it reaches an
// EnumerateCollectionNode, records one candidate IndexRangeNode per usable
// index in the shared "changes" container. The walker itself never replaces
// the collection node; the only plan modification it performs directly is
// the insertion of NoResultsNodes when the collected ranges are not valid.
class FilterToEnumCollFinder final : public WalkerWorker<ExecutionNode> {

  public:

    // maps a collection node to the indexes for which a replacement node has
    // already been recorded
    typedef std::unordered_map<ExecutionNode const*, std::unordered_set<triagens::aql::Index const*>> IndexCache;

  private:

    // ranges collected so far; owned by this object (freed in the destructor)
    RangeInfoMapVec* _rangeInfoMapVec;

    ExecutionPlan* _plan;

    // ids of the variables used by the FILTER nodes seen so far
    std::unordered_set<VariableId> _varIds;

    // set when NoResultsNodes were inserted into the plan
    bool _modified;

    // can any node walked over throw?
    bool _canThrow;

    // The following maps ids of EnumerateCollectionNodes in the original
    // plan to an index in the (outer vector) of the _changes container.
    std::unordered_map<size_t, size_t>& _changesPlaces;

    // The outer vector is for the different ids of EnumerateCollectionNodes
    // in the original plan that could be replaced. For each one, the pair
    // contains the id of the node in the original plan and a vector
    // that holds the possible replacements.
    std::vector<std::pair<size_t, std::vector<ExecutionNode*>>>& _changes;

    // a reference to the CollectionNodes for which all indexes have been processed
    std::unordered_set<ExecutionNode const*>& _doneCollections;

    // a reference to the indexes processed for CollectionNodes
    IndexCache& _doneIndexes;

  public:

    FilterToEnumCollFinder (ExecutionPlan* plan,
                            Variable const* var,
                            std::unordered_map<size_t, size_t>& changesPlaces,
                            std::vector<std::pair<size_t, std::vector<ExecutionNode*>>>& changes,
                            std::unordered_set<ExecutionNode const*>& doneCollections,
                            IndexCache& doneIndexes)
      : _rangeInfoMapVec(nullptr),
        _plan(plan),
        _varIds({ var->id }),
        _modified(false),
        _canThrow(false),
        _changesPlaces(changesPlaces),
        _changes(changes),
        _doneCollections(doneCollections),
        _doneIndexes(doneIndexes) {
    }

    // the object owns _rangeInfoMapVec through a raw pointer, so a copy would
    // lead to a double delete
    FilterToEnumCollFinder (FilterToEnumCollFinder const&) = delete;
    FilterToEnumCollFinder& operator= (FilterToEnumCollFinder const&) = delete;

    ~FilterToEnumCollFinder () {
      delete _rangeInfoMapVec;
    }

    // whether the walker inserted NoResultsNodes into the plan
    bool modified () const {
      return _modified;
    }

    // called for every node walked over; returning true aborts the walk
    bool before (ExecutionNode* en) override final {
      _canThrow = (_canThrow || en->canThrow()); // can any node walked over throw?

      switch (en->getType()) {
        case EN::ENUMERATE_LIST:
        case EN::SUBQUERY:
        case EN::SORT:
        case EN::INDEX_RANGE:
          break;

        case EN::CALCULATION:
          handleCalculation(en);
          break;

        case EN::FILTER: {
          // remember the variable the FILTER tests, so that the calculation
          // producing it is picked up later in the walk
          std::vector<Variable const*>&& inVar = en->getVariablesUsedHere();
          TRI_ASSERT(inVar.size() == 1);
          _varIds.emplace(inVar[0]->id);
          break;
        }

        case EN::AGGREGATE:
        case EN::SCATTER:
        case EN::DISTRIBUTE:
        case EN::GATHER:
        case EN::REMOTE:
          // in these cases we simply ignore the intermediate nodes, note
          // that we have taken care of nodes that could throw exceptions
          // above.
          break;

        case EN::SINGLETON:
        case EN::INSERT:
        case EN::REMOVE:
        case EN::REPLACE:
        case EN::UPDATE:
        case EN::UPSERT:
        case EN::RETURN:
        case EN::NORESULTS:
        case EN::ILLEGAL:
          // in all these cases something is seriously wrong and we better abort
          return true;

        case EN::LIMIT:
          // if we meet a limit node between a filter and an enumerate
          // collection, we abort . . .
          return true;

        case EN::ENUMERATE_COLLECTION:
          handleEnumerateCollection(en);
          break;
      }

      return false;
    }

    // subqueries are not entered
    bool enterSubquery (ExecutionNode*, ExecutionNode*) override final {
      return false;
    }

  private:

    // picks up the ranges of a CalculationNode that produces the value of one
    // of the FILTERs seen so far and AND-combines them with the ranges
    // collected before
    void handleCalculation (ExecutionNode* en) {
      auto&& outvar = en->getVariablesSetHere();
      TRI_ASSERT(outvar.size() == 1);

      if (_varIds.find(outvar[0]->id) == _varIds.end()) {
        // this calculation does not feed any of the FILTERs we have seen
        return;
      }

      auto node = static_cast<CalculationNode*>(en);
      std::string attr;
      Variable const* enumCollVar = nullptr;
      auto expression = node->expression()->node();
      bool mustNotUseRanges = false;

      // there is an implicit AND between FILTER statements
      if (_rangeInfoMapVec == nullptr) {
        // don't yet have anything to AND-combine
        _rangeInfoMapVec = BuildRangeInfo(_plan, expression, enumCollVar, attr, mustNotUseRanges);
      }
      else {
        // AND-combine with previous ranges
        auto other = BuildRangeInfo(_plan, expression, enumCollVar, attr, mustNotUseRanges);

        if (mustNotUseRanges) {
          // the new ranges are unusable: drop them and keep the existing
          // _rangeInfoMapVec
          mustNotUseRanges = false;
          delete other;
        }
        else {
          // AND-combine ranges in FILTER found with previous ranges
          _rangeInfoMapVec = andCombineRangeInfoMapVecsIgnoreEmpty(_rangeInfoMapVec, other);
        }
      }

      if (_rangeInfoMapVec != nullptr && mustNotUseRanges) {
        // it is unsafe to use the ranges found. throw them away immediately
        delete _rangeInfoMapVec;
        _rangeInfoMapVec = nullptr;
      }
    }

    // removes all variable bounds for variable varName that reference
    // variables which are not defined at the given collection node.
    // firstMap is the result of _rangeInfoMapVec->find(varName, 0) and must
    // not be a nullptr
    void removeUndefinedBounds (EnumerateCollectionNode* node,
                                std::string const& varName,
                                std::unordered_map<std::string, RangeInfo>* firstMap) {
      std::unordered_set<Variable const*> varsDefined = node->getVarsValid();
      // Take out the variable we define only here, because we are
      // not allowed to use it in a variable bound expression:
      std::vector<Variable const*>&& varsSetHere = node->getVariablesSetHere();
      for (auto const& v : varsSetHere) {
        varsDefined.erase(v);
      }

      std::unordered_set<Variable const*> varsUsed;

      // erases every bound whose expression uses a variable that is not
      // defined here; the owning RangeInfo then loses its equality status
      auto pruneBounds = [&] (RangeInfo& info, std::list<RangeInfoBound>& bounds) -> void {
        for (auto it = bounds.begin(); it != bounds.end(); /* no hoisting */) {
          AstNode const* a = it->getExpressionAst(_plan->getAst());
          varsUsed.clear();
          Ast::getReferencedVariables(a, varsUsed);

          bool bad = false;
          for (auto const& v : varsUsed) {
            if (varsDefined.find(v) == varsDefined.end()) {
              bad = true;
              break;
            }
          }

          if (bad) {
            it = bounds.erase(it);
            info.revokeEquality(); // just to be sure
          }
          else {
            ++it;
          }
        }
      };

      size_t pos = 0;
      auto map = firstMap;

      while (map != nullptr) {
        for (auto& x : *map) {
          pruneBounds(x.second, x.second._lows);
          pruneBounds(x.second, x.second._highs);
        }
        map = _rangeInfoMapVec->find(varName, ++pos);
      }
    }

    // condition for indexes that are looked up by an equality on one of two
    // alternative attributes (used for _id/_key and _from/_to): for every
    // valid position the range on firstAttr is used if present, otherwise the
    // range on secondAttr. A range that is not a single-value range makes
    // the whole condition unusable (it is cleared)
    void addEqualityOnEitherAttribute (std::string const& varName,
                                       std::vector<size_t> const& validPos,
                                       std::string const& firstAttr,
                                       std::string const& secondAttr,
                                       IndexOrCondition& indexOrCondition) {
      for (size_t k = 0; k < validPos.size(); k++) {
        auto const map = _rangeInfoMapVec->find(varName, validPos[k]);
        auto range = map->find(firstAttr);

        if (range == map->end()) {
          range = map->find(secondAttr);
        }

        if (range == map->end()) {
          // neither attribute is restricted in this position; the position
          // stays empty and is detected by the caller
          continue;
        }

        if (! range->second.is1ValueRangeInfo()) {
          indexOrCondition.clear(); // not usable
          break;
        }

        indexOrCondition.at(k).emplace_back(range->second);
      }
    }

    // condition for a hash index: each valid orCondition should match every
    // field of the given index with a single-value range, otherwise the
    // condition is cleared
    void addHashCondition (std::string const& varName,
                           std::vector<size_t> const& validPos,
                           Index* idx,
                           IndexOrCondition& indexOrCondition) {
      for (size_t k = 0; k < validPos.size() && ! indexOrCondition.empty(); k++) {
        auto const map = _rangeInfoMapVec->find(varName, validPos[k]);

        for (size_t j = 0; j < idx->fields.size(); j++) {
          std::string fieldString;
          TRI_AttributeNamesToString(idx->fields[j], fieldString, true);
          auto range = map->find(fieldString);

          if (range == map->end() || ! range->second.is1ValueRangeInfo()) {
            indexOrCondition.clear(); // not usable
            break;
          }

          if (idx->sparse) {
            // a sparse hash index must not be used if any of the lookup values is
            // either null (null is not contained in a sparse index) or is calculated
            // using an expression with unknown result. this is because the expression
            // result may be null and using the sparse index then would not allow
            // finding the document
            bool mustClear = false;
            auto const& rib = range->second;

            if (rib.isConstant()) {
              // value is constant (and an equality because we're looking at a hash index)
              auto const& value = rib._lowConst.bound();
              if (value.isEmpty() || value.isNull()) {
                // lookup value is null. can't use a sparse index.
                mustClear = true;
              }
            }
            else {
              // non-constant lookup value. it might be null, so we can't use the index
              mustClear = true;
            }

            if (mustClear) {
              // not usable
              indexOrCondition.clear();
              break;
            }
          }

          indexOrCondition.at(k).emplace_back(range->second);
        }
      }
    }

    // condition for a skiplist index: the first index attribute must be
    // restricted in every valid position; further attributes (up to prefix)
    // are added from left to right as long as all previous ones are
    // single-value ranges. The condition is cleared if the index cannot be
    // used
    void addSkiplistCondition (std::string const& varName,
                               std::vector<size_t> const& validPos,
                               Index* idx,
                               size_t prefix,
                               IndexOrCondition& indexOrCondition) {
      for (size_t k = 0; k < validPos.size(); k++) {
        auto const map = _rangeInfoMapVec->find(varName, validPos[k]);

        // check if there is a range that contains the first index attribute
        std::string firstField;
        TRI_AttributeNamesToString(idx->fields[0], firstField, true);
        auto range = map->find(firstField);

        if (range == map->end()) {
          // not usable. nothing else is done with an empty condition
          indexOrCondition.clear();
          return;
        }

        // insert the first index attribute
        indexOrCondition.at(k).emplace_back(range->second);

        // iterate over all index attributes from left to right
        bool equality = range->second.is1ValueRangeInfo();
        size_t j = 0;

        while (++j < prefix && equality) {
          std::string fieldString;
          TRI_AttributeNamesToString(idx->fields[j], fieldString, true);
          range = map->find(fieldString);

          if (range == map->end()) {
            // not usable. nothing else is done with an empty condition
            indexOrCondition.clear();
            return;
          }

          indexOrCondition.at(k).emplace_back(range->second);
          equality = equality && range->second.is1ValueRangeInfo();
        }
      }

      // check if index is sparse and exclude it if required
      // a sparse skiplist index must not be used if any of the lookup values is
      // either null (null is not contained in a sparse index) or is calculated
      // using an expression with unknown result. this is because the expression
      // result may be null and using the sparse index then would not allow
      // finding the document
      if (idx->sparse && ! indexOrCondition.empty()) {
        for (size_t k = 0; k < validPos.size() && ! indexOrCondition.empty(); k++) {
          auto const map = _rangeInfoMapVec->find(varName, validPos[k]);

          for (size_t j = 0; j < idx->fields.size(); j++) {
            std::string fieldString;
            TRI_AttributeNamesToString(idx->fields[j], fieldString, true);
            auto range = map->find(fieldString);

            if (range == map->end()) {
              indexOrCondition.clear();
              break; // not usable
            }

            auto const& rib = range->second;

            // if the lookup value is dynamic, undefined or includes null, then we
            // can't use the index
            if (! rib.isConstant() ||
                ! rib._lowConst.isDefined() ||
                (rib._lowConst.inclusive() && rib._lowConst.bound().isNull())) {
              indexOrCondition.clear();
              break;
            }
          }
        }
      }
    }

    // a condition is usable if it was not cleared and every position
    // received at least one range
    static bool isUsable (IndexOrCondition const& indexOrCondition) {
      if (indexOrCondition.empty()) {
        return false;
      }

      for (auto const& position : indexOrCondition) {
        if (position.empty()) {
          return false;
        }
      }

      return true;
    }

    // whether a replacement for this collection node and index was already
    // recorded (prevents duplicate usage of the same index for the same
    // collection node)
    bool alreadyProcessed (ExecutionNode* en, Index* idx) const {
      auto p1 = _doneIndexes.find(en);

      return (p1 != _doneIndexes.end() &&
              (*p1).second.find(idx) != (*p1).second.end());
    }

    // enters the index into the index cache; once as many indexes were
    // entered as there are candidates, the collection node is marked as done
    void markProcessed (ExecutionNode* en, Index* idx, size_t numCandidates) {
      size_t indexesUsed = 0;
      auto p1 = _doneIndexes.find(en);

      if (p1 != _doneIndexes.end()) {
        (*p1).second.emplace(idx);
        indexesUsed = (*p1).second.size();
      }
      else {
        _doneIndexes.emplace(en, std::unordered_set<triagens::aql::Index const*>{ idx });
        indexesUsed = 1;
      }

      if (indexesUsed == numCandidates) {
        // we processed all usable indexes for this CollectionNode
        _doneCollections.emplace(en);
      }
    }

    // creates an IndexRangeNode for the given index and condition and appends
    // it to the list of possible replacements for the collection node
    void recordReplacement (EnumerateCollectionNode* node,
                            Index* idx,
                            IndexOrCondition const& indexOrCondition) {
      std::unique_ptr<ExecutionNode> newNode(new IndexRangeNode(
        _plan,
        _plan->nextId(),
        node->vocbase(),
        node->collection(),
        node->outVariable(),
        idx,
        indexOrCondition,
        false
      ));

      size_t const place = node->id();
      auto it = _changesPlaces.find(place);

      if (it == _changesPlaces.end()) {
        _changes.emplace_back(place, std::vector<ExecutionNode*>());
        it = _changesPlaces.emplace(place, _changes.size() - 1).first;
      }

      std::vector<ExecutionNode*>& vec = _changes[it->second].second;
      // hand over ownership only after the pointer was stored successfully:
      // if emplace_back throws, the unique_ptr still frees the node
      vec.emplace_back(newNode.get());
      newNode.release();
    }

    // tries to find usable indexes for the collection node, given the ranges
    // collected so far
    void handleEnumerateCollection (ExecutionNode* en) {
      if (_rangeInfoMapVec == nullptr) {
        return;
      }

      if (_doneCollections.find(en) != _doneCollections.end()) {
        // all indexes for this collection have been used. done
        return;
      }

      auto const node = static_cast<EnumerateCollectionNode*>(en);
      auto var = node->getVariablesSetHere()[0]; // should only be 1

      // check if we have any ranges with this var
      std::unordered_map<std::string, RangeInfo>* map = _rangeInfoMapVec->find(var->name, 0);

      if (map == nullptr) {
        return;
      }

      // Remove all variable bounds that are no longer defined here:
      removeUndefinedBounds(node, var->name, map);

      // Now remove empty conditions:
      _rangeInfoMapVec->eraseEmptyOrUndefined(var->name);

      // if var->name is not mapped in every position of _rangeInfoMapVec
      // then we cannot use the index range node (we would return too few
      // results), for example
      // x.a == 1 || y.c == 2 || x.a == 3
      if (! _rangeInfoMapVec->isMapped(var->name)) {
        return;
      }

      std::vector<size_t> const validPos = _rangeInfoMapVec->validPositions(var->name);

      if (_canThrow) {
        return;
      }

      // are any of the RangeInfoMaps in the vector valid?
      if (validPos.empty()) { // ranges are not valid . . .
        // iterate over a snapshot of the parents, because insertDependency
        // modifies the plan while we loop
        auto const parents = node->getParents();

        for (auto const& x : parents) {
          auto noRes = new NoResultsNode(_plan, _plan->nextId());
          _plan->registerNode(noRes);
          _plan->insertDependency(x, noRes);
        }
        _modified = true;
        return;
      }

      std::vector<Index*> idxs;
      std::vector<size_t> prefixes;
      // {idxs.at(i)->_fields[0]..idxs.at(i)->_fields[prefixes.at(i)]}
      // is a subset of <attrs>

      // note: prefixes are only used for skiplist indexes
      // for all other index types, the prefix value will always be 0
      node->getIndexesForIndexRangeNode(_rangeInfoMapVec->attributes(var->name), idxs, prefixes);

      // make one new plan for every index in <idxs> that replaces the
      // enumerate collection node with a IndexRangeNode ...
      for (size_t i = 0; i < idxs.size(); ++i) {
        // ranges must be valid and all comparisons == if hash
        // index or == followed by a single <, >, >=, or <=
        // if a skip index in the order of the fields of the
        // index.
        auto const idx = idxs.at(i);
        TRI_ASSERT(idx != nullptr);

        if (alreadyProcessed(en, idx)) {
          // already processed this index for this collection node
          continue;
        }

        // initialize all conditions with empty ranges
        IndexOrCondition indexOrCondition(validPos.size());

        if (idx->type == triagens::arango::Index::TRI_IDX_TYPE_PRIMARY_INDEX) {
          addEqualityOnEitherAttribute(var->name,
                                       validPos,
                                       std::string(TRI_VOC_ATTRIBUTE_ID),
                                       std::string(TRI_VOC_ATTRIBUTE_KEY),
                                       indexOrCondition);
        }
        else if (idx->type == triagens::arango::Index::TRI_IDX_TYPE_EDGE_INDEX) {
          addEqualityOnEitherAttribute(var->name,
                                       validPos,
                                       std::string(TRI_VOC_ATTRIBUTE_FROM),
                                       std::string(TRI_VOC_ATTRIBUTE_TO),
                                       indexOrCondition);
        }
        else if (idx->type == triagens::arango::Index::TRI_IDX_TYPE_HASH_INDEX) {
          addHashCondition(var->name, validPos, idx, indexOrCondition);
        }
        else if (idx->type == triagens::arango::Index::TRI_IDX_TYPE_SKIPLIST_INDEX) {
          addSkiplistCondition(var->name, validPos, idx, prefixes.at(i), indexOrCondition);
        }

        // check if all positions are non-empty
        if (! isUsable(indexOrCondition)) {
          continue;
        }

        // enter index into the index cache
        markProcessed(en, idx, idxs.size());

        recordReplacement(node, idx, indexOrCondition);
      }
    }
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief useIndexRange, try to use an index for filtering
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::useIndexRangeRule (Optimizer* opt,
                                      ExecutionPlan* plan,
                                      Optimizer::Rule const* rule) {
  // Overview: a FilterToEnumCollFinder is run from every FILTER node and
  // collects, per EnumerateCollectionNode, the IndexRangeNodes that could
  // replace it. Afterwards either the given plan is modified in place and
  // handed to the optimizer (exactly one combination), or one cloned plan per
  // combination is handed over and the given plan is deleted. Candidate nodes
  // that do not end up in a plan are freed via cleanupChanges() on every
  // path, including the exceptional ones, which always rethrow.
  std::vector<ExecutionNode::NodeType> const types = {
    EN::ENUMERATE_COLLECTION,
    EN::FILTER
  };

  // These are all the EnumerateCollection and Filter nodes in the query
  std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(types, true);

  size_t numCollections = 0;
  for (auto const& it : nodes) {
    if (it->getType() == EN::ENUMERATE_COLLECTION) {
      ++numCollections;
    }
  }

  if (numCollections == 0) {
    // shortcut
    opt->addPlan(plan, rule, false);
    return TRI_ERROR_NO_ERROR;
  }

  // The following maps ids of EnumerateCollectionNodes in the original
  // plan to an index in the (outer vector) of the _changes container.
  std::unordered_map<size_t, size_t> changesPlaces;
  // The outer vector is for the different ids of EnumerateCollectionNodes
  // in the original plan that could be replaced. For each one, the pair
  // contains the id of the node in the original plan and a vector
  // that holds the possible replacements.
  std::vector<std::pair<size_t, std::vector<ExecutionNode*>>> changes;

  // frees all candidate nodes that are still held in changes
  auto cleanupChanges = [&] () -> void {
    for (auto& v : changes) {
      for (ExecutionNode* n : v.second) {
        delete n;
      }
    }
    changes.clear();
    changesPlaces.clear();
  };

  bool modified = false;
  // In the following loop we only collect changes, maybe we introduce some
  // NoResultsNode, possibly in subqueries.

  try {
    std::unordered_set<ExecutionNode const*> doneCollections;
    FilterToEnumCollFinder::IndexCache doneIndexes;

    for (auto const& n : nodes) {
      if (n->getType() != EN::FILTER) {
        // only process FILTER nodes, not ENUMERATE_COLLECTION here!
        continue;
      }

      auto nn = static_cast<FilterNode*>(n);
      auto invars = nn->getVariablesUsedHere();
      TRI_ASSERT(invars.size() == 1);

      FilterToEnumCollFinder finder(plan, invars[0], changesPlaces, changes, doneCollections, doneIndexes);
      nn->walk(&finder);
      modified |= finder.modified();

      if (doneCollections.size() == numCollections) {
        // handled all possible combinations already
        break;
      }
    }
  }
  catch (...) {
    cleanupChanges();
    throw;
  }

  // First find out how many possibilities for plan changes we actually have.
  // We stop multiplying as soon as the number of plans would exceed 30:
  size_t nrPlans = opt->numberOfPlans();
  size_t possibilities = 1;
  size_t i = 0;
  while (i < changes.size()) {
    possibilities *= changes[i].second.size();
    i++;
    if (possibilities + nrPlans > 30) {
      break;
    }
  }

  // We will apply the first possible change for changes[i..changes.size()-1]
  // and all possible changes for changes[0..i-1] and create all these plans.
  // First make all the changes from i on in the original plan and those
  // for which there is only one possibility:
  try {
    for (size_t j = 0; j < changes.size(); j++) {
      std::vector<ExecutionNode*>& v = changes[j].second;
      if (j >= i || v.size() == 1) {
        size_t choice = 0;
        if (v.size() > 1) {
          // If in doubt, take a skiplist index:
          for (size_t k = 0; k < v.size(); k++) {
            auto n = static_cast<IndexRangeNode*>(v[k]);
            if (n->getIndex()->type == triagens::arango::Index::TRI_IDX_TYPE_SKIPLIST_INDEX) {
              choice = k;
              break;
            }
          }
        }
        size_t id = changes[j].first;
        // Just in case:
        if (! v.empty()) {
          plan->registerNode(v[choice]);
          plan->replaceNode(plan->getNodeById(id), v[choice]);
          modified = true;
          // Free the other nodes, if they are there:
          for (size_t k = 0; k < v.size(); k++) {
            if (k != choice) {
              delete v[k];
            }
          }
          v.clear();   // take the new node away from changes such that
                       // cleanupChanges does not touch it
        }
      }
    }
  }
  catch (...) {
    cleanupChanges();
    throw;
  }

  // Now see whether it is actually only one plan we make:
  if (possibilities == 1) {
    try {
      opt->addPlan(plan, rule, modified);
    }
    catch (...) {
      // do not swallow the error: the caller must learn that the plan was
      // not added, exactly as on all other error paths of this rule
      cleanupChanges();
      throw;
    }
    cleanupChanges();
    return TRI_ERROR_NO_ERROR;
  }

  // Now we have to create more than one plan, we have to use those from
  // changes[0..i-1] which have more than one possibility. Note that those
  // with exactly 1 possibility have already been done above. This amounts
  // to doing a cartesian product, which we do recursively. The result will
  // be in the todo variable:

  std::function <void(size_t, size_t, std::vector<size_t>&)> doworkRecursive;
  std::vector<std::vector<size_t>> todo;
  std::vector<size_t> work;

  doworkRecursive = [&doworkRecursive, &changes, &todo]
                    (size_t index, size_t limit, std::vector<size_t>& v) {
    if (index >= limit) {
      todo.push_back(v);  // intentionally copy vector
    }
    else if (changes[index].second.size() < 2) {
      // nothing to choose at this position
      doworkRecursive(index + 1, limit, v);
    }
    else {
      for (size_t l = 0; l < changes[index].second.size(); l++) {
        v[index] = l;
        doworkRecursive(index + 1, limit, v);
      }
    }
  };

  // if we get here, we can choose between multiple plans...
  TRI_ASSERT(possibilities != 1);

  try {
    // one choice (initially 0) per position in changes[0..i-1]
    work.assign(i, 0);

    doworkRecursive(0, i, work);
  }
  catch (...) {
    cleanupChanges();
    throw;
  }

  // Now we only have to go through todo and do what needs doing:
  try {
    for (auto const& v : todo) {
      std::unique_ptr<ExecutionPlan> newPlan(plan->clone());
      for (size_t l = 0; l < i; l++) {
        if (changes[l].second.size() >= 2) {
          ExecutionNode* newNode = changes[l].second[v[l]]->clone(newPlan.get(), true, false);
          newPlan->registerNode(newNode);
          newPlan->replaceNode(newPlan->getNodeById(changes[l].first), newNode);
        }
      }
      opt->addPlan(newPlan.release(), rule, true);
    }
  }
  catch (...) {
    cleanupChanges();
    throw;
  }

  cleanupChanges();
  // finally delete the original plan. all plans created in this rule will be better(tm)
  delete plan;

  return TRI_ERROR_NO_ERROR;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief analyse the sortnode and its calculation nodes
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
class SortAnalysis {
|
|
using ECN = triagens::aql::EnumerateCollectionNode;
|
|
|
|
typedef std::pair<ECN::IndexMatchVec, IndexOrCondition> RangeIndexPair;
|
|
|
|
struct sortNodeData {
|
|
bool ASC;
|
|
size_t calculationNodeID;
|
|
std::string variableName;
|
|
std::string attributevec;
|
|
};
|
|
|
|
std::vector<sortNodeData*> _sortNodeData;
|
|
std::unordered_set<size_t> removedNodes;
|
|
|
|
public:
|
|
size_t const sortNodeID;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief constructor; fetches the referenced calculation nodes and builds
|
|
/// _sortNodeData for later use.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
SortAnalysis (SortNode* node)
|
|
: sortNodeID(node->id()) {
|
|
auto sortParams = node->getCalcNodePairs();
|
|
|
|
for (size_t n = 0; n < sortParams.size(); n++) {
|
|
auto d = new sortNodeData;
|
|
try {
|
|
d->ASC = sortParams[n].second;
|
|
d->calculationNodeID = sortParams[n].first->id();
|
|
|
|
if (sortParams[n].first->getType() == EN::CALCULATION) {
|
|
auto cn = static_cast<triagens::aql::CalculationNode*>(sortParams[n].first);
|
|
auto oneSortExpression = cn->expression();
|
|
|
|
if (oneSortExpression->isAttributeAccess()) {
|
|
auto simpleExpression = oneSortExpression->getAttributeAccess();
|
|
d->variableName = simpleExpression.first;
|
|
d->attributevec = simpleExpression.second;
|
|
}
|
|
}
|
|
_sortNodeData.emplace_back(d);
|
|
}
|
|
catch (...) {
|
|
delete d;
|
|
throw;
|
|
}
|
|
}
|
|
}
|
|
|
|
~SortAnalysis () {
|
|
for (auto& x : _sortNodeData) {
|
|
delete x;
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief checks the whether we only have simple calculation nodes
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
bool isAnalyzeable () {
|
|
if (_sortNodeData.size() == 0) {
|
|
return false;
|
|
}
|
|
for (size_t j = 0; j < _sortNodeData.size(); j ++) {
|
|
if (_sortNodeData[j]->variableName.length() == 0) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief checks whether our calculation nodes reference variableName;
|
|
/// returns pair used for further processing with the indices.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
RangeIndexPair getAttrsForVariableName (std::string const& variableName) {
|
|
ECN::IndexMatchVec v;
|
|
IndexOrCondition rangeInfo;
|
|
|
|
for (size_t j = 0; j < _sortNodeData.size(); ++j) {
|
|
if (_sortNodeData[j]->variableName != variableName) {
|
|
return std::make_pair(v, rangeInfo); // for now, no mixed support.
|
|
}
|
|
}
|
|
|
|
// Collect the right data for the sorting:
|
|
v.reserve(_sortNodeData.size());
|
|
|
|
for (size_t j = 0; j < _sortNodeData.size(); ++j) {
|
|
v.emplace_back(_sortNodeData[j]->attributevec, _sortNodeData[j]->ASC);
|
|
}
|
|
// We only need one or-condition (because this is mandatory) which
|
|
// refers to 0 of the attributes:
|
|
rangeInfo.emplace_back(std::vector<RangeInfo>());
|
|
return std::make_pair(v, rangeInfo);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief removes the sortNode and its referenced Calculationnodes from
|
|
/// the plan.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
void removeSortNodeFromPlan (ExecutionPlan* newPlan) {
|
|
// only remove a node once, otherwise this might cause follow up failures
|
|
if (removedNodes.find(sortNodeID) == removedNodes.end()) {
|
|
newPlan->unlinkNode(newPlan->getNodeById(sortNodeID));
|
|
removedNodes.emplace(sortNodeID);
|
|
}
|
|
}
|
|
};
|
|
|
|
// Walker that is started below a SORT node and checks whether the sort can
// be answered by an index: an IndexRangeNode that already delivers the
// requested order makes the sort removable, an EnumerateCollectionNode may
// be replaced by an IndexRangeNode.
class SortToIndexNode final : public WalkerWorker<ExecutionNode> {
  using ECN = triagens::aql::EnumerateCollectionNode;

  ExecutionPlan*       _plan;
  SortAnalysis*        _sortNode;   // analysis of the sort node we work for
  Optimizer::RuleLevel _level;
  bool                 _modified;   // did we change the plan?

  public:

    SortToIndexNode (ExecutionPlan* plan,
                     SortAnalysis* Node,
                     Optimizer::RuleLevel level)
      : _plan(plan),
        _sortNode(Node),
        _level(level),
        _modified(false) {
    }

    bool modified () const {
      return _modified;
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief tells whether the given node has an enumerating node
/// (EnumerateCollection, IndexRange or EnumerateList) somewhere in its chain
/// of first dependencies, i.e. whether it runs inside an outer loop.
/// In that case a SortNode must not be replaced by an index, e.g. in
///   FOR i IN [ 1, 2 ] FOR j IN collectionWithIndex SORT j.indexedAttr RETURN j
/// the index would only guarantee sortedness within each iteration of the
/// outer loop, but not for the total result
////////////////////////////////////////////////////////////////////////////////

    bool isInnerLoop (ExecutionNode const* node) const {
      while (node != nullptr && node->hasDependency()) {
        node = node->getFirstDependency();
        TRI_ASSERT(node != nullptr);

        switch (node->getType()) {
          case EN::ENUMERATE_COLLECTION:
          case EN::INDEX_RANGE:
          case EN::ENUMERATE_LIST:
            // we are contained in an outer loop
            // future potential optimization: check if the outer loop has 0 or 1
            // iterations. in this case it is still possible to remove the sort
            return true;

          default:
            break;
        }
      }

      return false;
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief removes the sort if the index range node already delivers the
/// requested order (reversing the index scan if required). always aborts
/// the walk
////////////////////////////////////////////////////////////////////////////////

    bool handleIndexRangeNode (IndexRangeNode* node) {
      if (! isInnerLoop(node)) {
        auto name = node->getVariablesSetHere()[0]->name;
        auto sortInfo = _sortNode->getAttrsForVariableName(name);

        auto const& match = node->matchesIndex(sortInfo.first);

        if (match.doesMatch) {
          if (match.reverse) {
            node->reverse(true);
          }
          _sortNode->removeSortNodeFromPlan(_plan);
          _modified = true;
        }
      }
      // else: contained in an outer loop. must not optimize away the sort!

      return true;
    }

////////////////////////////////////////////////////////////////////////////////
/// @brief replaces the collection node by an index range node if there is a
/// candidate index; the sort is only removed if that index fully matches the
/// sort criteria. always aborts the walk
////////////////////////////////////////////////////////////////////////////////

    bool handleEnumerateCollectionNode (EnumerateCollectionNode* node,
                                        Optimizer::RuleLevel level) {
      if (isInnerLoop(node)) {
        // contained in an outer loop. must not optimize away the sort!
        return true;
      }

      auto name = node->getVariablesSetHere()[0]->name;
      auto sortInfo = _sortNode->getAttrsForVariableName(name);

      if (sortInfo.first.size() == 0) {
        // nothing here that an index could replace
        return true;
      }

      // get all candidate indexes
      // note: can only use the index if it is a skip list (or a hash and we
      // are checking equality)
      auto const& candidates = node->getIndicesOrdered(sortInfo.first);

      // the first index that covers the condition wins ...
      EnumerateCollectionNode::IndexMatch const* chosen = nullptr;

      for (auto const& candidate : candidates) {
        if (candidate.doesMatch) {
          chosen = &candidate;
          break;
        }
      }

      if (chosen == nullptr) {
        if (candidates.empty()) {
          return true;
        }
        // ... otherwise we fall back to the first candidate
        chosen = &candidates[0];
      }

      bool const reverse = (chosen->doesMatch && chosen->reverse);

      ExecutionNode* replacement = new IndexRangeNode(
        _plan,
        _plan->nextId(),
        node->vocbase(),
        node->collection(),
        node->outVariable(),
        chosen->index,
        sortInfo.second,
        reverse
      );

      _plan->registerNode(replacement);
      _plan->replaceNode(node, replacement);

      if (chosen->doesMatch) {
        // the index supersedes the sort, so the sort can go
        _sortNode->removeSortNodeFromPlan(_plan);
      }

      _modified = true;

      return true;
    }

    bool enterSubquery (ExecutionNode*, ExecutionNode*) override final {
      return false;
    }

    // returning true aborts the walk
    bool before (ExecutionNode* en) override final {
      switch (en->getType()) {
        case EN::INDEX_RANGE:
          return handleIndexRangeNode(static_cast<IndexRangeNode*>(en));

        case EN::ENUMERATE_COLLECTION:
          return handleEnumerateCollectionNode(static_cast<EnumerateCollectionNode*>(en), _level);

        case EN::SORT:
          // pulling two sorts together is done elsewhere.
          // we only walk on if this is our own sort node
          return en->id() != _sortNode->sortNodeID;

        case EN::ENUMERATE_LIST:
        case EN::CALCULATION:
        case EN::SUBQUERY:
        case EN::FILTER:
          // skip. we don't care.
          return false;

        case EN::LIMIT:              // LIMIT is criterion to stop
        case EN::SINGLETON:
        case EN::AGGREGATE:
        case EN::INSERT:
        case EN::REMOVE:
        case EN::REPLACE:
        case EN::UPDATE:
        case EN::UPSERT:
        case EN::RETURN:
        case EN::NORESULTS:
        case EN::SCATTER:
        case EN::DISTRIBUTE:
        case EN::GATHER:
        case EN::REMOTE:
        case EN::ILLEGAL:
          // abort.
          return true;
      }

      return true;
    }
};
|
|
|
|
int triagens::aql::useIndexForSortRule (Optimizer* opt,
                                        ExecutionPlan* plan,
                                        Optimizer::Rule const* rule) {
  // try every SORT node of the plan: if all its sort criteria can be
  // analyzed, walk its dependencies to find an index that can take over
  std::vector<ExecutionNode*>&& sortNodes = plan->findNodesOfType(EN::SORT, true);
  bool planChanged = false;

  for (auto const& candidate : sortNodes) {
    auto sortNode = static_cast<SortNode*>(candidate);
    SortAnalysis analysis(sortNode);

    if (! analysis.isAnalyzeable() || ! candidate->hasDependency()) {
      continue;
    }

    SortToIndexNode walker(plan, &analysis, rule->level);
    sortNode->getFirstDependency()->walk(&walker);

    if (walker.modified()) {
      planChanged = true;
    }
  }

  if (planChanged) {
    plan->findVarUsage();
  }

  opt->addPlan(plan, rule, planChanged, planChanged ? Optimizer::RuleLevel::pass5 : 0);

  return TRI_ERROR_NO_ERROR;
}
|
|
|
|
// TODO: finish rule and test it
|
|
struct FilterCondition {
|
|
std::string variableName;
|
|
std::string attributeName;
|
|
AstNode const* lowNode = nullptr;
|
|
AstNode const* highNode = nullptr;
|
|
bool lowInclusive = false;
|
|
bool highInclusive = false;
|
|
|
|
FilterCondition () {
|
|
}
|
|
|
|
bool isFullyCoveredBy (RangeInfo const& other) {
|
|
if (! other.isConstant()) {
|
|
return false;
|
|
}
|
|
|
|
if (other._var != variableName ||
|
|
other._attr != attributeName) {
|
|
return false;
|
|
}
|
|
|
|
bool const lowDefined = (lowNode != nullptr);
|
|
bool const highDefined = (highNode != nullptr);
|
|
|
|
// do the quickest checks first
|
|
if (lowDefined != other._lowConst.isDefined()) {
|
|
return false;
|
|
}
|
|
|
|
if (highDefined != other._highConst.isDefined()) {
|
|
return false;
|
|
}
|
|
|
|
if (lowDefined && other._lowConst.inclusive() != lowInclusive) {
|
|
return false;
|
|
}
|
|
|
|
if (highDefined && other._highConst.inclusive() != highInclusive) {
|
|
return false;
|
|
}
|
|
|
|
// now the expensive checks
|
|
if (lowDefined) {
|
|
Json json(TRI_UNKNOWN_MEM_ZONE, lowNode->toJsonValue(TRI_UNKNOWN_MEM_ZONE));
|
|
|
|
if (! TRI_CheckSameValueJson(other._lowConst.bound().json(), json.json())) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (highDefined) {
|
|
Json json(TRI_UNKNOWN_MEM_ZONE, highNode->toJsonValue(TRI_UNKNOWN_MEM_ZONE));
|
|
|
|
if (! TRI_CheckSameValueJson(other._highConst.bound().json(), json.json())) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool analyze (AstNode const* node) {
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_LT ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_LE ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_GT ||
|
|
node->type == NODE_TYPE_OPERATOR_BINARY_GE) {
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
AstNodeType op = node->type;
|
|
bool found = false;
|
|
|
|
if (lhs->isConstant() &&
|
|
rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
found = (lhs->type == NODE_TYPE_VALUE);
|
|
}
|
|
else if (rhs->isConstant() &&
|
|
lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
// reverse the nodes
|
|
lhs = node->getMember(1);
|
|
rhs = node->getMember(0);
|
|
|
|
op = Ast::ReverseOperator(node->type);
|
|
found = (lhs->type == NODE_TYPE_VALUE);
|
|
}
|
|
|
|
if (found) {
|
|
TRI_ASSERT(lhs->type == NODE_TYPE_VALUE);
|
|
TRI_ASSERT(rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS);
|
|
|
|
std::function<void(AstNode const*, std::string&, std::string&)> buildName;
|
|
buildName = [&] (AstNode const* node, std::string& variableName, std::string& attributeName) -> void {
|
|
if (node->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
|
|
buildName(node->getMember(0), variableName, attributeName);
|
|
|
|
if (! attributeName.empty()) {
|
|
attributeName.push_back('.');
|
|
}
|
|
|
|
attributeName.append(node->getStringValue(), node->getStringLength());
|
|
}
|
|
else if (node->type == NODE_TYPE_REFERENCE) {
|
|
auto variable = static_cast<Variable const*>(node->getData());
|
|
variableName = variable->name;
|
|
}
|
|
};
|
|
|
|
if (attributeName.empty()) {
|
|
|
|
buildName(rhs, variableName, attributeName);
|
|
if (op == NODE_TYPE_OPERATOR_BINARY_EQ ||
|
|
op == NODE_TYPE_OPERATOR_BINARY_NE) {
|
|
lowInclusive = true;
|
|
lowNode = lhs;
|
|
highInclusive = true;
|
|
highNode = lhs;
|
|
}
|
|
else if (op == NODE_TYPE_OPERATOR_BINARY_LT) {
|
|
lowInclusive = false;
|
|
lowNode = lhs;
|
|
}
|
|
else if (op == NODE_TYPE_OPERATOR_BINARY_LE) {
|
|
lowInclusive = true;
|
|
lowNode = lhs;
|
|
}
|
|
else if (op == NODE_TYPE_OPERATOR_BINARY_GT) {
|
|
highInclusive = false;
|
|
highNode = lhs;
|
|
}
|
|
else if (op == NODE_TYPE_OPERATOR_BINARY_GE) {
|
|
highInclusive = true;
|
|
highNode = lhs;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
else {
|
|
// already have collected something, now check if the next condition
|
|
// is for the same variable / attribute
|
|
std::string compareVariableName;
|
|
std::string compareAttributeName;
|
|
buildName(rhs, compareVariableName, compareAttributeName);
|
|
|
|
if (variableName == compareVariableName &&
|
|
attributeName == compareAttributeName) {
|
|
// same attribute
|
|
// TODO
|
|
}
|
|
}
|
|
|
|
// fall-through
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
return (analyze(lhs) && analyze(rhs));
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief try to remove filters which are covered by indexes
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeFiltersCoveredByIndexRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::unordered_set<ExecutionNode*> toUnlink;
|
|
std::vector<ExecutionNode*>&& nodes= plan->findNodesOfType(EN::FILTER, true);
|
|
|
|
for (auto const& n : nodes) {
|
|
auto fn = static_cast<FilterNode*>(n);
|
|
// find the node with the filter expression
|
|
auto inVar = fn->getVariablesUsedHere();
|
|
TRI_ASSERT(inVar.size() == 1);
|
|
// auto outVar = cn->getVariablesSetHere();
|
|
|
|
auto setter = plan->getVarSetBy(inVar[0]->id);
|
|
if (setter == nullptr) {
|
|
continue;
|
|
}
|
|
|
|
if (setter->getType() != EN::CALCULATION) {
|
|
continue;
|
|
}
|
|
|
|
// check the filter condition
|
|
FilterCondition condition;
|
|
if (! condition.analyze(static_cast<CalculationNode const*>(setter)->expression()->node())) {
|
|
continue;
|
|
}
|
|
|
|
bool handled = false;
|
|
auto current = n;
|
|
while (current != nullptr) {
|
|
if (current->getType() == EN::INDEX_RANGE) {
|
|
// found an index range, now check if the expression is covered by the index
|
|
auto const& ranges = static_cast<IndexRangeNode const*>(current)->ranges();
|
|
|
|
// TODO: this is not prepared for OR conditions
|
|
for (auto const& it : ranges) {
|
|
for (auto it2 : it) {
|
|
if (condition.isFullyCoveredBy(it2)) {
|
|
toUnlink.emplace(setter);
|
|
toUnlink.emplace(n);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (handled) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (handled) {
|
|
break;
|
|
}
|
|
|
|
if (! current->hasDependency()) {
|
|
break;
|
|
}
|
|
|
|
current = current->getFirstDependency();
|
|
}
|
|
}
|
|
|
|
if (! toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, ! toUnlink.empty());
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief helper to compute lots of permutation tuples
///
/// a permutation tuple is stored as one flat vector `data` plus a vector
/// `starts` holding the index at which each section begins. Example:
///   data:   0,1,2, 3,4, 5,6
///   starts: 0,     3,   5
/// describes three permutations of 3, 2 and 2 elements respectively.
///
/// the function advances `data` to the next tuple in lexicographic order,
/// working like an odometer: the last section is advanced first, and only if
/// it wraps around is the section before it advanced. it returns true if a
/// next tuple was produced. it returns false if the tuple was already the
/// lexicographically largest one; in that case every section has wrapped
/// around and the tuple is back at the beginning
////////////////////////////////////////////////////////////////////////////////

static bool NextPermutationTuple (std::vector<size_t>& data,
                                  std::vector<size_t>& starts) {
  // end of the section currently looked at; the last section ends with data
  size_t sectionEnd = data.size();

  for (size_t remaining = starts.size(); remaining > 0; --remaining) {
    size_t const sectionBegin = starts[remaining - 1];

    if (std::next_permutation(data.begin() + sectionBegin,
                              data.begin() + sectionEnd)) {
      // this section could be advanced, we are done
      return true;
    }

    // section wrapped around to its smallest permutation; carry over into
    // the previous section, which ends where this one starts
    sectionEnd = sectionBegin;
  }

  return false;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief interchange adjacent EnumerateCollectionNodes in all possible ways
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::interchangeAdjacentEnumerationsRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::ENUMERATE_COLLECTION, true);
|
|
|
|
std::unordered_set<ExecutionNode*> nodesSet;
|
|
for (auto const& n : nodes) {
|
|
TRI_ASSERT(nodesSet.find(n) == nodesSet.end());
|
|
nodesSet.emplace(n);
|
|
}
|
|
|
|
std::vector<ExecutionNode*> nodesToPermute;
|
|
std::vector<size_t> permTuple;
|
|
std::vector<size_t> starts;
|
|
|
|
// We use that the order of the nodes is such that a node B that is among the
|
|
// recursive dependencies of a node A is later in the vector.
|
|
for (auto const& n : nodes) {
|
|
if (nodesSet.find(n) != nodesSet.end()) {
|
|
std::vector<ExecutionNode*> nn{ n };
|
|
nodesSet.erase(n);
|
|
|
|
// Now follow the dependencies as long as we see further such nodes:
|
|
auto nwalker = n;
|
|
|
|
while (true) {
|
|
if (! nwalker->hasDependency()) {
|
|
break;
|
|
}
|
|
|
|
auto dep = nwalker->getFirstDependency();
|
|
|
|
if (dep->getType() != EN::ENUMERATE_COLLECTION) {
|
|
break;
|
|
}
|
|
|
|
nwalker = dep;
|
|
nn.emplace_back(nwalker);
|
|
nodesSet.erase(nwalker);
|
|
}
|
|
|
|
if (nn.size() > 1) {
|
|
// Move it into the permutation tuple:
|
|
starts.emplace_back(permTuple.size());
|
|
|
|
for (auto const& nnn : nn) {
|
|
nodesToPermute.emplace_back(nnn);
|
|
permTuple.emplace_back(permTuple.size());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Now we have collected all the runs of EnumerateCollectionNodes in the
|
|
// plan, we need to compute all possible permutations of all of them,
|
|
// independently. This is why we need to compute all permutation tuples.
|
|
|
|
opt->addPlan(plan, rule, false);
|
|
|
|
if (! starts.empty()) {
|
|
NextPermutationTuple(permTuple, starts); // will never return false
|
|
|
|
do {
|
|
// Clone the plan:
|
|
auto newPlan = plan->clone();
|
|
|
|
try { // get rid of plan if any of this fails
|
|
// Find the nodes in the new plan corresponding to the ones in the
|
|
// old plan that we want to permute:
|
|
std::vector<ExecutionNode*> newNodes;
|
|
for (size_t j = 0; j < nodesToPermute.size(); j++) {
|
|
newNodes.emplace_back(newPlan->getNodeById(nodesToPermute[j]->id()));
|
|
}
|
|
|
|
// Now get going with the permutations:
|
|
for (size_t i = 0; i < starts.size(); i++) {
|
|
size_t lowBound = starts[i];
|
|
size_t highBound = (i < starts.size()-1)
|
|
? starts[i+1]
|
|
: permTuple.size();
|
|
// We need to remove the nodes
|
|
// newNodes[lowBound..highBound-1] in newPlan and replace
|
|
// them by the same ones in a different order, given by
|
|
// permTuple[lowBound..highBound-1].
|
|
auto const& parents = newNodes[lowBound]->getParents();
|
|
|
|
TRI_ASSERT(parents.size() == 1);
|
|
auto parent = parents[0]; // needed for insertion later
|
|
|
|
// Unlink all those nodes:
|
|
for (size_t j = lowBound; j < highBound; j++) {
|
|
newPlan->unlinkNode(newNodes[j]);
|
|
}
|
|
|
|
// And insert them in the new order:
|
|
for (size_t j = highBound; j-- != lowBound; ) {
|
|
newPlan->insertDependency(parent, newNodes[permTuple[j]]);
|
|
}
|
|
}
|
|
|
|
// OK, the new plan is ready, let's report it:
|
|
if (! opt->addPlan(newPlan, rule, true)) {
|
|
// have enough plans. stop permutations
|
|
break;
|
|
}
|
|
}
|
|
catch (...) {
|
|
delete newPlan;
|
|
throw;
|
|
}
|
|
|
|
}
|
|
while (NextPermutationTuple(permTuple, starts));
|
|
}
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief scatter operations in cluster
|
|
/// this rule inserts scatter, gather and remote nodes so operations on sharded
|
|
/// collections actually work
|
|
/// it will change plans in place
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::scatterInClusterRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
bool wasModified = false;
|
|
|
|
if (triagens::arango::ServerState::instance()->isCoordinator()) {
|
|
// we are a coordinator. now look in the plan for nodes of type
|
|
// EnumerateCollectionNode and IndexRangeNode
|
|
std::vector<ExecutionNode::NodeType> const types = {
|
|
ExecutionNode::ENUMERATE_COLLECTION,
|
|
ExecutionNode::INDEX_RANGE,
|
|
ExecutionNode::INSERT,
|
|
ExecutionNode::UPDATE,
|
|
ExecutionNode::REPLACE,
|
|
ExecutionNode::REMOVE
|
|
};
|
|
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(types, true);
|
|
|
|
for (auto& node: nodes) {
|
|
// found a node we need to replace in the plan
|
|
|
|
auto const& parents = node->getParents();
|
|
auto const& deps = node->getDependencies();
|
|
TRI_ASSERT(deps.size() == 1);
|
|
bool const isRootNode = plan->isRoot(node);
|
|
// don't do this if we are already distributing!
|
|
if (deps[0]->getType() == ExecutionNode::REMOTE &&
|
|
deps[0]->getFirstDependency()->getType() == ExecutionNode::DISTRIBUTE) {
|
|
continue;
|
|
}
|
|
plan->unlinkNode(node, isRootNode);
|
|
|
|
auto const nodeType = node->getType();
|
|
|
|
// extract database and collection from plan node
|
|
TRI_vocbase_t* vocbase = nullptr;
|
|
Collection const* collection = nullptr;
|
|
|
|
if (nodeType == ExecutionNode::ENUMERATE_COLLECTION) {
|
|
vocbase = static_cast<EnumerateCollectionNode*>(node)->vocbase();
|
|
collection = static_cast<EnumerateCollectionNode*>(node)->collection();
|
|
}
|
|
else if (nodeType == ExecutionNode::INDEX_RANGE) {
|
|
vocbase = static_cast<IndexRangeNode*>(node)->vocbase();
|
|
collection = static_cast<IndexRangeNode*>(node)->collection();
|
|
}
|
|
else if (nodeType == ExecutionNode::INSERT ||
|
|
nodeType == ExecutionNode::UPDATE ||
|
|
nodeType == ExecutionNode::REPLACE ||
|
|
nodeType == ExecutionNode::REMOVE ||
|
|
nodeType == ExecutionNode::UPSERT) {
|
|
vocbase = static_cast<ModificationNode*>(node)->vocbase();
|
|
collection = static_cast<ModificationNode*>(node)->collection();
|
|
if (nodeType == ExecutionNode::REMOVE ||
|
|
nodeType == ExecutionNode::UPDATE) {
|
|
// Note that in the REPLACE or UPSERT case we are not getting here, since
|
|
// the distributeInClusterRule fires and a DistributionNode is
|
|
// used.
|
|
auto* modNode = static_cast<ModificationNode*>(node);
|
|
modNode->getOptions().ignoreDocumentNotFound = true;
|
|
}
|
|
}
|
|
else {
|
|
TRI_ASSERT(false);
|
|
}
|
|
|
|
// insert a scatter node
|
|
ExecutionNode* scatterNode = new ScatterNode(plan, plan->nextId(),
|
|
vocbase, collection);
|
|
plan->registerNode(scatterNode);
|
|
scatterNode->addDependency(deps[0]);
|
|
|
|
// insert a remote node
|
|
ExecutionNode* remoteNode = new RemoteNode(plan, plan->nextId(), vocbase,
|
|
collection, "", "", "");
|
|
plan->registerNode(remoteNode);
|
|
remoteNode->addDependency(scatterNode);
|
|
|
|
// re-link with the remote node
|
|
node->addDependency(remoteNode);
|
|
|
|
// insert another remote node
|
|
remoteNode = new RemoteNode(plan, plan->nextId(), vocbase, collection, "", "", "");
|
|
plan->registerNode(remoteNode);
|
|
remoteNode->addDependency(node);
|
|
|
|
// insert a gather node
|
|
ExecutionNode* gatherNode = new GatherNode(plan, plan->nextId(), vocbase,
|
|
collection);
|
|
plan->registerNode(gatherNode);
|
|
gatherNode->addDependency(remoteNode);
|
|
|
|
// and now link the gather node with the rest of the plan
|
|
if (parents.size() == 1) {
|
|
parents[0]->replaceDependency(deps[0], gatherNode);
|
|
}
|
|
|
|
if (isRootNode) {
|
|
// if we replaced the root node, set a new root node
|
|
plan->root(gatherNode);
|
|
}
|
|
wasModified = true;
|
|
}
|
|
}
|
|
|
|
if (wasModified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, wasModified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief distribute operations in cluster
|
|
///
|
|
/// this rule inserts distribute, remote nodes so operations on sharded
|
|
/// collections actually work, this differs from scatterInCluster in that every
|
|
/// incoming row is only sent to one shard and not all as in scatterInCluster
|
|
///
|
|
/// it will change plans in place
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::distributeInClusterRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
bool wasModified = false;
|
|
|
|
if (triagens::arango::ServerState::instance()->isCoordinator()) {
|
|
// we are a coordinator, we replace the root if it is a modification node
|
|
|
|
// only replace if it is the last node in the plan
|
|
auto node = plan->root();
|
|
TRI_ASSERT(node != nullptr);
|
|
|
|
while (node != nullptr) {
|
|
// loop until we find a modification node or the end of the plan
|
|
auto nodeType = node->getType();
|
|
|
|
if (nodeType == ExecutionNode::INSERT ||
|
|
nodeType == ExecutionNode::REMOVE ||
|
|
nodeType == ExecutionNode::UPDATE ||
|
|
nodeType == ExecutionNode::REPLACE ||
|
|
nodeType == ExecutionNode::UPSERT) {
|
|
// found a node!
|
|
break;
|
|
}
|
|
|
|
if (! node->hasDependency()) {
|
|
// reached the end
|
|
opt->addPlan(plan, rule, wasModified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
node = node->getFirstDependency();
|
|
}
|
|
|
|
TRI_ASSERT(node != nullptr);
|
|
|
|
if (node == nullptr) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error");
|
|
}
|
|
|
|
ExecutionNode* originalParent = nullptr;
|
|
{
|
|
if (node->hasParent()) {
|
|
auto const& parents = node->getParents();
|
|
originalParent = parents[0];
|
|
TRI_ASSERT(originalParent != nullptr);
|
|
TRI_ASSERT(node != plan->root());
|
|
}
|
|
else {
|
|
TRI_ASSERT(node == plan->root());
|
|
}
|
|
}
|
|
|
|
// when we get here, we have found a matching data-modification node!
|
|
auto const nodeType = node->getType();
|
|
|
|
TRI_ASSERT(nodeType == ExecutionNode::INSERT ||
|
|
nodeType == ExecutionNode::REMOVE ||
|
|
nodeType == ExecutionNode::UPDATE ||
|
|
nodeType == ExecutionNode::REPLACE ||
|
|
nodeType == ExecutionNode::UPSERT);
|
|
|
|
Collection const* collection = static_cast<ModificationNode*>(node)->collection();
|
|
|
|
bool const defaultSharding = collection->usesDefaultSharding();
|
|
|
|
if (nodeType == ExecutionNode::REMOVE ||
|
|
nodeType == ExecutionNode::UPDATE) {
|
|
if (! defaultSharding) {
|
|
// We have to use a ScatterNode.
|
|
opt->addPlan(plan, rule, wasModified);
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
}
|
|
|
|
|
|
// In the INSERT and REPLACE cases we use a DistributeNode...
|
|
|
|
TRI_ASSERT(node->hasDependency());
|
|
auto const& deps = node->getDependencies();
|
|
|
|
if (originalParent != nullptr) {
|
|
originalParent->removeDependency(node);
|
|
// unlink the node
|
|
auto root = plan->root();
|
|
plan->unlinkNode(node, true);
|
|
plan->root(root, true); // fix root node
|
|
}
|
|
else {
|
|
// unlink the node
|
|
plan->unlinkNode(node, true);
|
|
plan->root(deps[0], true); // fix root node
|
|
}
|
|
|
|
|
|
// extract database from plan node
|
|
TRI_vocbase_t* vocbase = static_cast<ModificationNode*>(node)->vocbase();
|
|
|
|
// insert a distribute node
|
|
ExecutionNode* distNode = nullptr;
|
|
Variable const* inputVariable;
|
|
if (nodeType == ExecutionNode::INSERT ||
|
|
nodeType == ExecutionNode::REMOVE) {
|
|
TRI_ASSERT(node->getVariablesUsedHere().size() == 1);
|
|
|
|
// in case of an INSERT, the DistributeNode is responsible for generating keys
|
|
// if none present
|
|
bool const createKeys = (nodeType == ExecutionNode::INSERT);
|
|
inputVariable = node->getVariablesUsedHere()[0];
|
|
distNode = new DistributeNode(plan, plan->nextId(),
|
|
vocbase, collection, inputVariable->id, createKeys);
|
|
}
|
|
else if (nodeType == ExecutionNode::REPLACE) {
|
|
std::vector<Variable const*> v = node->getVariablesUsedHere();
|
|
if (defaultSharding && v.size() > 1) {
|
|
// We only look into _inKeyVariable
|
|
inputVariable = v[1];
|
|
}
|
|
else {
|
|
// We only look into _inDocVariable
|
|
inputVariable = v[0];
|
|
}
|
|
distNode = new DistributeNode(plan, plan->nextId(),
|
|
vocbase, collection, inputVariable->id, false);
|
|
}
|
|
else if (nodeType == ExecutionNode::UPDATE) {
|
|
std::vector<Variable const*> v = node->getVariablesUsedHere();
|
|
if (v.size() > 1) {
|
|
// If there is a key variable:
|
|
inputVariable = v[1];
|
|
// This is the _inKeyVariable! This works, since we use a ScatterNode
|
|
// for non-default-sharding attributes.
|
|
}
|
|
else {
|
|
// was only UPDATE <doc> IN <collection>
|
|
inputVariable = v[0];
|
|
}
|
|
distNode = new DistributeNode(plan, plan->nextId(),
|
|
vocbase, collection, inputVariable->id, false);
|
|
}
|
|
else if (nodeType == ExecutionNode::UPSERT) {
|
|
// an UPSERT nodes has two input variables!
|
|
std::vector<Variable const*> const&& v = node->getVariablesUsedHere();
|
|
TRI_ASSERT(v.size() >= 2);
|
|
|
|
distNode = new DistributeNode(plan, plan->nextId(),
|
|
vocbase, collection, v[0]->id, v[2]->id, false);
|
|
}
|
|
else {
|
|
TRI_ASSERT(false);
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error");
|
|
}
|
|
|
|
TRI_ASSERT(distNode != nullptr);
|
|
|
|
plan->registerNode(distNode);
|
|
distNode->addDependency(deps[0]);
|
|
|
|
// insert a remote node
|
|
ExecutionNode* remoteNode = new RemoteNode(plan, plan->nextId(), vocbase,
|
|
collection, "", "", "");
|
|
plan->registerNode(remoteNode);
|
|
remoteNode->addDependency(distNode);
|
|
|
|
// re-link with the remote node
|
|
node->addDependency(remoteNode);
|
|
|
|
// insert another remote node
|
|
remoteNode = new RemoteNode(plan, plan->nextId(), vocbase, collection, "", "", "");
|
|
plan->registerNode(remoteNode);
|
|
remoteNode->addDependency(node);
|
|
|
|
// insert a gather node
|
|
ExecutionNode* gatherNode = new GatherNode(plan, plan->nextId(), vocbase, collection);
|
|
plan->registerNode(gatherNode);
|
|
gatherNode->addDependency(remoteNode);
|
|
|
|
if (originalParent != nullptr) {
|
|
// we did not replace the root node
|
|
originalParent->addDependency(gatherNode);
|
|
}
|
|
else {
|
|
// we replaced the root node, set a new root node
|
|
plan->root(gatherNode, true);
|
|
}
|
|
wasModified = true;
|
|
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, wasModified);
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief move filters up into the cluster distribution part of the plan
|
|
/// this rule modifies the plan in place
|
|
/// filters are moved as far up in the plan as possible to make result sets
|
|
/// as small as possible as early as possible
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::distributeFilternCalcToClusterRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
bool modified = false;
|
|
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::GATHER, true);
|
|
|
|
for (auto& n : nodes) {
|
|
auto const& remoteNodeList = n->getDependencies();
|
|
TRI_ASSERT(remoteNodeList.size() > 0);
|
|
auto rn = remoteNodeList[0];
|
|
|
|
if (! n->hasParent()) {
|
|
continue;
|
|
}
|
|
|
|
auto parents = n->getParents();
|
|
|
|
while (true) {
|
|
bool stopSearching = false;
|
|
auto inspectNode = parents[0];
|
|
|
|
switch (inspectNode->getType()) {
|
|
case EN::ENUMERATE_LIST:
|
|
case EN::SINGLETON:
|
|
case EN::INSERT:
|
|
case EN::REMOVE:
|
|
case EN::REPLACE:
|
|
case EN::UPDATE:
|
|
case EN::UPSERT:
|
|
parents = inspectNode->getParents();
|
|
continue;
|
|
|
|
case EN::AGGREGATE:
|
|
case EN::SUBQUERY:
|
|
case EN::RETURN:
|
|
case EN::NORESULTS:
|
|
case EN::SCATTER:
|
|
case EN::DISTRIBUTE:
|
|
case EN::GATHER:
|
|
case EN::ILLEGAL:
|
|
case EN::REMOTE:
|
|
case EN::LIMIT:
|
|
case EN::SORT:
|
|
case EN::INDEX_RANGE:
|
|
case EN::ENUMERATE_COLLECTION:
|
|
//do break
|
|
stopSearching = true;
|
|
break;
|
|
|
|
case EN::CALCULATION: {
|
|
auto calc = static_cast<CalculationNode const*>(inspectNode);
|
|
// check if the expression can be executed on a DB server safely
|
|
if (! calc->expression()->canRunOnDBServer()) {
|
|
stopSearching = true;
|
|
break;
|
|
}
|
|
// intentionally fall through here
|
|
}
|
|
case EN::FILTER:
|
|
// remember our cursor...
|
|
parents = inspectNode->getParents();
|
|
// then unlink the filter/calculator from the plan
|
|
plan->unlinkNode(inspectNode);
|
|
// and re-insert into plan in front of the remoteNode
|
|
plan->insertDependency(rn, inspectNode);
|
|
|
|
modified = true;
|
|
//ready to rumble!
|
|
break;
|
|
}
|
|
|
|
if (stopSearching) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief move sorts up into the cluster distribution part of the plan
|
|
/// this rule modifies the plan in place
|
|
/// sorts are moved as far up in the plan as possible to make result sets
|
|
/// as small as possible as early as possible
|
|
///
|
|
/// filters are not pushed beyond limits
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::distributeSortToClusterRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
bool modified = false;
|
|
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::GATHER, true);
|
|
|
|
for (auto& n : nodes) {
|
|
auto const& remoteNodeList = n->getDependencies();
|
|
auto gatherNode = static_cast<GatherNode*>(n);
|
|
TRI_ASSERT(remoteNodeList.size() > 0);
|
|
auto rn = remoteNodeList[0];
|
|
|
|
if (! n->hasParent()) {
|
|
continue;
|
|
}
|
|
|
|
auto parents = n->getParents();
|
|
|
|
while (1) {
|
|
bool stopSearching = false;
|
|
|
|
auto inspectNode = parents[0];
|
|
|
|
switch (inspectNode->getType()) {
|
|
case EN::ENUMERATE_LIST:
|
|
case EN::SINGLETON:
|
|
case EN::AGGREGATE:
|
|
case EN::INSERT:
|
|
case EN::REMOVE:
|
|
case EN::REPLACE:
|
|
case EN::UPDATE:
|
|
case EN::UPSERT:
|
|
case EN::CALCULATION:
|
|
case EN::FILTER:
|
|
case EN::SUBQUERY:
|
|
case EN::RETURN:
|
|
case EN::NORESULTS:
|
|
case EN::SCATTER:
|
|
case EN::DISTRIBUTE:
|
|
case EN::GATHER:
|
|
case EN::ILLEGAL:
|
|
case EN::REMOTE:
|
|
case EN::LIMIT:
|
|
case EN::INDEX_RANGE:
|
|
case EN::ENUMERATE_COLLECTION:
|
|
// For all these, we do not want to pull a SortNode further down
|
|
// out to the DBservers, note that potential FilterNodes and
|
|
// CalculationNodes that can be moved to the DBservers have
|
|
// already been moved over by the distribute-filtercalc-to-cluster
|
|
// rule which is done first.
|
|
stopSearching = true;
|
|
break;
|
|
case EN::SORT:
|
|
auto thisSortNode = static_cast<SortNode*>(inspectNode);
|
|
|
|
// remember our cursor...
|
|
parents = inspectNode->getParents();
|
|
// then unlink the filter/calculator from the plan
|
|
plan->unlinkNode(inspectNode);
|
|
// and re-insert into plan in front of the remoteNode
|
|
plan->insertDependency(rn, inspectNode);
|
|
gatherNode->setElements(thisSortNode->getElements());
|
|
modified = true;
|
|
//ready to rumble!
|
|
}
|
|
|
|
if (stopSearching) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (modified) {
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, modified);
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief try to get rid of a RemoteNode->ScatterNode combination which has
|
|
/// only a SingletonNode and possibly some CalculationNodes as dependencies
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeUnnecessaryRemoteScatterRule (Optimizer* opt,
|
|
ExecutionPlan* plan,
|
|
Optimizer::Rule const* rule) {
|
|
std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::REMOTE, true);
|
|
std::unordered_set<ExecutionNode*> toUnlink;
|
|
|
|
for (auto& n : nodes) {
|
|
// check if the remote node is preceeded by a scatter node and any number of
|
|
// calculation and singleton nodes. if yes, remove remote and scatter
|
|
if (! n->hasDependency()) {
|
|
continue;
|
|
}
|
|
|
|
auto const dep = n->getFirstDependency();
|
|
if (dep->getType() != EN::SCATTER) {
|
|
continue;
|
|
}
|
|
|
|
bool canOptimize = true;
|
|
auto node = dep;
|
|
while (node != nullptr) {
|
|
auto const& d = node->getDependencies();
|
|
|
|
if (d.size() != 1) {
|
|
break;
|
|
}
|
|
|
|
node = d[0];
|
|
if (node->getType() != EN::SINGLETON &&
|
|
node->getType() != EN::CALCULATION) {
|
|
// found some other node type...
|
|
// this disqualifies the optimization
|
|
canOptimize = false;
|
|
break;
|
|
}
|
|
|
|
if (node->getType() == EN::CALCULATION) {
|
|
auto calc = static_cast<CalculationNode const*>(node);
|
|
// check if the expression can be executed on a DB server safely
|
|
if (! calc->expression()->canRunOnDBServer()) {
|
|
canOptimize = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (canOptimize) {
|
|
toUnlink.emplace(n);
|
|
toUnlink.emplace(dep);
|
|
}
|
|
}
|
|
|
|
if (! toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
plan->findVarUsage();
|
|
}
|
|
|
|
opt->addPlan(plan, rule, ! toUnlink.empty());
|
|
|
|
return TRI_ERROR_NO_ERROR;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// WalkerWorker for undistributeRemoveAfterEnumColl
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
class RemoveToEnumCollFinder final : public WalkerWorker<ExecutionNode> {
|
|
ExecutionPlan* _plan;
|
|
std::unordered_set<ExecutionNode*>& _toUnlink;
|
|
bool _remove;
|
|
bool _scatter;
|
|
bool _gather;
|
|
EnumerateCollectionNode* _enumColl;
|
|
ExecutionNode* _setter;
|
|
const Variable* _variable;
|
|
ExecutionNode* _lastNode;
|
|
|
|
public:
|
|
RemoveToEnumCollFinder (ExecutionPlan* plan,
|
|
std::unordered_set<ExecutionNode*>& toUnlink)
|
|
: _plan(plan),
|
|
_toUnlink(toUnlink),
|
|
_remove(false),
|
|
_scatter(false),
|
|
_gather(false),
|
|
_enumColl(nullptr),
|
|
_setter(nullptr),
|
|
_variable(nullptr),
|
|
_lastNode(nullptr) {
|
|
};
|
|
|
|
~RemoveToEnumCollFinder () {
|
|
}
|
|
|
|
bool before (ExecutionNode* en) override final {
|
|
switch (en->getType()) {
|
|
case EN::REMOVE: {
|
|
TRI_ASSERT(_remove == false);
|
|
|
|
// find the variable we are removing . . .
|
|
auto rn = static_cast<RemoveNode*>(en);
|
|
auto varsToRemove = rn->getVariablesUsedHere();
|
|
|
|
// remove nodes always have one input variable
|
|
TRI_ASSERT(varsToRemove.size() == 1);
|
|
|
|
_setter = _plan->getVarSetBy(varsToRemove[0]->id);
|
|
TRI_ASSERT(_setter != nullptr);
|
|
auto enumColl = _setter;
|
|
|
|
if (_setter->getType() == EN::CALCULATION) {
|
|
// this should be an attribute access for _key
|
|
auto cn = static_cast<CalculationNode*>(_setter);
|
|
if (! cn->expression()->isAttributeAccess()) {
|
|
break; // abort . . .
|
|
}
|
|
// check the variable is the same as the remove variable
|
|
auto vars = cn->getVariablesSetHere();
|
|
if (vars.size() != 1 || vars[0]->id != varsToRemove[0]->id) {
|
|
break; // abort . . .
|
|
}
|
|
// check the remove node's collection is sharded over _key
|
|
std::vector<std::string> shardKeys = rn->collection()->shardKeys();
|
|
if (shardKeys.size() != 1 || shardKeys[0] != TRI_VOC_ATTRIBUTE_KEY) {
|
|
break; // abort . . .
|
|
}
|
|
|
|
// set the varsToRemove to the variable in the expression of this
|
|
// node and also define enumColl
|
|
varsToRemove = cn->getVariablesUsedHere();
|
|
TRI_ASSERT(varsToRemove.size() == 1);
|
|
enumColl = _plan->getVarSetBy(varsToRemove[0]->id);
|
|
TRI_ASSERT(_setter != nullptr);
|
|
}
|
|
|
|
if (enumColl->getType() != EN::ENUMERATE_COLLECTION) {
|
|
break; // abort . . .
|
|
}
|
|
|
|
_enumColl = static_cast<EnumerateCollectionNode*>(enumColl);
|
|
|
|
if (_enumColl->collection() != rn->collection()) {
|
|
break; // abort . . .
|
|
}
|
|
|
|
_variable = varsToRemove[0]; // the variable we'll remove
|
|
_remove = true;
|
|
_lastNode = en;
|
|
return false; // continue . . .
|
|
}
|
|
case EN::REMOTE: {
|
|
_toUnlink.emplace(en);
|
|
_lastNode = en;
|
|
return false; // continue . . .
|
|
}
|
|
case EN::DISTRIBUTE:
|
|
case EN::SCATTER: {
|
|
if (_scatter) { // met more than one scatter node
|
|
break; // abort . . .
|
|
}
|
|
_scatter = true;
|
|
_toUnlink.emplace(en);
|
|
_lastNode = en;
|
|
return false; // continue . . .
|
|
}
|
|
case EN::GATHER: {
|
|
if (_gather) { // met more than one gather node
|
|
break; // abort . . .
|
|
}
|
|
_gather = true;
|
|
_toUnlink.emplace(en);
|
|
_lastNode = en;
|
|
return false; // continue . . .
|
|
}
|
|
case EN::FILTER: {
|
|
_lastNode = en;
|
|
return false; // continue . . .
|
|
}
|
|
case EN::CALCULATION: {
|
|
TRI_ASSERT(_setter != nullptr);
|
|
if (_setter->getType() == EN::CALCULATION && _setter->id() == en->id()) {
|
|
_lastNode = en;
|
|
return false; // continue . . .
|
|
}
|
|
if (_lastNode == nullptr || _lastNode->getType() != EN::FILTER) {
|
|
// doesn't match the last filter node
|
|
break; // abort . . .
|
|
}
|
|
auto cn = static_cast<CalculationNode*>(en);
|
|
auto fn = static_cast<FilterNode*>(_lastNode);
|
|
|
|
// check these are a Calc-Filter pair
|
|
if (cn->getVariablesSetHere()[0]->id != fn->getVariablesUsedHere()[0]->id) {
|
|
break; // abort . . .
|
|
}
|
|
|
|
// check that we are filtering/calculating something with the variable
|
|
// we are to remove
|
|
auto varsUsedHere = cn->getVariablesUsedHere();
|
|
|
|
if (varsUsedHere.size() != 1) {
|
|
break; //abort . . .
|
|
}
|
|
if (varsUsedHere[0]->id != _variable->id) {
|
|
break;
|
|
}
|
|
_lastNode = en;
|
|
return false; // continue . . .
|
|
}
|
|
case EN::ENUMERATE_COLLECTION: {
|
|
// check that we are enumerating the variable we are to remove
|
|
// and that we have already seen a remove node
|
|
TRI_ASSERT(_enumColl != nullptr);
|
|
if (en->id() != _enumColl->id()) {
|
|
break;
|
|
}
|
|
return true; // reached the end!
|
|
}
|
|
case EN::SINGLETON:
|
|
case EN::ENUMERATE_LIST:
|
|
case EN::SUBQUERY:
|
|
case EN::AGGREGATE:
|
|
case EN::INSERT:
|
|
case EN::REPLACE:
|
|
case EN::UPDATE:
|
|
case EN::UPSERT:
|
|
case EN::RETURN:
|
|
case EN::NORESULTS:
|
|
case EN::ILLEGAL:
|
|
case EN::LIMIT:
|
|
case EN::SORT:
|
|
case EN::INDEX_RANGE: {
|
|
// if we meet any of the above, then we abort . . .
|
|
}
|
|
}
|
|
_toUnlink.clear();
|
|
return true;
|
|
}
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief recognises that a RemoveNode can be moved to the shards.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::undistributeRemoveAfterEnumCollRule (Optimizer* opt,
                                                        ExecutionPlan* plan,
                                                        Optimizer::Rule const* rule) {
  // every REMOVE node (including those in subqueries) is a starting point
  std::vector<ExecutionNode*>&& removeNodes = plan->findNodesOfType(EN::REMOVE, true);

  // shared between all finders: the nodes that are to be taken out of the plan
  std::unordered_set<ExecutionNode*> toUnlink;

  for (auto& removeNode : removeNodes) {
    // a fresh finder per REMOVE node, all of them write into the same set
    RemoveToEnumCollFinder finder(plan, toUnlink);
    removeNode->walk(&finder);
  }

  // the plan only changes if at least one node was collected
  bool modified = false;

  if (toUnlink.empty() == false) {
    plan->unlinkNodes(toUnlink);
    plan->findVarUsage();
    modified = true;
  }

  opt->addPlan(plan, rule, modified);

  return TRI_ERROR_NO_ERROR;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief auxiliary struct for finding common nodes in OR conditions
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
struct CommonNodeFinder {
|
|
std::vector<AstNode const*> possibleNodes;
|
|
|
|
bool find (AstNode const* node,
|
|
AstNodeType condition,
|
|
AstNode const*& commonNode,
|
|
std::string& commonName) {
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
|
|
return (find(node->getMember(0), condition, commonNode, commonName)
|
|
&& find(node->getMember(1), condition, commonNode, commonName));
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_VALUE) {
|
|
possibleNodes.clear();
|
|
return true;
|
|
}
|
|
|
|
if (node->type == condition
|
|
|| (condition != NODE_TYPE_OPERATOR_BINARY_EQ
|
|
&& ( node->type == NODE_TYPE_OPERATOR_BINARY_LE
|
|
|| node->type == NODE_TYPE_OPERATOR_BINARY_LT
|
|
|| node->type == NODE_TYPE_OPERATOR_BINARY_GE
|
|
|| node->type == NODE_TYPE_OPERATOR_BINARY_GT ))) {
|
|
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
if (lhs->isConstant()) {
|
|
commonNode = rhs;
|
|
commonName = commonNode->toString();
|
|
possibleNodes.clear();
|
|
return true;
|
|
}
|
|
|
|
if (rhs->isConstant()) {
|
|
commonNode = lhs;
|
|
commonName = commonNode->toString();
|
|
possibleNodes.clear();
|
|
return true;
|
|
}
|
|
|
|
if (rhs->type == NODE_TYPE_FCALL ||
|
|
rhs->type == NODE_TYPE_FCALL_USER ||
|
|
rhs->type == NODE_TYPE_REFERENCE) {
|
|
commonNode = lhs;
|
|
commonName = commonNode->toString();
|
|
possibleNodes.clear();
|
|
return true;
|
|
}
|
|
|
|
if (lhs->type == NODE_TYPE_FCALL ||
|
|
lhs->type == NODE_TYPE_FCALL_USER ||
|
|
lhs->type == NODE_TYPE_REFERENCE) {
|
|
commonNode = rhs;
|
|
commonName = commonNode->toString();
|
|
possibleNodes.clear();
|
|
return true;
|
|
}
|
|
|
|
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
|
|
lhs->type == NODE_TYPE_INDEXED_ACCESS) {
|
|
if (possibleNodes.size() == 2) {
|
|
for (size_t i = 0; i < 2; i++) {
|
|
if (lhs->toString() == possibleNodes[i]->toString()) {
|
|
commonNode = possibleNodes[i];
|
|
commonName = commonNode->toString();
|
|
possibleNodes.clear();
|
|
return true;
|
|
}
|
|
}
|
|
// don't return, must consider the other side of the condition
|
|
}
|
|
else {
|
|
possibleNodes.emplace_back(lhs);
|
|
}
|
|
}
|
|
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
|
|
rhs->type == NODE_TYPE_INDEXED_ACCESS) {
|
|
if (possibleNodes.size() == 2) {
|
|
for (size_t i = 0; i < 2; i++) {
|
|
if (rhs->toString() == possibleNodes[i]->toString()) {
|
|
commonNode = possibleNodes[i];
|
|
commonName = commonNode->toString();
|
|
possibleNodes.clear();
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
else {
|
|
possibleNodes.emplace_back(rhs);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
possibleNodes.clear();
|
|
return (! commonName.empty());
|
|
}
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief auxiliary struct for the OR-to-IN conversion
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
struct OrToInConverter {
|
|
|
|
std::vector<AstNode const*> valueNodes;
|
|
CommonNodeFinder finder;
|
|
AstNode const* commonNode = nullptr;
|
|
std::string commonName;
|
|
|
|
AstNode* buildInExpression (Ast* ast) {
|
|
// the list of comparison values
|
|
auto list = ast->createNodeArray();
|
|
for (auto& x : valueNodes) {
|
|
list->addMember(x);
|
|
}
|
|
|
|
// return a new IN operator node
|
|
return ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_IN,
|
|
commonNode->clone(ast),
|
|
list);
|
|
}
|
|
|
|
bool canConvertExpression (AstNode const* node) {
|
|
if (finder.find(node, NODE_TYPE_OPERATOR_BINARY_EQ, commonNode, commonName)) {
|
|
return canConvertExpressionWalker(node);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool canConvertExpressionWalker (AstNode const* node) {
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
|
|
return (canConvertExpressionWalker(node->getMember(0)) &&
|
|
canConvertExpressionWalker(node->getMember(1)));
|
|
}
|
|
|
|
if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
if (canConvertExpressionWalker(rhs) && ! canConvertExpressionWalker(lhs)) {
|
|
valueNodes.emplace_back(lhs);
|
|
return true;
|
|
}
|
|
|
|
if (canConvertExpressionWalker(lhs) && ! canConvertExpressionWalker(rhs)) {
|
|
valueNodes.emplace_back(rhs);
|
|
return true;
|
|
}
|
|
// if canConvertExpressionWalker(lhs) and canConvertExpressionWalker(rhs), then one of
|
|
// the equalities in the OR statement is of the form x == x
|
|
// fall-through intentional
|
|
}
|
|
else if (node->type == NODE_TYPE_REFERENCE ||
|
|
node->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
|
|
node->type == NODE_TYPE_INDEXED_ACCESS) {
|
|
// get a string representation of the node for comparisons
|
|
return (node->toString() == commonName);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief this rule replaces expressions of the type:
|
|
/// x.val == 1 || x.val == 2 || x.val == 3
|
|
/// with
/// x.val IN [1,2,3]
/// when the OR conditions are present in the same FILTER node, and refer to the
/// same (single) attribute.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::replaceOrWithInRule (Optimizer* opt,
                                        ExecutionPlan* plan,
                                        Optimizer::Rule const* rule) {
  std::vector<ExecutionNode*>&& filterNodes = plan->findNodesOfType(EN::FILTER, true);

  bool modified = false;

  for (auto const& current : filterNodes) {
    TRI_ASSERT(current->hasDependency());

    // only FILTERs that are directly fed by a CALCULATION are of interest
    auto const dependency = current->getFirstDependency();

    if (dependency->getType() != EN::CALCULATION) {
      continue;
    }

    auto filter = static_cast<FilterNode*>(current);
    auto filterInput = filter->getVariablesUsedHere();

    auto calculation = static_cast<CalculationNode*>(dependency);
    auto calculationOutput = calculation->getVariablesSetHere();

    // the calculation must produce exactly the variable the filter tests
    if (calculationOutput.size() != 1) {
      continue;
    }
    if (calculationOutput[0]->id != filterInput[0]->id) {
      continue;
    }

    // and its expression must be an OR at the top level
    if (cn_isNotOr(calculation)) {
      continue;
    }

    OrToInConverter converter;

    if (! converter.canConvertExpression(calculation->expression()->node())) {
      continue;
    }

    ExecutionNode* replacement = nullptr;
    auto inNode = converter.buildInExpression(plan->getAst());

    Expression* expr = new Expression(plan->getAst(), inNode);

    try {
      TRI_IF_FAILURE("OptimizerRules::replaceOrWithInRuleOom") {
        THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
      }

      replacement = new CalculationNode(plan, plan->nextId(), expr, calculationOutput[0]);
    }
    catch (...) {
      // the expression was not handed over to a node, so free it here
      delete expr;
      throw;
    }

    // swap the old calculation for the one with the IN expression
    plan->registerNode(replacement);
    plan->replaceNode(calculation, replacement);
    modified = true;
  }

  if (modified) {
    plan->findVarUsage();
  }
  opt->addPlan(plan, rule, modified);

  return TRI_ERROR_NO_ERROR;
}
|
|
|
|
struct RemoveRedundantOr {
|
|
AstNode const* bestValue = nullptr;
|
|
AstNodeType comparison;
|
|
bool inclusive;
|
|
bool isComparisonSet = false;
|
|
CommonNodeFinder finder;
|
|
AstNode const* commonNode = nullptr;
|
|
std::string commonName;
|
|
|
|
AstNode* createReplacementNode (Ast* ast) {
|
|
TRI_ASSERT(commonNode != nullptr);
|
|
TRI_ASSERT(bestValue != nullptr);
|
|
TRI_ASSERT(isComparisonSet == true);
|
|
return ast->createNodeBinaryOperator(comparison, commonNode->clone(ast),
|
|
bestValue);
|
|
}
|
|
|
|
bool isInclusiveBound (AstNodeType type) {
|
|
return (type == NODE_TYPE_OPERATOR_BINARY_GE || type == NODE_TYPE_OPERATOR_BINARY_LE);
|
|
}
|
|
|
|
int isCompatibleBound (AstNodeType type, AstNode const* value) {
|
|
if ((comparison == NODE_TYPE_OPERATOR_BINARY_LE
|
|
|| comparison == NODE_TYPE_OPERATOR_BINARY_LT) &&
|
|
(type == NODE_TYPE_OPERATOR_BINARY_LE
|
|
|| type == NODE_TYPE_OPERATOR_BINARY_LT)) {
|
|
return -1; //high bound
|
|
}
|
|
else if ((comparison == NODE_TYPE_OPERATOR_BINARY_GE
|
|
|| comparison == NODE_TYPE_OPERATOR_BINARY_GT) &&
|
|
(type == NODE_TYPE_OPERATOR_BINARY_GE
|
|
|| type == NODE_TYPE_OPERATOR_BINARY_GT)) {
|
|
return 1; //low bound
|
|
}
|
|
return 0; //incompatible bounds
|
|
}
|
|
|
|
// returns false if the existing value is better and true if the input value is
|
|
// better
|
|
bool compareBounds (AstNodeType type, AstNode const* value, int lowhigh) {
|
|
int cmp = CompareAstNodes(bestValue, value, true);
|
|
|
|
if (cmp == 0 && (isInclusiveBound(comparison) != isInclusiveBound(type))) {
|
|
return (isInclusiveBound(type) ? true : false);
|
|
}
|
|
return (cmp * lowhigh == 1);
|
|
}
|
|
|
|
bool hasRedundantCondition (AstNode const* node) {
|
|
if (finder.find(node, NODE_TYPE_OPERATOR_BINARY_LT, commonNode, commonName)) {
|
|
return hasRedundantConditionWalker(node);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool hasRedundantConditionWalker (AstNode const* node) {
|
|
AstNodeType type = node->type;
|
|
|
|
if (type == NODE_TYPE_OPERATOR_BINARY_OR) {
|
|
return (hasRedundantConditionWalker(node->getMember(0)) &&
|
|
hasRedundantConditionWalker(node->getMember(1)));
|
|
}
|
|
|
|
if (type == NODE_TYPE_OPERATOR_BINARY_LE
|
|
|| type == NODE_TYPE_OPERATOR_BINARY_LT
|
|
|| type == NODE_TYPE_OPERATOR_BINARY_GE
|
|
|| type == NODE_TYPE_OPERATOR_BINARY_GT) {
|
|
|
|
auto lhs = node->getMember(0);
|
|
auto rhs = node->getMember(1);
|
|
|
|
if (hasRedundantConditionWalker(rhs)
|
|
&& ! hasRedundantConditionWalker(lhs)
|
|
&& lhs->isConstant()) {
|
|
|
|
if (! isComparisonSet) {
|
|
comparison = Ast::ReverseOperator(type);
|
|
bestValue = lhs;
|
|
isComparisonSet = true;
|
|
return true;
|
|
}
|
|
|
|
int lowhigh = isCompatibleBound(Ast::ReverseOperator(type), lhs);
|
|
if (lowhigh == 0) {
|
|
return false;
|
|
}
|
|
|
|
if (compareBounds(type, lhs, lowhigh)) {
|
|
comparison = Ast::ReverseOperator(type);
|
|
bestValue = lhs;
|
|
}
|
|
return true;
|
|
}
|
|
if (hasRedundantConditionWalker(lhs)
|
|
&& ! hasRedundantConditionWalker(rhs)
|
|
&& rhs->isConstant()) {
|
|
if (! isComparisonSet) {
|
|
comparison = type;
|
|
bestValue = rhs;
|
|
isComparisonSet = true;
|
|
return true;
|
|
}
|
|
|
|
int lowhigh = isCompatibleBound(type, rhs);
|
|
if (lowhigh == 0) {
|
|
return false;
|
|
}
|
|
|
|
if (compareBounds(type, rhs, lowhigh)) {
|
|
comparison = type;
|
|
bestValue = rhs;
|
|
}
|
|
return true;
|
|
}
|
|
// if hasRedundantConditionWalker(lhs) and
|
|
// hasRedundantConditionWalker(rhs), then one of the conditions in the OR
|
|
// statement is of the form x == x fall-through intentional
|
|
}
|
|
else if (type == NODE_TYPE_REFERENCE ||
|
|
type == NODE_TYPE_ATTRIBUTE_ACCESS ||
|
|
type == NODE_TYPE_INDEXED_ACCESS) {
|
|
// get a string representation of the node for comparisons
|
|
return (node->toString() == commonName);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief replaces the OR-connected condition of a CALCULATION node that feeds
/// a FILTER node by the single comparison determined by RemoveRedundantOr
////////////////////////////////////////////////////////////////////////////////

int triagens::aql::removeRedundantOrRule (Optimizer* opt,
                                          ExecutionPlan* plan,
                                          Optimizer::Rule const* rule) {
  std::vector<ExecutionNode*>&& filterNodes = plan->findNodesOfType(EN::FILTER, true);

  bool modified = false;

  for (auto const& current : filterNodes) {
    TRI_ASSERT(current->hasDependency());

    // only FILTERs that are directly fed by a CALCULATION are of interest
    auto const dependency = current->getFirstDependency();

    if (dependency->getType() != EN::CALCULATION) {
      continue;
    }

    auto filter = static_cast<FilterNode*>(current);
    auto filterInput = filter->getVariablesUsedHere();

    auto calculation = static_cast<CalculationNode*>(dependency);
    auto calculationOutput = calculation->getVariablesSetHere();

    // the calculation must produce exactly the variable the filter tests
    if (calculationOutput.size() != 1) {
      continue;
    }
    if (calculationOutput[0]->id != filterInput[0]->id) {
      continue;
    }

    // and its expression must be an OR at the top level
    if (calculation->expression()->node()->type != NODE_TYPE_OPERATOR_BINARY_OR) {
      continue;
    }

    RemoveRedundantOr remover;

    if (! remover.hasRedundantCondition(calculation->expression()->node())) {
      continue;
    }

    Expression* expr = nullptr;
    ExecutionNode* replacement = nullptr;
    auto astNode = remover.createReplacementNode(plan->getAst());

    expr = new Expression(plan->getAst(), astNode);

    try {
      replacement = new CalculationNode(plan, plan->nextId(), expr, calculationOutput[0]);
    }
    catch (...) {
      // the expression was not handed over to a node, so free it here
      delete expr;
      throw;
    }

    // swap the old calculation for the one with the reduced condition
    plan->registerNode(replacement);
    plan->replaceNode(calculation, replacement);
    modified = true;
  }

  if (modified) {
    plan->findVarUsage();
  }
  opt->addPlan(plan, rule, modified);

  return TRI_ERROR_NO_ERROR;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief remove $OLD and $NEW variables from data-modification statements
|
|
/// if not required
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::removeDataModificationOutVariablesRule (Optimizer* opt,
                                                           ExecutionPlan* plan,
                                                           Optimizer::Rule const* rule) {
  // the node types this rule looks at
  std::vector<ExecutionNode::NodeType> const modificationTypes = {
    EN::REMOVE,
    EN::INSERT,
    EN::UPDATE,
    EN::REPLACE,
    EN::UPSERT
  };

  std::vector<ExecutionNode*>&& modificationNodes = plan->findNodesOfType(modificationTypes, true);

  bool modified = false;

  for (auto const& current : modificationNodes) {
    auto modification = static_cast<ModificationNode*>(current);
    TRI_ASSERT(modification != nullptr);

    auto usedLater = current->getVarsUsedLater();
    auto const notFound = usedLater.end();

    // "$OLD" is dropped if no node further down the plan uses it
    if (usedLater.find(modification->getOutVariableOld()) == notFound) {
      modification->clearOutVariableOld();
      modified = true;
    }

    // same for "$NEW"
    if (usedLater.find(modification->getOutVariableNew()) == notFound) {
      modification->clearOutVariableNew();
      modified = true;
    }
  }

  if (modified) {
    plan->findVarUsage();
  }

  opt->addPlan(plan, rule, modified);

  return TRI_ERROR_NO_ERROR;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief patch UPDATE statement on single collection that iterates over the
|
|
/// entire collection to operate in batches
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
int triagens::aql::patchUpdateStatementsRule (Optimizer* opt,
                                              ExecutionPlan* plan,
                                              Optimizer::Rule const* rule) {
  bool modified = false;

  // no need to dive into subqueries here, as UPDATE needs to be on the top level
  std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::UPDATE, false);

  for (auto const& n : nodes) {
    // we should only get through here a single time
    auto node = static_cast<ModificationNode*>(n);
    TRI_ASSERT(node != nullptr);

    auto& options = node->getOptions();
    if (! options.readCompleteInput) {
      // already ok
      continue;
    }

    auto const collection = node->collection();

    // whether this particular UPDATE node can be patched. This is tracked per
    // node, so the verdict for one node can neither leak into the next node
    // nor reset the overall result of the rule
    bool canPatch = false;

    // walk up the chain of first dependencies of the UPDATE node
    auto dep = n->getFirstDependency();

    while (dep != nullptr) {
      auto const type = dep->getType();

      if (type == EN::ENUMERATE_LIST ||
          type == EN::INDEX_RANGE ||
          type == EN::SUBQUERY) {
        // not suitable
        canPatch = false;
        break;
      }

      if (type == EN::ENUMERATE_COLLECTION) {
        auto collectionNode = static_cast<EnumerateCollectionNode const*>(dep);

        if (collectionNode->collection() != collection) {
          // different collection, not suitable
          canPatch = false;
          break;
        }

        // enumerates the collection that is updated; keep walking, a node
        // further up may still make the statement unsuitable
        canPatch = true;
      }

      dep = dep->getFirstDependency();
    }

    if (canPatch) {
      options.readCompleteInput = false;
      modified = true;
    }
  }

  // always re-add the original plan, be it modified or not
  // only a flag in the plan will be modified
  opt->addPlan(plan, rule, modified);

  return TRI_ERROR_NO_ERROR;
}
|
|
|
|
// Local Variables:
|
|
// mode: outline-minor
|
|
// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)"
|
|
// End:
|
|
|