mirror of https://gitee.com/bigwinds/arangodb
Merge branch 'obi-geo-index' into devel
* obi-geo-index: prepare merge with devel WIP: delete SortNode and replace EnumerateCollectionNode with IndexNode add todos and more description of what is going on avoid 3 copies check if index node has geoindex and if the access paths match implement parts of the geoindex optimizer rule add geo index rule
This commit is contained in:
commit
e67296cc6e
|
@ -21,6 +21,33 @@
|
|||
/// @author Max Neunhoeffer
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Execution plans like the one below are made of Nodes that inherit the
|
||||
// ExecutionNode class as a base class.
|
||||
//
|
||||
// Execution plan:
|
||||
// Id NodeType Est. Comment
|
||||
// 1 SingletonNode 1 * ROOT
|
||||
// 2 EnumerateCollectionNode 6400 - FOR d IN ulf /* full collection scan */
|
||||
// 3 CalculationNode 6400 - LET #1 = DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections used: d : ulf */
|
||||
// 4 SortNode 6400 - SORT #1 ASC
|
||||
// 5 LimitNode 5 - LIMIT 0, 5
|
||||
// 6 ReturnNode 5 - RETURN d
|
||||
//
|
||||
// Even though the Singleton Node has a comment saying it is the "ROOT" node
|
||||
// you receive a pointer to LimitNode by calling getFirstParent on the SortNode
|
||||
// (effectively going down the list). If you want to go up from 5 to 4 you need
|
||||
// to call getFirstDependency to get a pointer to the SortNode.
|
||||
//
|
||||
// For most, maybe all, operations you will only need to operate on the dependencies;
|
||||
// the parents will be updated automatically.
|
||||
//
|
||||
// If you wish to unlink (remove) or replace a node you should do it by using
|
||||
// one of the plan's operations.
|
||||
//
|
||||
// addDependency(Parent) has totally different functionality than addDependencies(Parents):
|
||||
// the latter is not adding a list of Dependencies to a node!!!
|
||||
|
||||
#ifndef ARANGOD_AQL_EXECUTION_NODE_H
|
||||
#define ARANGOD_AQL_EXECUTION_NODE_H 1
|
||||
|
||||
|
@ -156,6 +183,8 @@ class ExecutionNode {
|
|||
bool hasDependency() const { return (_dependencies.size() == 1); }
|
||||
|
||||
/// @brief add the node dependencies to a vector
|
||||
/// ATTENTION - this function has nothing to do with the addDependency function
|
||||
// maybe another name should be used.
|
||||
void addDependencies(std::vector<ExecutionNode*>& result) const {
|
||||
for (auto const& it : _dependencies) {
|
||||
result.emplace_back(it);
|
||||
|
@ -433,7 +462,7 @@ class ExecutionNode {
|
|||
return false;
|
||||
}
|
||||
|
||||
ExecutionPlan const* plan() const {
|
||||
ExecutionPlan const* plan() const {
|
||||
return _plan;
|
||||
}
|
||||
|
||||
|
@ -510,7 +539,7 @@ class ExecutionNode {
|
|||
|
||||
/// @brief get depth
|
||||
int getDepth() const { return _depth; }
|
||||
|
||||
|
||||
/// @brief get registers to clear
|
||||
std::unordered_set<RegisterId> const& getRegsToClear() const {
|
||||
return _regsToClear;
|
||||
|
@ -677,7 +706,7 @@ class EnumerateCollectionNode : public ExecutionNode {
|
|||
std::vector<Variable const*> getVariablesSetHere() const override final {
|
||||
return std::vector<Variable const*>{_outVariable};
|
||||
}
|
||||
|
||||
|
||||
/// @brief the node is only non-deterministic if it uses a random sort order
|
||||
bool isDeterministic() override final { return !_random; }
|
||||
|
||||
|
@ -927,7 +956,7 @@ class CalculationNode : public ExecutionNode {
|
|||
|
||||
/// @brief can the node throw?
|
||||
bool canThrow() override final { return _expression->canThrow(); }
|
||||
|
||||
|
||||
bool isDeterministic() override final { return _expression->isDeterministic(); }
|
||||
|
||||
private:
|
||||
|
@ -1014,10 +1043,10 @@ class SubqueryNode : public ExecutionNode {
|
|||
/// *originate* from this node. That is, this method does not need to
|
||||
/// return true just because a dependent node can throw an exception.
|
||||
bool canThrow() override final;
|
||||
|
||||
|
||||
bool isDeterministic() override final;
|
||||
|
||||
bool isConst();
|
||||
bool isConst();
|
||||
|
||||
private:
|
||||
/// @brief we need to have an expression and where to write the result
|
||||
|
@ -1181,7 +1210,7 @@ class NoResultsNode : public ExecutionNode {
|
|||
/// @brief constructor with an id
|
||||
public:
|
||||
NoResultsNode(ExecutionPlan* plan, size_t id) : ExecutionNode(plan, id) {}
|
||||
|
||||
|
||||
NoResultsNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base)
|
||||
: ExecutionNode(plan, base) {}
|
||||
|
||||
|
|
|
@ -347,7 +347,7 @@ void Optimizer::setupRules() {
|
|||
// rule not yet tested
|
||||
registerRule("split-filters",
|
||||
splitFiltersRule,
|
||||
splitFiltersRule_pass1,
|
||||
splitFiltersRule_pass1,
|
||||
true);
|
||||
#endif
|
||||
|
||||
|
@ -413,7 +413,7 @@ void Optimizer::setupRules() {
|
|||
// merge filters into traversals
|
||||
registerRule("optimize-traversals", optimizeTraversalsRule,
|
||||
optimizeTraversalsRule_pass6, DoesNotCreateAdditionalPlans, true);
|
||||
|
||||
|
||||
// prepare traversal info
|
||||
registerRule("prepare-traversals", prepareTraversalsRule,
|
||||
prepareTraversalsRule_pass6, DoesNotCreateAdditionalPlans, false, true);
|
||||
|
@ -485,6 +485,10 @@ void Optimizer::setupRules() {
|
|||
registerRule("patch-update-statements", patchUpdateStatementsRule,
|
||||
patchUpdateStatementsRule_pass9, DoesNotCreateAdditionalPlans, true);
|
||||
|
||||
// try to use a geo index for queries that sort by DISTANCE()
|
||||
registerRule("geo-index-optimizer", optimizeGeoIndexRule,
|
||||
geoDistanceRule, DoesNotCreateAdditionalPlans, true);
|
||||
|
||||
if (arangodb::ServerState::instance()->isCoordinator()) {
|
||||
// distribute operations in cluster
|
||||
registerRule("scatter-in-cluster", scatterInClusterRule,
|
||||
|
|
|
@ -69,7 +69,7 @@ class Optimizer {
|
|||
// determine the "right" type of CollectNode and
|
||||
// add a sort node for each COLLECT (may be removed later)
|
||||
specializeCollectRule_pass1 = 105,
|
||||
|
||||
|
||||
inlineSubqueriesRule_pass1 = 106,
|
||||
|
||||
// split and-combined filters into multiple smaller filters
|
||||
|
@ -192,7 +192,9 @@ class Optimizer {
|
|||
removeUnnecessaryRemoteScatterRule_pass10 = 1040,
|
||||
|
||||
// recognize that a RemoveNode can be moved to the shards
|
||||
undistributeRemoveAfterEnumCollRule_pass10 = 1050
|
||||
undistributeRemoveAfterEnumCollRule_pass10 = 1050,
|
||||
|
||||
geoDistanceRule = 1060
|
||||
};
|
||||
|
||||
public:
|
||||
|
|
|
@ -46,6 +46,10 @@
|
|||
#include "Cluster/ClusterInfo.h"
|
||||
#include "Utils/Transaction.h"
|
||||
#include "VocBase/TraverserOptions.h"
|
||||
#include "Indexes/Index.h"
|
||||
#include <boost/optional.hpp>
|
||||
#include <tuple>
|
||||
#include <iostream>
|
||||
|
||||
using namespace arangodb;
|
||||
using namespace arangodb::aql;
|
||||
|
@ -57,7 +61,7 @@ void arangodb::aql::sortInValuesRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::FILTER, true);
|
||||
|
||||
|
||||
bool modified = false;
|
||||
|
||||
for (auto const& n : nodes) {
|
||||
|
@ -383,7 +387,7 @@ void arangodb::aql::removeUnnecessaryFiltersRule(Optimizer* opt,
|
|||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::FILTER, true);
|
||||
|
||||
|
||||
bool modified = false;
|
||||
std::unordered_set<ExecutionNode*> toUnlink;
|
||||
|
||||
|
@ -447,7 +451,7 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
|
|||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::COLLECT, true);
|
||||
|
||||
|
||||
bool modified = false;
|
||||
|
||||
for (auto const& n : nodes) {
|
||||
|
@ -705,7 +709,7 @@ void arangodb::aql::removeSortRandRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::SORT, true);
|
||||
|
||||
|
||||
bool modified = false;
|
||||
|
||||
for (auto const& n : nodes) {
|
||||
|
@ -954,8 +958,8 @@ void arangodb::aql::moveCalculationsDownRule(Optimizer* opt,
|
|||
} else if (currentType == EN::INDEX ||
|
||||
currentType == EN::ENUMERATE_COLLECTION ||
|
||||
currentType == EN::ENUMERATE_LIST ||
|
||||
currentType == EN::TRAVERSAL ||
|
||||
currentType == EN::SHORTEST_PATH ||
|
||||
currentType == EN::TRAVERSAL ||
|
||||
currentType == EN::SHORTEST_PATH ||
|
||||
currentType == EN::COLLECT ||
|
||||
currentType == EN::NORESULTS) {
|
||||
// we will not push further down than such nodes
|
||||
|
@ -1191,7 +1195,7 @@ void arangodb::aql::moveFiltersUpRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
// must not move a filter beyond a node that can throw
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if (current->isModificationNode()) {
|
||||
// must not move a filter beyond a modification node
|
||||
break;
|
||||
|
@ -1249,7 +1253,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
|
|||
std::unordered_map<VariableId, Variable const*> const& replacements)
|
||||
: _replacements(replacements) {
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void replaceStartTargetVariables(ExecutionNode* en) {
|
||||
auto node = static_cast<T*>(en);
|
||||
|
@ -1271,7 +1275,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
|
|||
auto node = static_cast<CalculationNode*>(en);
|
||||
std::unordered_set<Variable const*> variables;
|
||||
node->expression()->variables(variables);
|
||||
|
||||
|
||||
// check if the calculation uses any of the variables that we want to
|
||||
// replace
|
||||
for (auto const& it : variables) {
|
||||
|
@ -1304,12 +1308,12 @@ class arangodb::aql::RedundantCalculationsReplacer final
|
|||
replaceInVariable<FilterNode>(en);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case EN::TRAVERSAL: {
|
||||
replaceInVariable<TraversalNode>(en);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case EN::SHORTEST_PATH: {
|
||||
replaceStartTargetVariables<ShortestPathNode>(en);
|
||||
break;
|
||||
|
@ -1331,7 +1335,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
|
|||
}
|
||||
// node->_keepVariables does not need to be updated at the moment as the
|
||||
// "remove-redundant-calculations" rule will stop when it finds a COLLECT
|
||||
// with an INTO, and the "inline-subqueries" rule will abort there as well
|
||||
// with an INTO, and the "inline-subqueries" rule will abort there as well
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1352,7 +1356,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
|
|||
replaceInVariable<InsertNode>(en);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case EN::UPSERT: {
|
||||
auto node = static_cast<UpsertNode*>(en);
|
||||
|
||||
|
@ -1379,7 +1383,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
|
|||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case EN::REPLACE: {
|
||||
auto node = static_cast<ReplaceNode*>(en);
|
||||
|
||||
|
@ -1465,7 +1469,7 @@ void arangodb::aql::removeRedundantCalculationsRule(
|
|||
continue;
|
||||
}
|
||||
|
||||
bool const isEqual = (buffer.length() == referenceExpression.size() &&
|
||||
bool const isEqual = (buffer.length() == referenceExpression.size() &&
|
||||
memcmp(buffer.c_str(), referenceExpression.c_str(), buffer.length()) == 0);
|
||||
buffer.reset();
|
||||
|
||||
|
@ -1571,7 +1575,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
|
|||
continue;
|
||||
}
|
||||
// will remove subquery when we get here
|
||||
}
|
||||
}
|
||||
|
||||
auto outvars = n->getVariablesSetHere();
|
||||
TRI_ASSERT(outvars.size() == 1);
|
||||
|
@ -1589,13 +1593,13 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
|
|||
// it's a temporary variable that we can fuse with the other
|
||||
// calculation easily
|
||||
|
||||
if (n->canThrow() ||
|
||||
if (n->canThrow() ||
|
||||
!static_cast<CalculationNode*>(n)->expression()->isDeterministic()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
AstNode const* rootNode = static_cast<CalculationNode*>(n)->expression()->node();
|
||||
|
||||
|
||||
if (rootNode->type == NODE_TYPE_REFERENCE) {
|
||||
// if the LET is a simple reference to another variable, e.g. LET a = b
|
||||
// then replace all references to a with references to b
|
||||
|
@ -1643,7 +1647,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
|
|||
usageCount = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (current->getType() != EN::CALCULATION) {
|
||||
// don't know how to replace the variable in a non-LET node
|
||||
// abort the search
|
||||
|
@ -1651,7 +1655,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
|
|||
break;
|
||||
}
|
||||
|
||||
// got a LET. we can replace the variable reference in it by
|
||||
// got a LET. we can replace the variable reference in it by
|
||||
// something else
|
||||
++usageCount;
|
||||
other = static_cast<CalculationNode*>(current);
|
||||
|
@ -1688,7 +1692,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
|
|||
otherExpression->replaceVariableReference(outvars[0], rootNode);
|
||||
|
||||
toUnlink.emplace(n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1777,7 +1781,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
|
|||
size_t coveredAttributes = 0;
|
||||
auto resultPair = trx->getIndexForSortCondition(
|
||||
enumerateCollectionNode->collection()->getName(),
|
||||
&sortCondition, outVariable,
|
||||
&sortCondition, outVariable,
|
||||
enumerateCollectionNode->collection()->count(),
|
||||
usedIndexes, coveredAttributes);
|
||||
if (resultPair.second) {
|
||||
|
@ -1856,7 +1860,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
|
|||
// all indexes use the same attributes and index conditions guarantee
|
||||
// sorted output
|
||||
}
|
||||
|
||||
|
||||
TRI_ASSERT(indexes.size() == 1 || cond->isSorted());
|
||||
|
||||
// if we get here, we either have one index or multiple indexes on the same
|
||||
|
@ -1883,7 +1887,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
|
|||
// order as the IndexNode...
|
||||
// now check if the sort attributes match the ones of the index
|
||||
size_t const numCovered =
|
||||
sortCondition.coveredAttributes(outVariable, fields);
|
||||
sortCondition.coveredAttributes(outVariable, fields);
|
||||
|
||||
if (numCovered >= sortCondition.numAttributes()) {
|
||||
// sort condition is fully covered by index... now we can remove the
|
||||
|
@ -1908,7 +1912,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
|
|||
// now check if the index fields are the same as the sort condition
|
||||
// fields
|
||||
// e.g. FILTER c.value1 == 1 && c.value2 == 42 SORT c.value1, c.value2
|
||||
size_t const numCovered =
|
||||
size_t const numCovered =
|
||||
sortCondition.coveredAttributes(outVariable, fields);
|
||||
|
||||
if (numCovered == sortCondition.numAttributes() &&
|
||||
|
@ -1997,7 +2001,7 @@ void arangodb::aql::useIndexForSortRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::SORT, true);
|
||||
|
||||
|
||||
bool modified = false;
|
||||
|
||||
for (auto const& n : nodes) {
|
||||
|
@ -2017,11 +2021,11 @@ void arangodb::aql::useIndexForSortRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
/// @brief try to remove filters which are covered by indexes
|
||||
void arangodb::aql::removeFiltersCoveredByIndexRule(
|
||||
Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) {
|
||||
|
||||
|
||||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::FILTER, true);
|
||||
|
||||
|
||||
std::unordered_set<ExecutionNode*> toUnlink;
|
||||
bool modified = false;
|
||||
|
||||
|
@ -2188,7 +2192,7 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule(
|
|||
|
||||
auto dep = nwalker->getFirstDependency();
|
||||
|
||||
if (dep->getType() != EN::ENUMERATE_COLLECTION &&
|
||||
if (dep->getType() != EN::ENUMERATE_COLLECTION &&
|
||||
dep->getType() != EN::ENUMERATE_LIST) {
|
||||
break;
|
||||
}
|
||||
|
@ -2284,7 +2288,7 @@ void arangodb::aql::scatterInClusterRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
if (arangodb::ServerState::instance()->isCoordinator()) {
|
||||
// find subqueries
|
||||
std::unordered_map<ExecutionNode*, ExecutionNode*> subqueries;
|
||||
|
||||
|
||||
SmallVector<ExecutionNode*>::allocator_type::arena_type s;
|
||||
SmallVector<ExecutionNode*> subs{s};
|
||||
plan->findNodesOfType(subs, ExecutionNode::SUBQUERY, true);
|
||||
|
@ -2726,7 +2730,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt,
|
|||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::GATHER, true);
|
||||
|
||||
|
||||
bool modified = false;
|
||||
|
||||
for (auto& n : nodes) {
|
||||
|
@ -2804,7 +2808,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt,
|
|||
/// only a SingletonNode and possibly some CalculationNodes as dependencies
|
||||
void arangodb::aql::removeUnnecessaryRemoteScatterRule(
|
||||
Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) {
|
||||
|
||||
|
||||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::REMOTE, true);
|
||||
|
@ -3177,7 +3181,7 @@ struct OrSimplifier {
|
|||
Ast* ast;
|
||||
|
||||
explicit OrSimplifier(Ast* ast) : ast(ast) {}
|
||||
|
||||
|
||||
std::string stringifyNode(AstNode const* node) const {
|
||||
try {
|
||||
return node->toString();
|
||||
|
@ -3252,7 +3256,7 @@ struct OrSimplifier {
|
|||
else {
|
||||
values->addMember(lhs);
|
||||
}
|
||||
|
||||
|
||||
if (rightIsArray) {
|
||||
size_t const n = rhs->numMembers();
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
|
@ -3274,16 +3278,16 @@ struct OrSimplifier {
|
|||
if (node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
|
||||
auto lhs = node->getMember(0);
|
||||
auto rhs = node->getMember(1);
|
||||
|
||||
|
||||
auto lhsNew = simplify(lhs);
|
||||
auto rhsNew = simplify(rhs);
|
||||
|
||||
|
||||
if (lhs != lhsNew || rhs != rhsNew) {
|
||||
// create a modified node
|
||||
node = ast->createNodeBinaryOperator(node->type, lhsNew, rhsNew);
|
||||
}
|
||||
|
||||
if ((lhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN) &&
|
||||
if ((lhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN) &&
|
||||
(rhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN)) {
|
||||
std::string leftName;
|
||||
std::string rightName;
|
||||
|
@ -3293,8 +3297,8 @@ struct OrSimplifier {
|
|||
AstNode const* rightValue = nullptr;
|
||||
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
if (detect(lhsNew, i >= 2, leftName, leftAttr, leftValue) &&
|
||||
detect(rhsNew, i % 2 == 0, rightName, rightAttr, rightValue) &&
|
||||
if (detect(lhsNew, i >= 2, leftName, leftAttr, leftValue) &&
|
||||
detect(rhsNew, i % 2 == 0, rightName, rightAttr, rightValue) &&
|
||||
leftName == rightName) {
|
||||
return buildValues(leftAttr, leftValue, lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN, rightValue, rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN);
|
||||
}
|
||||
|
@ -3304,11 +3308,11 @@ struct OrSimplifier {
|
|||
// return node as is
|
||||
return const_cast<AstNode*>(node);
|
||||
}
|
||||
|
||||
|
||||
if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
|
||||
auto lhs = node->getMember(0);
|
||||
auto rhs = node->getMember(1);
|
||||
|
||||
|
||||
auto lhsNew = simplify(lhs);
|
||||
auto rhsNew = simplify(rhs);
|
||||
|
||||
|
@ -3319,7 +3323,7 @@ struct OrSimplifier {
|
|||
|
||||
// fallthrough intentional
|
||||
}
|
||||
|
||||
|
||||
return const_cast<AstNode*>(node);
|
||||
}
|
||||
};
|
||||
|
@ -3355,7 +3359,7 @@ void arangodb::aql::replaceOrWithInRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
if (outVar.size() != 1 || outVar[0]->id != inVar[0]->id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
auto root = cn->expression()->node();
|
||||
|
||||
OrSimplifier simplifier(plan->getAst());
|
||||
|
@ -3364,7 +3368,7 @@ void arangodb::aql::replaceOrWithInRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
if (newRoot != root) {
|
||||
ExecutionNode* newNode = nullptr;
|
||||
Expression* expr = new Expression(plan->getAst(), newRoot);
|
||||
|
||||
|
||||
try {
|
||||
TRI_IF_FAILURE("OptimizerRules::replaceOrWithInRuleOom") {
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
|
||||
|
@ -3607,7 +3611,7 @@ void arangodb::aql::patchUpdateStatementsRule(Optimizer* opt,
|
|||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
SmallVector<ExecutionNode*> nodes{a};
|
||||
plan->findNodesOfType(nodes, EN::UPDATE, false);
|
||||
|
||||
|
||||
bool modified = false;
|
||||
|
||||
for (auto const& n : nodes) {
|
||||
|
@ -3680,16 +3684,16 @@ void arangodb::aql::optimizeTraversalsRule(Optimizer* opt,
|
|||
opt->addPlan(plan, rule, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
bool modified = false;
|
||||
|
||||
// first make a pass over all traversal nodes and remove unused
|
||||
// variables from them
|
||||
|
||||
// first make a pass over all traversal nodes and remove unused
|
||||
// variables from them
|
||||
for (auto const& n : tNodes) {
|
||||
TraversalNode* traversal = static_cast<TraversalNode*>(n);
|
||||
|
||||
auto varsUsedLater = n->getVarsUsedLater();
|
||||
|
||||
|
||||
// note that we can NOT optimize away the vertex output variable
|
||||
// yet, as many traversal internals depend on the number of vertices
|
||||
// found/built
|
||||
|
@ -3700,7 +3704,7 @@ void arangodb::aql::optimizeTraversalsRule(Optimizer* opt,
|
|||
traversal->setEdgeOutput(nullptr);
|
||||
modified = true;
|
||||
}
|
||||
|
||||
|
||||
outVariable = traversal->pathOutVariable();
|
||||
if (outVariable != nullptr &&
|
||||
varsUsedLater.find(outVariable) == varsUsedLater.end()) {
|
||||
|
@ -3738,16 +3742,16 @@ void arangodb::aql::prepareTraversalsRule(Optimizer* opt,
|
|||
opt->addPlan(plan, rule, false);
|
||||
return;
|
||||
}
|
||||
|
||||
// first make a pass over all traversal nodes and remove unused
|
||||
// variables from them
|
||||
|
||||
// first make a pass over all traversal nodes and remove unused
|
||||
// variables from them
|
||||
for (auto const& n : tNodes) {
|
||||
TraversalNode* traversal = static_cast<TraversalNode*>(n);
|
||||
traversal->prepareOptions();
|
||||
}
|
||||
|
||||
|
||||
opt->addPlan(plan, rule, true);
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief pulls out simple subqueries and merges them with the level above
|
||||
///
|
||||
|
@ -3755,17 +3759,17 @@ void arangodb::aql::prepareTraversalsRule(Optimizer* opt,
|
|||
///
|
||||
/// FOR x IN (
|
||||
/// FOR y IN collection FILTER y.value >= 5 RETURN y.test
|
||||
/// )
|
||||
/// )
|
||||
/// RETURN x.a
|
||||
///
|
||||
/// then this rule will transform it into:
|
||||
///
|
||||
///
|
||||
/// FOR tmp IN collection
|
||||
/// FILTER tmp.value >= 5
|
||||
/// FILTER tmp.value >= 5
|
||||
/// LET x = tmp.test
|
||||
/// RETURN x.a
|
||||
void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
|
||||
ExecutionPlan* plan,
|
||||
void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
|
||||
ExecutionPlan* plan,
|
||||
Optimizer::Rule const* rule) {
|
||||
|
||||
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
||||
|
@ -3781,12 +3785,12 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
|
|||
|
||||
for (auto const& n : nodes) {
|
||||
auto subqueryNode = static_cast<SubqueryNode*>(n);
|
||||
|
||||
|
||||
if (subqueryNode->isModificationQuery()) {
|
||||
// can't modify modifying subqueries
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (subqueryNode->canThrow()) {
|
||||
// can't inline throwing subqueries
|
||||
continue;
|
||||
|
@ -3845,10 +3849,10 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
|
|||
auto previous = n->getFirstDependency();
|
||||
auto insert = n->getFirstParent();
|
||||
TRI_ASSERT(insert != nullptr);
|
||||
|
||||
|
||||
// unlink the original SubqueryNode
|
||||
plan->unlinkNode(n, false);
|
||||
|
||||
|
||||
for (auto& it : subNodes) {
|
||||
// first unlink them all
|
||||
plan->unlinkNode(it, true);
|
||||
|
@ -3872,7 +3876,7 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
|
|||
queryVariables->renameVariable(variable->id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// link the top node in the subquery with the original plan
|
||||
if (previous != nullptr) {
|
||||
|
@ -3883,13 +3887,13 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
|
|||
plan->unlinkNode(listNode, false);
|
||||
|
||||
queryVariables->renameVariable(returnNode->inVariable()->id, listNode->outVariable()->name);
|
||||
|
||||
|
||||
// finally replace the variables
|
||||
std::unordered_map<VariableId, Variable const*> replacements;
|
||||
replacements.emplace(listNode->outVariable()->id, returnNode->inVariable());
|
||||
RedundantCalculationsReplacer finder(replacements);
|
||||
plan->root()->walk(&finder);
|
||||
|
||||
|
||||
// abort optimization
|
||||
current = nullptr;
|
||||
}
|
||||
|
@ -3898,8 +3902,8 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
|
|||
if (current == nullptr) {
|
||||
break;
|
||||
}
|
||||
|
||||
varsUsed.clear();
|
||||
|
||||
varsUsed.clear();
|
||||
current->getVariablesUsedHere(varsUsed);
|
||||
if (varsUsed.find(out) != varsUsed.end()) {
|
||||
// we found another node that uses the subquery variable
|
||||
|
@ -3914,3 +3918,196 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
|
|||
opt->addPlan(plan, rule, modified);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
struct GeoIndexInfo {
|
||||
EnumerateCollectionNode* _collectionNode;
|
||||
Collection const* _collection;
|
||||
std::shared_ptr<arangodb::Index> _index;
|
||||
std::vector<std::string> _longitude;
|
||||
std::vector<std::string> _latitude;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
// TODO - remove debug code
|
||||
#ifdef OBIDEBUG
|
||||
#define OBILEVEL ERR
|
||||
#else
|
||||
#define OBILEVEL TRACE
|
||||
#endif
|
||||
/// @brief checks whether a pair of DISTANCE() arguments (latitude, longitude)
/// are simple attribute accesses on one document that comes out of an
/// EnumerateCollectionNode, and whether that collection has a geo index
/// covering exactly these two attributes
///
/// @param pair  the two AST nodes of one coordinate: first = latitude
///              argument, second = longitude argument
/// @param ex    currently unused
/// @param plan  the execution plan, used to look up the nodes that set the
///              accessed variables
/// @return the matching index information, or boost::none if the pair cannot
///         be served by a geo index
static boost::optional<GeoIndexInfo>
geoDistanceFunctionArgCheck(std::pair<AstNode*,AstNode*> const& pair, ExecutionNode* ex, ExecutionPlan* plan){
  using SV = std::vector<std::string>;
  LOG(OBILEVEL) << " enter argument check";

  // first and second should be based on the same document - need to provide
  // the document in order to see which collection is bound to it and if that
  // collection supports a geo index
  if (!pair.first->isAttributeAccessForVariable() ||
      !pair.second->isAttributeAccessForVariable()) {
    LOG(OBILEVEL) << " not both args are of type attribute access";
    return boost::none;
  }

  // expect an access of the form doc.attribute
  // TODO: more complex access paths have to be added: loop until the
  // REFERENCE type is found. Until then, bail out on anything that is not a
  // direct reference, because the data of a non-reference node must not be
  // interpreted as a Variable.
  auto const* reference1 = pair.first->getMember(0);
  auto const* reference2 = pair.second->getMember(0);
  if (reference1->type != NODE_TYPE_REFERENCE ||
      reference2->type != NODE_TYPE_REFERENCE) {
    return boost::none;
  }

  auto setter1 = plan->getVarSetBy(static_cast<Variable const*>(reference1->getData())->id);
  auto setter2 = plan->getVarSetBy(static_cast<Variable const*>(reference2->getData())->id);
  SV accessPath1{pair.first->getString()};
  SV accessPath2{pair.second->getString()};

  LOG(OBILEVEL) << " got setter";

  // both attributes must belong to the same document, and that document must
  // be produced by a full collection enumeration; the lookup may come back
  // empty, so check for nullptr before dereferencing
  if (setter1 == nullptr || setter1 != setter2 ||
      setter1->getType() != EN::ENUMERATE_COLLECTION) {
    return boost::none;
  }

  auto collNode = static_cast<EnumerateCollectionNode*>(setter1);
  auto coll = collNode->collection();  // what kind of indexes does it have on what attributes
  auto lcoll = coll->getCollection();
  LOG(OBILEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName();

  for (auto const& indexShardPtr : lcoll->getIndexes()) {
    // get real index
    arangodb::Index& index = *indexShardPtr;

    // check if current index is a geo-index
    if (index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX &&
        index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX) {
      continue;
    }

    auto const& fields = index.fieldNames();

#ifdef OBIDEBUG
    // FIXME - REMOVE DEBUG CODE LATER
    auto vecs = std::vector<std::vector<SV>>{fields, std::vector<SV>{accessPath1, accessPath2}};
    for (auto const& vec : vecs) {
      for (auto const& path : vec) {
        std::cout << "AccessPath VECTOR: ";
        for (auto const& word : path) {
          std::cout << word << " ";
        }
        std::cout << std::endl;
      }
    }
#endif

    // only an index over exactly two fields can match a separate latitude
    // and longitude attribute; never read fields[1] of a shorter list
    if (fields.size() != 2) {
      continue;
    }

    // check that the access paths of the attributes in the AST and those in
    // the index match
    if (fields[0] == accessPath1 && fields[1] == accessPath2) {
      // fill the members by name so that latitude and longitude cannot get
      // mixed up: the first DISTANCE() argument of a coordinate is the
      // latitude, the second one the longitude
      GeoIndexInfo info{};
      info._collectionNode = collNode;
      info._collection = coll;
      info._index = indexShardPtr;
      info._latitude = std::move(accessPath1);
      info._longitude = std::move(accessPath2);
      return boost::optional<GeoIndexInfo>(std::move(info));
    }
  }

  return boost::none;
}
|
||||
|
||||
/// @brief looks for "SORT DISTANCE(a, b, c, d) ASC" where exactly one of the
/// coordinate pairs (a, b) / (c, d) is covered by a geo index of an enumerated
/// collection
///
/// NOTE: this rule is work in progress. Replacing the SortNode and the
/// EnumerateCollectionNode by an IndexNode is still disabled by the `break`
/// below, so at the moment the rule only detects and logs a match and always
/// reports the plan as unmodified.
void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt,
                                         ExecutionPlan* plan,
                                         Optimizer::Rule const* rule) {

  LOG(OBILEVEL) << "ENTER GEO RULE";

  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  bool modified = false;

  plan->findNodesOfType(nodes, EN::SORT, true);

  for (auto const& n : nodes) {
    auto node = static_cast<SortNode*>(n);
    auto const& elements = node->getElements();

    // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one
    // sort criterion that sorts in ascending order
    if (elements.size() != 1 || !elements[0].second) {
      continue;
    }

    // variable of sort expression
    auto const variable = elements[0].first;
    TRI_ASSERT(variable != nullptr);

    // find the expression that is bound to the variable:
    // get the node that holds the calculation
    auto setter = plan->getVarSetBy(variable->id);
    if (setter == nullptr || setter->getType() != EN::CALCULATION) {
      continue;
    }

    // downcast to calculation node and get expression
    auto cn = static_cast<CalculationNode*>(setter);
    auto const expression = cn->expression();

    // the expression must exist and it must be a function call
    if (expression == nullptr || expression->node() == nullptr ||
        expression->node()->type != NODE_TYPE_FCALL) {
      // not the right type of node
      continue;
    }

    // get the ast node of the expression
    AstNode const* funcNode = expression->node();
    auto func = static_cast<Function const*>(funcNode->getData());

    // we're looking for "DISTANCE()", which is a function call whose single
    // member is the array of its arguments
    if (func == nullptr || func->externalName != "DISTANCE" ||
        funcNode->numMembers() != 1) {
      continue;
    }

    LOG(OBILEVEL) << " FOUND DISTANCE RULE";

    // DISTANCE() must be called with exactly two coordinates, i.e. four
    // arguments
    auto const distanceArgs = funcNode->getMember(0);
    if (distanceArgs->numMembers() != 4) {
      continue;
    }

    std::pair<AstNode*,AstNode*> argPair1 = { distanceArgs->getMember(0), distanceArgs->getMember(1) };
    std::pair<AstNode*,AstNode*> argPair2 = { distanceArgs->getMember(2), distanceArgs->getMember(3) };

    auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan);
    auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan);

    // xor: exactly one argument pair shall have a geo index
    if ((!result1 && !result2) || (result1 && result2)) {
      continue;
    }

    LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS";

    // from here on result1 always holds the matching pair
    if (!result1) {
      result1 = std::move(result2);
    }

    LOG(OBILEVEL) << " attributes: " << result1.get()._latitude[0]
                  << ", " << result1.get()._longitude[0]
                  << " of collection:" << result1.get()._collection->getName()
                  << " are geoindexed";

    break; //remove this to make use of the index

    // --- everything below is unreachable until the break above is removed ---

    auto cnode = result1.get()._collectionNode;
    auto& idxPtr = result1.get()._index;

    // create new index node and register it
    auto condition = std::make_unique<Condition>(plan->getAst()); //What is this condition exactly about
    condition->normalize(plan);
    auto inode = new IndexNode(
        plan, plan->nextId(), cnode->vocbase(),
        cnode->collection(), cnode->outVariable(),
        std::vector<Transaction::IndexHandle>{Transaction::IndexHandle{idxPtr}},
        condition.get(), !elements[0].second);
    plan->registerNode(inode);
    condition.release();

    plan->unlinkNode(n);
    plan->replaceNode(cnode,inode);

    // signal that plan has been changed
    modified=true;

  }

  opt->addPlan(plan, rule, modified);

  LOG(OBILEVEL) << "EXIT GEO RULE";
  LOG(OBILEVEL) << "";
}
|
||||
|
||||
|
|
|
@ -198,6 +198,7 @@ void prepareTraversalsRule(Optimizer* opt, ExecutionPlan* plan,
|
|||
/// @brief moves simple subqueries one level higher
|
||||
void inlineSubqueriesRule(Optimizer*, ExecutionPlan*, Optimizer::Rule const*);
|
||||
|
||||
void optimizeGeoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule);
|
||||
} // namespace aql
|
||||
} // namespace arangodb
|
||||
|
||||
|
|
Loading…
Reference in New Issue