1
0
Fork 0

Merge branch 'obi-geo-index' into devel

* obi-geo-index:
  prepare merge with devel
  WIP: delete SortNode and replace EnumerateCollectionNode with IndexNode
  add todos and more description of what is going on
  avoid 3 copies
  check if index node has geoindex and if the access paths match
  implement parts of the geoindex optimizer rule
  add geo index rule
This commit is contained in:
Jan Christoph Uhde 2016-11-28 16:03:29 +01:00
commit e67296cc6e
5 changed files with 314 additions and 81 deletions

View File

@ -21,6 +21,33 @@
/// @author Max Neunhoeffer
////////////////////////////////////////////////////////////////////////////////
// Execution plans like the one below are made of Nodes that inherit the
// ExecutionNode class as a base class.
//
// Execution plan:
// Id NodeType Est. Comment
// 1 SingletonNode 1 * ROOT
// 2 EnumerateCollectionNode 6400 - FOR d IN ulf /* full collection scan */
// 3 CalculationNode 6400 - LET #1 = DISTANCE(d.`lat`, d.`lon`, 0, 0) /* simple expression */ /* collections used: d : ulf */
// 4 SortNode 6400 - SORT #1 ASC
// 5 LimitNode 5 - LIMIT 0, 5
// 6 ReturnNode 5 - RETURN d
//
// Even though the Singleton Node has a comment saying it is the "ROOT" node
// you receive a pointer to LimitNode by calling getFirstParent on the SortNode
// (effectively going down the list). If you want to go up from 5 to 4 you need
// to call getFirstDependency to get a pointer to the SortNode.
//
// For most, maybe all, operations you will only need to operate on the
// dependencies; the parents will be updated automatically.
//
// If you wish to unlink (remove) or replace a node you should do it by using
// one of the plan's operations.
//
// addDependency(Parent) has a totally different functionality than
// addDependencies(Parents): the latter does not add a list of dependencies
// to a node!!!
#ifndef ARANGOD_AQL_EXECUTION_NODE_H
#define ARANGOD_AQL_EXECUTION_NODE_H 1
@ -156,6 +183,8 @@ class ExecutionNode {
bool hasDependency() const { return (_dependencies.size() == 1); }
/// @brief add the node dependencies to a vector
/// ATTENTION - this function has nothing to do with the addDependency function
// maybe another name should be used.
void addDependencies(std::vector<ExecutionNode*>& result) const {
for (auto const& it : _dependencies) {
result.emplace_back(it);
@ -433,7 +462,7 @@ class ExecutionNode {
return false;
}
ExecutionPlan const* plan() const {
ExecutionPlan const* plan() const {
return _plan;
}
@ -510,7 +539,7 @@ class ExecutionNode {
/// @brief get depth
int getDepth() const { return _depth; }
/// @brief get registers to clear
std::unordered_set<RegisterId> const& getRegsToClear() const {
return _regsToClear;
@ -677,7 +706,7 @@ class EnumerateCollectionNode : public ExecutionNode {
std::vector<Variable const*> getVariablesSetHere() const override final {
return std::vector<Variable const*>{_outVariable};
}
/// @brief the node is only non-deterministic if it uses a random sort order
bool isDeterministic() override final { return !_random; }
@ -927,7 +956,7 @@ class CalculationNode : public ExecutionNode {
/// @brief can the node throw?
bool canThrow() override final { return _expression->canThrow(); }
bool isDeterministic() override final { return _expression->isDeterministic(); }
private:
@ -1014,10 +1043,10 @@ class SubqueryNode : public ExecutionNode {
/// *originate* from this node. That is, this method does not need to
/// return true just because a dependent node can throw an exception.
bool canThrow() override final;
bool isDeterministic() override final;
bool isConst();
bool isConst();
private:
/// @brief we need to have an expression and where to write the result
@ -1181,7 +1210,7 @@ class NoResultsNode : public ExecutionNode {
/// @brief constructor with an id
public:
NoResultsNode(ExecutionPlan* plan, size_t id) : ExecutionNode(plan, id) {}
NoResultsNode(ExecutionPlan* plan, arangodb::velocypack::Slice const& base)
: ExecutionNode(plan, base) {}

View File

@ -347,7 +347,7 @@ void Optimizer::setupRules() {
// rule not yet tested
registerRule("split-filters",
splitFiltersRule,
splitFiltersRule_pass1,
splitFiltersRule_pass1,
true);
#endif
@ -413,7 +413,7 @@ void Optimizer::setupRules() {
// merge filters into traversals
registerRule("optimize-traversals", optimizeTraversalsRule,
optimizeTraversalsRule_pass6, DoesNotCreateAdditionalPlans, true);
// prepare traversal info
registerRule("prepare-traversals", prepareTraversalsRule,
prepareTraversalsRule_pass6, DoesNotCreateAdditionalPlans, false, true);
@ -485,6 +485,10 @@ void Optimizer::setupRules() {
registerRule("patch-update-statements", patchUpdateStatementsRule,
patchUpdateStatementsRule_pass9, DoesNotCreateAdditionalPlans, true);
// try to use a geo index for "SORT DISTANCE(...)" queries
registerRule("geo-index-optimizer", optimizeGeoIndexRule,
geoDistanceRule, DoesNotCreateAdditionalPlans, true);
if (arangodb::ServerState::instance()->isCoordinator()) {
// distribute operations in cluster
registerRule("scatter-in-cluster", scatterInClusterRule,

View File

@ -69,7 +69,7 @@ class Optimizer {
// determine the "right" type of CollectNode and
// add a sort node for each COLLECT (may be removed later)
specializeCollectRule_pass1 = 105,
inlineSubqueriesRule_pass1 = 106,
// split and-combined filters into multiple smaller filters
@ -192,7 +192,9 @@ class Optimizer {
removeUnnecessaryRemoteScatterRule_pass10 = 1040,
// recognize that a RemoveNode can be moved to the shards
undistributeRemoveAfterEnumCollRule_pass10 = 1050
undistributeRemoveAfterEnumCollRule_pass10 = 1050,
geoDistanceRule = 1060
};
public:

View File

@ -46,6 +46,10 @@
#include "Cluster/ClusterInfo.h"
#include "Utils/Transaction.h"
#include "VocBase/TraverserOptions.h"
#include "Indexes/Index.h"
#include <boost/optional.hpp>
#include <tuple>
#include <iostream>
using namespace arangodb;
using namespace arangodb::aql;
@ -57,7 +61,7 @@ void arangodb::aql::sortInValuesRule(Optimizer* opt, ExecutionPlan* plan,
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::FILTER, true);
bool modified = false;
for (auto const& n : nodes) {
@ -383,7 +387,7 @@ void arangodb::aql::removeUnnecessaryFiltersRule(Optimizer* opt,
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::FILTER, true);
bool modified = false;
std::unordered_set<ExecutionNode*> toUnlink;
@ -447,7 +451,7 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::COLLECT, true);
bool modified = false;
for (auto const& n : nodes) {
@ -705,7 +709,7 @@ void arangodb::aql::removeSortRandRule(Optimizer* opt, ExecutionPlan* plan,
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::SORT, true);
bool modified = false;
for (auto const& n : nodes) {
@ -954,8 +958,8 @@ void arangodb::aql::moveCalculationsDownRule(Optimizer* opt,
} else if (currentType == EN::INDEX ||
currentType == EN::ENUMERATE_COLLECTION ||
currentType == EN::ENUMERATE_LIST ||
currentType == EN::TRAVERSAL ||
currentType == EN::SHORTEST_PATH ||
currentType == EN::TRAVERSAL ||
currentType == EN::SHORTEST_PATH ||
currentType == EN::COLLECT ||
currentType == EN::NORESULTS) {
// we will not push further down than such nodes
@ -1191,7 +1195,7 @@ void arangodb::aql::moveFiltersUpRule(Optimizer* opt, ExecutionPlan* plan,
// must not move a filter beyond a node that can throw
break;
}
if (current->isModificationNode()) {
// must not move a filter beyond a modification node
break;
@ -1249,7 +1253,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
std::unordered_map<VariableId, Variable const*> const& replacements)
: _replacements(replacements) {
}
template <typename T>
void replaceStartTargetVariables(ExecutionNode* en) {
auto node = static_cast<T*>(en);
@ -1271,7 +1275,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
auto node = static_cast<CalculationNode*>(en);
std::unordered_set<Variable const*> variables;
node->expression()->variables(variables);
// check if the calculation uses any of the variables that we want to
// replace
for (auto const& it : variables) {
@ -1304,12 +1308,12 @@ class arangodb::aql::RedundantCalculationsReplacer final
replaceInVariable<FilterNode>(en);
break;
}
case EN::TRAVERSAL: {
replaceInVariable<TraversalNode>(en);
break;
}
case EN::SHORTEST_PATH: {
replaceStartTargetVariables<ShortestPathNode>(en);
break;
@ -1331,7 +1335,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
}
// node->_keepVariables does not need to be updated at the moment as the
// "remove-redundant-calculations" rule will stop when it finds a COLLECT
// with an INTO, and the "inline-subqueries" rule will abort there as well
// with an INTO, and the "inline-subqueries" rule will abort there as well
break;
}
@ -1352,7 +1356,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
replaceInVariable<InsertNode>(en);
break;
}
case EN::UPSERT: {
auto node = static_cast<UpsertNode*>(en);
@ -1379,7 +1383,7 @@ class arangodb::aql::RedundantCalculationsReplacer final
}
break;
}
case EN::REPLACE: {
auto node = static_cast<ReplaceNode*>(en);
@ -1465,7 +1469,7 @@ void arangodb::aql::removeRedundantCalculationsRule(
continue;
}
bool const isEqual = (buffer.length() == referenceExpression.size() &&
bool const isEqual = (buffer.length() == referenceExpression.size() &&
memcmp(buffer.c_str(), referenceExpression.c_str(), buffer.length()) == 0);
buffer.reset();
@ -1571,7 +1575,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
continue;
}
// will remove subquery when we get here
}
}
auto outvars = n->getVariablesSetHere();
TRI_ASSERT(outvars.size() == 1);
@ -1589,13 +1593,13 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
// it's a temporary variable that we can fuse with the other
// calculation easily
if (n->canThrow() ||
if (n->canThrow() ||
!static_cast<CalculationNode*>(n)->expression()->isDeterministic()) {
continue;
}
AstNode const* rootNode = static_cast<CalculationNode*>(n)->expression()->node();
if (rootNode->type == NODE_TYPE_REFERENCE) {
// if the LET is a simple reference to another variable, e.g. LET a = b
// then replace all references to a with references to b
@ -1643,7 +1647,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
usageCount = 0;
break;
}
}
}
if (current->getType() != EN::CALCULATION) {
// don't know how to replace the variable in a non-LET node
// abort the search
@ -1651,7 +1655,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
break;
}
// got a LET. we can replace the variable reference in it by
// got a LET. we can replace the variable reference in it by
// something else
++usageCount;
other = static_cast<CalculationNode*>(current);
@ -1688,7 +1692,7 @@ void arangodb::aql::removeUnnecessaryCalculationsRule(
otherExpression->replaceVariableReference(outvars[0], rootNode);
toUnlink.emplace(n);
}
}
}
}
@ -1777,7 +1781,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
size_t coveredAttributes = 0;
auto resultPair = trx->getIndexForSortCondition(
enumerateCollectionNode->collection()->getName(),
&sortCondition, outVariable,
&sortCondition, outVariable,
enumerateCollectionNode->collection()->count(),
usedIndexes, coveredAttributes);
if (resultPair.second) {
@ -1856,7 +1860,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
// all indexes use the same attributes and index conditions guarantee
// sorted output
}
TRI_ASSERT(indexes.size() == 1 || cond->isSorted());
// if we get here, we either have one index or multiple indexes on the same
@ -1883,7 +1887,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
// order as the IndexNode...
// now check if the sort attributes match the ones of the index
size_t const numCovered =
sortCondition.coveredAttributes(outVariable, fields);
sortCondition.coveredAttributes(outVariable, fields);
if (numCovered >= sortCondition.numAttributes()) {
// sort condition is fully covered by index... now we can remove the
@ -1908,7 +1912,7 @@ struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
// now check if the index fields are the same as the sort condition
// fields
// e.g. FILTER c.value1 == 1 && c.value2 == 42 SORT c.value1, c.value2
size_t const numCovered =
size_t const numCovered =
sortCondition.coveredAttributes(outVariable, fields);
if (numCovered == sortCondition.numAttributes() &&
@ -1997,7 +2001,7 @@ void arangodb::aql::useIndexForSortRule(Optimizer* opt, ExecutionPlan* plan,
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::SORT, true);
bool modified = false;
for (auto const& n : nodes) {
@ -2017,11 +2021,11 @@ void arangodb::aql::useIndexForSortRule(Optimizer* opt, ExecutionPlan* plan,
/// @brief try to remove filters which are covered by indexes
void arangodb::aql::removeFiltersCoveredByIndexRule(
Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) {
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::FILTER, true);
std::unordered_set<ExecutionNode*> toUnlink;
bool modified = false;
@ -2188,7 +2192,7 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule(
auto dep = nwalker->getFirstDependency();
if (dep->getType() != EN::ENUMERATE_COLLECTION &&
if (dep->getType() != EN::ENUMERATE_COLLECTION &&
dep->getType() != EN::ENUMERATE_LIST) {
break;
}
@ -2284,7 +2288,7 @@ void arangodb::aql::scatterInClusterRule(Optimizer* opt, ExecutionPlan* plan,
if (arangodb::ServerState::instance()->isCoordinator()) {
// find subqueries
std::unordered_map<ExecutionNode*, ExecutionNode*> subqueries;
SmallVector<ExecutionNode*>::allocator_type::arena_type s;
SmallVector<ExecutionNode*> subs{s};
plan->findNodesOfType(subs, ExecutionNode::SUBQUERY, true);
@ -2726,7 +2730,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt,
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::GATHER, true);
bool modified = false;
for (auto& n : nodes) {
@ -2804,7 +2808,7 @@ void arangodb::aql::distributeSortToClusterRule(Optimizer* opt,
/// only a SingletonNode and possibly some CalculationNodes as dependencies
void arangodb::aql::removeUnnecessaryRemoteScatterRule(
Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule) {
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::REMOTE, true);
@ -3177,7 +3181,7 @@ struct OrSimplifier {
Ast* ast;
explicit OrSimplifier(Ast* ast) : ast(ast) {}
std::string stringifyNode(AstNode const* node) const {
try {
return node->toString();
@ -3252,7 +3256,7 @@ struct OrSimplifier {
else {
values->addMember(lhs);
}
if (rightIsArray) {
size_t const n = rhs->numMembers();
for (size_t i = 0; i < n; ++i) {
@ -3274,16 +3278,16 @@ struct OrSimplifier {
if (node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
auto lhs = node->getMember(0);
auto rhs = node->getMember(1);
auto lhsNew = simplify(lhs);
auto rhsNew = simplify(rhs);
if (lhs != lhsNew || rhs != rhsNew) {
// create a modified node
node = ast->createNodeBinaryOperator(node->type, lhsNew, rhsNew);
}
if ((lhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN) &&
if ((lhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN) &&
(rhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN)) {
std::string leftName;
std::string rightName;
@ -3293,8 +3297,8 @@ struct OrSimplifier {
AstNode const* rightValue = nullptr;
for (size_t i = 0; i < 4; ++i) {
if (detect(lhsNew, i >= 2, leftName, leftAttr, leftValue) &&
detect(rhsNew, i % 2 == 0, rightName, rightAttr, rightValue) &&
if (detect(lhsNew, i >= 2, leftName, leftAttr, leftValue) &&
detect(rhsNew, i % 2 == 0, rightName, rightAttr, rightValue) &&
leftName == rightName) {
return buildValues(leftAttr, leftValue, lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN, rightValue, rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN);
}
@ -3304,11 +3308,11 @@ struct OrSimplifier {
// return node as is
return const_cast<AstNode*>(node);
}
if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
auto lhs = node->getMember(0);
auto rhs = node->getMember(1);
auto lhsNew = simplify(lhs);
auto rhsNew = simplify(rhs);
@ -3319,7 +3323,7 @@ struct OrSimplifier {
// fallthrough intentional
}
return const_cast<AstNode*>(node);
}
};
@ -3355,7 +3359,7 @@ void arangodb::aql::replaceOrWithInRule(Optimizer* opt, ExecutionPlan* plan,
if (outVar.size() != 1 || outVar[0]->id != inVar[0]->id) {
continue;
}
auto root = cn->expression()->node();
OrSimplifier simplifier(plan->getAst());
@ -3364,7 +3368,7 @@ void arangodb::aql::replaceOrWithInRule(Optimizer* opt, ExecutionPlan* plan,
if (newRoot != root) {
ExecutionNode* newNode = nullptr;
Expression* expr = new Expression(plan->getAst(), newRoot);
try {
TRI_IF_FAILURE("OptimizerRules::replaceOrWithInRuleOom") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
@ -3607,7 +3611,7 @@ void arangodb::aql::patchUpdateStatementsRule(Optimizer* opt,
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
plan->findNodesOfType(nodes, EN::UPDATE, false);
bool modified = false;
for (auto const& n : nodes) {
@ -3680,16 +3684,16 @@ void arangodb::aql::optimizeTraversalsRule(Optimizer* opt,
opt->addPlan(plan, rule, false);
return;
}
bool modified = false;
// first make a pass over all traversal nodes and remove unused
// variables from them
// first make a pass over all traversal nodes and remove unused
// variables from them
for (auto const& n : tNodes) {
TraversalNode* traversal = static_cast<TraversalNode*>(n);
auto varsUsedLater = n->getVarsUsedLater();
// note that we can NOT optimize away the vertex output variable
// yet, as many traversal internals depend on the number of vertices
// found/built
@ -3700,7 +3704,7 @@ void arangodb::aql::optimizeTraversalsRule(Optimizer* opt,
traversal->setEdgeOutput(nullptr);
modified = true;
}
outVariable = traversal->pathOutVariable();
if (outVariable != nullptr &&
varsUsedLater.find(outVariable) == varsUsedLater.end()) {
@ -3738,16 +3742,16 @@ void arangodb::aql::prepareTraversalsRule(Optimizer* opt,
opt->addPlan(plan, rule, false);
return;
}
// first make a pass over all traversal nodes and remove unused
// variables from them
// first make a pass over all traversal nodes and remove unused
// variables from them
for (auto const& n : tNodes) {
TraversalNode* traversal = static_cast<TraversalNode*>(n);
traversal->prepareOptions();
}
opt->addPlan(plan, rule, true);
}
}
/// @brief pulls out simple subqueries and merges them with the level above
///
@ -3755,17 +3759,17 @@ void arangodb::aql::prepareTraversalsRule(Optimizer* opt,
///
/// FOR x IN (
/// FOR y IN collection FILTER y.value >= 5 RETURN y.test
/// )
/// )
/// RETURN x.a
///
/// then this rule will transform it into:
///
///
/// FOR tmp IN collection
/// FILTER tmp.value >= 5
/// FILTER tmp.value >= 5
/// LET x = tmp.test
/// RETURN x.a
void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
ExecutionPlan* plan,
void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
ExecutionPlan* plan,
Optimizer::Rule const* rule) {
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
@ -3781,12 +3785,12 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
for (auto const& n : nodes) {
auto subqueryNode = static_cast<SubqueryNode*>(n);
if (subqueryNode->isModificationQuery()) {
// can't modify modifying subqueries
continue;
}
if (subqueryNode->canThrow()) {
// can't inline throwing subqueries
continue;
@ -3845,10 +3849,10 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
auto previous = n->getFirstDependency();
auto insert = n->getFirstParent();
TRI_ASSERT(insert != nullptr);
// unlink the original SubqueryNode
plan->unlinkNode(n, false);
for (auto& it : subNodes) {
// first unlink them all
plan->unlinkNode(it, true);
@ -3872,7 +3876,7 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
queryVariables->renameVariable(variable->id);
}
}
}
}
// link the top node in the subquery with the original plan
if (previous != nullptr) {
@ -3883,13 +3887,13 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
plan->unlinkNode(listNode, false);
queryVariables->renameVariable(returnNode->inVariable()->id, listNode->outVariable()->name);
// finally replace the variables
std::unordered_map<VariableId, Variable const*> replacements;
replacements.emplace(listNode->outVariable()->id, returnNode->inVariable());
RedundantCalculationsReplacer finder(replacements);
plan->root()->walk(&finder);
// abort optimization
current = nullptr;
}
@ -3898,8 +3902,8 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
if (current == nullptr) {
break;
}
varsUsed.clear();
varsUsed.clear();
current->getVariablesUsedHere(varsUsed);
if (varsUsed.find(out) != varsUsed.end()) {
// we found another node that uses the subquery variable
@ -3914,3 +3918,196 @@ void arangodb::aql::inlineSubqueriesRule(Optimizer* opt,
opt->addPlan(plan, rule, modified);
}
/// @brief result of a successful geo-index lookup for one pair of DISTANCE()
/// arguments. NOTE: the struct is initialized positionally (aggregate
/// initialization), so the member order must not be changed.
struct GeoIndexInfo {
// node that enumerates the collection both attribute accesses refer to
EnumerateCollectionNode* _collectionNode;
// collection enumerated by _collectionNode
Collection const* _collection;
// the geo index whose fields matched the two access paths
std::shared_ptr<arangodb::Index> _index;
// attribute access path stored as longitude
std::vector<std::string> _longitude;
// attribute access path stored as latitude
std::vector<std::string> _latitude;
};
// TODO - remove debug code
// OBILEVEL is the log level passed to LOG() by the geo index rule below:
// ERR when OBIDEBUG is defined, TRACE otherwise.
#ifdef OBIDEBUG
#define OBILEVEL ERR
#else
#define OBILEVEL TRACE
#endif
/// @brief checks whether one pair of DISTANCE() arguments can be served by a
/// geo index
///
/// Both arguments of the pair must be simple attribute accesses of the form
/// `doc.attribute` on the same variable, that variable must be produced by an
/// EnumerateCollectionNode, and the enumerated collection must own a geo index
/// whose two fields equal the two accessed attributes (in the same order).
///
/// @param pair  two consecutive DISTANCE() arguments (latitude, longitude)
/// @param ex    currently unused
/// @param plan  execution plan used to look up the nodes setting the variables
/// @return the matching index description, or boost::none if the pair cannot
///         be served by a geo index
static boost::optional<GeoIndexInfo>
geoDistanceFunctionArgCheck(std::pair<AstNode*,AstNode*> const& pair, ExecutionNode* ex, ExecutionPlan* plan){
  using SV = std::vector<std::string>;
  LOG(OBILEVEL) << " enter argument check";

  // first and second should be based on the same document - need to provide
  // the document in order to see which collection is bound to it and if that
  // collection supports a geo index
  if (!pair.first->isAttributeAccessForVariable() ||
      !pair.second->isAttributeAccessForVariable()) {
    LOG(OBILEVEL) << " not both args are of type attribute access";
    return boost::none;
  }

  // expect access of the form doc.attribute
  // TODO: more complex access paths have to be added: loop until REFERENCE
  // TYPE IS FOUND
  auto const* base1 = pair.first->getMember(0);
  auto const* base2 = pair.second->getMember(0);
  if (base1->type != NODE_TYPE_REFERENCE ||
      base2->type != NODE_TYPE_REFERENCE) {
    // nested access (e.g. doc.a.b): the first member is not a variable
    // reference, so its data must not be interpreted as a Variable
    return boost::none;
  }

  auto setter1 = plan->getVarSetBy(static_cast<Variable const*>(base1->getData())->id);
  auto setter2 = plan->getVarSetBy(static_cast<Variable const*>(base2->getData())->id);

  SV accessPath1{pair.first->getString()};
  SV accessPath2{pair.second->getString()};

  LOG(OBILEVEL) << " got setter";

  // both attributes must come from the same, known collection enumeration
  if (setter1 == nullptr || setter1 != setter2 ||
      setter1->getType() != EN::ENUMERATE_COLLECTION) {
    return boost::none;
  }

  auto collNode = static_cast<EnumerateCollectionNode*>(setter1);
  auto coll = collNode->collection(); // what kind of indexes does it have on what attributes
  auto lcoll = coll->getCollection();
  // TODO - check collection for suitable geo-indexes
  LOG(OBILEVEL) << " SETTER IS ENUMERATE_COLLECTION: " << coll->getName();

  for (auto const& indexShardPtr : lcoll->getIndexes()) {
    if (!indexShardPtr) {
      continue;
    }
    // get real index
    arangodb::Index& index = *indexShardPtr;

    // check if current index is a geo-index
    if (index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX &&
        index.type() != arangodb::Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX) {
      continue;
    }

    auto const& fields = index.fieldNames();

#ifdef OBIDEBUG
    //FIXME - REMOVE DEBUG CODE LATER
    auto vecs = std::vector<std::vector<SV>>{fields, std::vector<SV>{accessPath1, accessPath2}};
    for (auto const& vec : vecs) {
      for (auto const& path : vec) {
        std::cout << "AccessPath VECTOR: ";
        for (auto const& word : path) {
          std::cout << word << " ";
        }
        std::cout << std::endl;
      }
    }
#endif

    // check that the access paths of the attributes in the ast match those
    // of the index; an index with fewer than two fields can never match and
    // must not be indexed at position 1
    if (fields.size() == 2 && fields[0] == accessPath1 &&
        fields[1] == accessPath2) {
      GeoIndexInfo info{};
      info._collectionNode = collNode;
      info._collection = coll;
      info._index = indexShardPtr;
      // DISTANCE() takes (latitude, longitude) pairs, so the first access
      // path is the latitude and the second one the longitude
      info._latitude = std::move(accessPath1);
      info._longitude = std::move(accessPath2);
      return info;
    }
  }

  return boost::none;
}
/// @brief looks for "SORT DISTANCE(a, b, c, d) ASC" where exactly one of the
/// argument pairs (a, b) / (c, d) is covered by a geo index
///
/// WIP: the rule currently only detects and logs the situation. The loop is
/// left via `break` before the plan is touched, so the plan is always handed
/// back to the optimizer unmodified. The code after the `break` sketches the
/// intended replacement of the EnumerateCollectionNode by an IndexNode.
void arangodb::aql::optimizeGeoIndexRule(Optimizer* opt,
                                         ExecutionPlan* plan,
                                         Optimizer::Rule const* rule) {
  LOG(OBILEVEL) << "ENTER GEO RULE";

  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  bool modified = false;

  plan->findNodesOfType(nodes, EN::SORT, true);

  for (auto const& n : nodes) {
    auto node = static_cast<SortNode*>(n);
    auto const& elements = node->getElements();

    // we're looking for "SORT DISTANCE(x,y,a,b) ASC", which has just one
    // sort criterion (second == ascending)
    if (elements.size() != 1 || !elements[0].second) {
      continue;
    }

    // variable of sort expression
    auto const variable = elements[0].first;
    TRI_ASSERT(variable != nullptr);

    // find the calculation node that sets the sort variable
    auto setter = plan->getVarSetBy(variable->id);
    if (setter == nullptr || setter->getType() != EN::CALCULATION) {
      continue;
    }

    // downcast to calculation node and get expression
    auto cn = static_cast<CalculationNode*>(setter);
    auto const expression = cn->expression();

    // the expression must exist and it must be a function call
    if (expression == nullptr || expression->node() == nullptr ||
        expression->node()->type != NODE_TYPE_FCALL) {
      // not the right type of node
      continue;
    }

    // get the ast node of the expression
    AstNode const* funcNode = expression->node();
    auto func = static_cast<Function const*>(funcNode->getData());

    // we're looking for a call of "DISTANCE"; the function call node has a
    // single member, the array holding the call arguments
    if (func->externalName != "DISTANCE" || funcNode->numMembers() != 1) {
      continue;
    }

    LOG(OBILEVEL) << " FOUND DISTANCE RULE";

    // DISTANCE() must have been called with exactly four arguments
    auto const* distanceArgs = funcNode->getMember(0);
    if (distanceArgs->numMembers() != 4) {
      continue;
    }

    std::pair<AstNode*,AstNode*> argPair1 = { distanceArgs->getMember(0), distanceArgs->getMember(1) };
    std::pair<AstNode*,AstNode*> argPair2 = { distanceArgs->getMember(2), distanceArgs->getMember(3) };

    auto result1 = geoDistanceFunctionArgCheck(argPair1, node, plan);
    auto result2 = geoDistanceFunctionArgCheck(argPair2, node, plan);

    // xor: exactly one argument pair shall have a geo index
    if ((!result1 && !result2) || (result1 && result2)) {
      continue;
    }

    LOG(OBILEVEL) << " FOUND DISTANCE RULE WITH ATTRIBUTE ACCESS";

    // from here on result1 holds the one matching pair
    if (!result1) {
      result1 = std::move(result2);
    }

    // log both attribute names (not the same access path twice)
    LOG(OBILEVEL) << " attributes: " << result1->_latitude[0]
                  << ", " << result1->_longitude[0]
                  << " of collection:" << result1->_collection->getName()
                  << " are geoindexed";

    break; //remove this to make use of the index

    auto cnode = result1->_collectionNode;
    auto& idxPtr = result1->_index;

    // create new index node and register it
    auto condition = std::make_unique<Condition>(plan->getAst()); //What is this condition exactly about
    condition->normalize(plan);
    auto inode = new IndexNode(
        plan, plan->nextId(), cnode->vocbase(),
        cnode->collection(), cnode->outVariable(),
        std::vector<Transaction::IndexHandle>{Transaction::IndexHandle{idxPtr}},
        condition.get(), !elements[0].second);
    plan->registerNode(inode);
    // the raw condition pointer was handed to the IndexNode above, so the
    // unique_ptr gives it up once the node is registered
    condition.release();

    plan->unlinkNode(n);
    plan->replaceNode(cnode, inode);

    // signal that plan has been changed
    modified = true;
  }

  opt->addPlan(plan, rule, modified);

  LOG(OBILEVEL) << "EXIT GEO RULE";
  LOG(OBILEVEL) << "";
}

View File

@ -198,6 +198,7 @@ void prepareTraversalsRule(Optimizer* opt, ExecutionPlan* plan,
/// @brief moves simple subqueries one level higher
void inlineSubqueriesRule(Optimizer*, ExecutionPlan*, Optimizer::Rule const*);
/// @brief optimizer rule that looks for SORT DISTANCE(...) on attributes
/// covered by a geo index
void optimizeGeoIndexRule(Optimizer* opt, ExecutionPlan* plan, Optimizer::Rule const* rule);
} // namespace aql
} // namespace arangodb