// mirror of https://gitee.com/bigwinds/arangodb
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Max Neunhoeffer
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "OptimizerRules.h"
|
|
#include "Aql/AqlItemBlock.h"
|
|
#include "Aql/ClusterNodes.h"
|
|
#include "Aql/CollectNode.h"
|
|
#include "Aql/CollectOptions.h"
|
|
#include "Aql/Collection.h"
|
|
#include "Aql/ConditionFinder.h"
|
|
#include "Aql/DocumentProducingNode.h"
|
|
#include "Aql/ExecutionEngine.h"
|
|
#include "Aql/ExecutionNode.h"
|
|
#include "Aql/ExecutionPlan.h"
|
|
#include "Aql/Function.h"
|
|
#include "Aql/IResearchViewNode.h"
|
|
#include "Aql/IndexNode.h"
|
|
#include "Aql/KShortestPathsNode.h"
|
|
#include "Aql/ModificationNodes.h"
|
|
#include "Aql/Optimizer.h"
|
|
#include "Aql/Query.h"
|
|
#include "Aql/ShortestPathNode.h"
|
|
#include "Aql/SortCondition.h"
|
|
#include "Aql/SortNode.h"
|
|
#include "Aql/TraversalConditionFinder.h"
|
|
#include "Aql/TraversalNode.h"
|
|
#include "Aql/Variable.h"
|
|
#include "Aql/types.h"
|
|
#include "Basics/AttributeNameParser.h"
|
|
#include "Basics/NumberUtils.h"
|
|
#include "Basics/SmallVector.h"
|
|
#include "Basics/StaticStrings.h"
|
|
#include "Basics/StringBuffer.h"
|
|
#include "Cluster/ClusterInfo.h"
|
|
#include "Geo/GeoParams.h"
|
|
#include "GeoIndex/Index.h"
|
|
#include "Graph/TraverserOptions.h"
|
|
#include "Indexes/Index.h"
|
|
#include "StorageEngine/EngineSelectorFeature.h"
|
|
#include "StorageEngine/StorageEngine.h"
|
|
#include "Transaction/Methods.h"
|
|
#include "Utils/CollectionNameResolver.h"
|
|
#include "VocBase/Methods/Collections.h"
|
|
|
|
|
|
#include <boost/optional.hpp>
|
|
#include <tuple>
|
|
|
|
namespace {
|
|
|
|
/// @brief checks whether the calculation node's expression references any
/// variable that is produced by a collection/index/view enumeration, a
/// subquery, or a graph traversal node. `vars` is scratch space and is
/// cleared before use.
bool accessesCollectionVariable(arangodb::aql::ExecutionPlan const* plan,
                                arangodb::aql::CalculationNode const* node,
                                arangodb::HashSet<arangodb::aql::Variable const*>& vars) {
  using EN = arangodb::aql::ExecutionNode;

  vars.clear();
  arangodb::aql::Ast::getReferencedVariables(node->expression()->node(), vars);

  for (auto const& variable : vars) {
    auto producer = plan->getVarSetBy(variable->id);
    if (producer == nullptr) {
      // variable not set inside this plan (e.g. bind parameter)
      continue;
    }
    switch (producer->getType()) {
      case EN::INDEX:
      case EN::ENUMERATE_COLLECTION:
      case EN::ENUMERATE_IRESEARCH_VIEW:
      case EN::SUBQUERY:
      case EN::TRAVERSAL:
      case EN::K_SHORTEST_PATHS:
      case EN::SHORTEST_PATH:
        // variable originates from collection-like data
        return true;
      default:
        break;
    }
  }

  return false;
}
|
|
|
|
// forward declaration; the definition (with full documentation) is further
// below in this anonymous namespace
std::string getSingleShardId(arangodb::aql::ExecutionPlan const* plan,
                             arangodb::aql::ExecutionNode const* node,
                             arangodb::aql::Collection const* collection,
                             arangodb::aql::Variable const* collectionVariable = nullptr);
|
|
|
|
/// @brief return the collection accessed by the given execution node.
/// throws TRI_ERROR_INTERNAL for node types that do not carry a collection.
arangodb::aql::Collection const* getCollection(arangodb::aql::ExecutionNode const* node) {
  using EN = arangodb::aql::ExecutionNode;
  using arangodb::aql::ExecutionNode;

  auto type = node->getType();

  if (type == EN::ENUMERATE_COLLECTION) {
    return ExecutionNode::castTo<arangodb::aql::EnumerateCollectionNode const*>(node)
        ->collection();
  }
  if (type == EN::INDEX) {
    return ExecutionNode::castTo<arangodb::aql::IndexNode const*>(node)->collection();
  }
  if (type == EN::TRAVERSAL || type == EN::K_SHORTEST_PATHS || type == EN::SHORTEST_PATH) {
    // all graph nodes share the GraphNode base class
    return ExecutionNode::castTo<arangodb::aql::GraphNode const*>(node)->collection();
  }

  // note: modification nodes are not covered here yet
  THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
                                 "node type does not have a collection");
}
|
|
|
|
/// @brief return the output variable produced by the given execution node.
/// handles CalculationNode explicitly and any DocumentProducingNode via
/// dynamic_cast; throws TRI_ERROR_INTERNAL otherwise.
arangodb::aql::Variable const* getOutVariable(arangodb::aql::ExecutionNode const* node) {
  using EN = arangodb::aql::ExecutionNode;
  using arangodb::aql::ExecutionNode;

  if (node->getType() == EN::CALCULATION) {
    return ExecutionNode::castTo<arangodb::aql::CalculationNode const*>(node)->outVariable();
  }

  // DocumentProducingNode is a mixin, hence the dynamic_cast
  auto const* producer = dynamic_cast<arangodb::aql::DocumentProducingNode const*>(node);
  if (producer != nullptr) {
    return producer->outVariable();
  }

  // note: modification nodes are not covered here yet
  THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
                                 "node type does not have an out variable");
}
|
|
|
|
/// @brief restrict the given execution node to a single shard.
/// only works for node types implementing CollectionAccessingNode;
/// throws TRI_ERROR_INTERNAL for any other node type.
void restrictToShard(arangodb::aql::ExecutionNode* node, std::string shardId) {
  // CollectionAccessingNode is a mixin outside the ExecutionNode
  // hierarchy, hence the dynamic_cast
  auto* n = dynamic_cast<arangodb::aql::CollectionAccessingNode*>(node);
  if (n != nullptr) {
    return n->restrictToShard(shardId);
  }
  // note: modification nodes are not covered here yet
  THROW_ARANGO_EXCEPTION_MESSAGE(
      TRI_ERROR_INTERNAL, "node type cannot be restricted to a single shard");
}
|
|
|
|
/// @brief hash functor for std::pair, used for the dependency sets below.
/// uses boost-style hash_combine mixing. the previous implementation XOR'ed
/// the two element hashes, which made (a, b) and (b, a) always collide and
/// mapped every (x, x) pair to 0 - degrading the unordered containers that
/// use this functor.
struct PairHash {
  template <class T1, class T2>
  size_t operator()(std::pair<T1, T2> const& pair) const noexcept {
    size_t seed = std::hash<T1>()(pair.first);
    // hash_combine: golden-ratio constant plus shifts spreads the bits so
    // that the combination is order-dependent and rarely cancels out
    seed ^= std::hash<T2>()(pair.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    return seed;
  }
};
|
|
|
|
/// WalkerWorker to track collection variable dependencies
class CollectionVariableTracker final
    : public arangodb::aql::WalkerWorker<arangodb::aql::ExecutionNode> {
  // (variable, collection) pair: the variable (transitively) carries data
  // originating from the given collection
  using DependencyPair =
      std::pair<arangodb::aql::Variable const*, arangodb::aql::Collection const*>;
  using DependencySet = std::unordered_set<DependencyPair, ::PairHash>;
  using VariableSet = arangodb::HashSet<arangodb::aql::Variable const*>;
  // set to true once tracking failed (e.g. allocation threw); results must
  // then be discarded, see isSafeForOptimization()
  bool _stop;
  // per variable: the set of (collection variable, collection) pairs its
  // value transitively depends on
  std::unordered_map<arangodb::aql::Variable const*, DependencySet> _dependencies;
  // per collection: all variables that directly expose its documents
  std::unordered_map<arangodb::aql::Collection const*, VariableSet> _collectionVariables;

 private:
  /// @brief propagate the dependencies of all input variables of `en` to
  /// its output variable `outVariable`
  template <class NodeType>
  void processSetter(arangodb::aql::ExecutionNode const* en,
                     arangodb::aql::Variable const* outVariable) {
    auto node = arangodb::aql::ExecutionNode::castTo<NodeType const*>(en);
    try {
      arangodb::HashSet<arangodb::aql::Variable const*> inputVariables;
      node->getVariablesUsedHere(inputVariables);
      // the output inherits every dependency of every input variable
      for (auto var : inputVariables) {
        for (auto dep : _dependencies[var]) {
          _dependencies[outVariable].emplace(dep);
        }
      }
    } catch (...) {
      _stop = true;  // won't be able to recover correctly
    }
  }

  /// @brief track the OLD/NEW output variables of a data-modification node
  /// as variables exposing documents of the modified collection
  template <class NodeType>
  void processModificationNode(arangodb::aql::ExecutionNode const* en) {
    auto node = arangodb::aql::ExecutionNode::castTo<NodeType const*>(en);
    auto collection = node->collection();
    std::vector<arangodb::aql::Variable const*> outVariables{node->getOutVariableOld(),
                                                             node->getOutVariableNew()};
    // either of OLD/NEW may be unused (nullptr)
    for (auto outVariable : outVariables) {
      if (nullptr != outVariable) {
        processSetter<NodeType>(node, outVariable);
        _collectionVariables[collection].emplace(outVariable);
      }
    }
  }

 public:
  explicit CollectionVariableTracker() : _stop{false} {}

  // false if tracking failed at some point; results are unreliable then
  bool isSafeForOptimization() const { return !_stop; }

  // note: non-const because operator[] inserts an empty set for unknown vars
  DependencySet const& getDependencies(arangodb::aql::Variable const* var) {
    return _dependencies[var];
  }

  // note: non-const because operator[] inserts an empty set for unknown
  // collections
  VariableSet const& getCollectionVariables(arangodb::aql::Collection const* collection) {
    return _collectionVariables[collection];
  }

  void after(arangodb::aql::ExecutionNode* en) override final {
    using EN = arangodb::aql::ExecutionNode;
    using arangodb::aql::ExecutionNode;

    switch (en->getType()) {
      case EN::CALCULATION: {
        // a calculation only propagates the dependencies of its inputs
        auto outVariable = ::getOutVariable(en);
        processSetter<arangodb::aql::CalculationNode>(en, outVariable);
        break;
      }

      case EN::INDEX:
      case EN::ENUMERATE_COLLECTION: {
        auto collection = ::getCollection(en);
        auto variable = ::getOutVariable(en);

        // originates the collection variable, direct dependence
        try {
          _dependencies[variable].emplace(variable, collection);
          _collectionVariables[collection].emplace(variable);
        } catch (...) {
          _stop = true;  // we won't be able to figure it out
        }
        break;
      }

      case EN::UPDATE: {
        processModificationNode<arangodb::aql::UpdateNode>(en);
        break;
      }

      case EN::UPSERT: {
        processModificationNode<arangodb::aql::UpsertNode>(en);
        break;
      }

      case EN::INSERT: {
        processModificationNode<arangodb::aql::InsertNode>(en);
        break;
      }

      case EN::REMOVE: {
        processModificationNode<arangodb::aql::RemoveNode>(en);
        break;
      }

      case EN::REPLACE: {
        processModificationNode<arangodb::aql::ReplaceNode>(en);
        break;
      }

      default: {
        // we don't support other node types yet
        break;
      }
    }
  }
};
|
|
|
|
/// WalkerWorker for restrictToSingleShard
|
|
class RestrictToSingleShardChecker final
|
|
: public arangodb::aql::WalkerWorker<arangodb::aql::ExecutionNode> {
|
|
arangodb::aql::ExecutionPlan* _plan;
|
|
CollectionVariableTracker& _tracker;
|
|
std::unordered_map<arangodb::aql::Variable const*, std::unordered_set<std::string>> _shardsUsed;
|
|
std::unordered_map<arangodb::aql::Variable const*, std::unordered_set<std::string>> _shardsCleared;
|
|
bool _stop;
|
|
std::map<arangodb::aql::Collection const*, bool> _unsafe;
|
|
|
|
public:
|
|
explicit RestrictToSingleShardChecker(arangodb::aql::ExecutionPlan* plan,
|
|
CollectionVariableTracker& tracker)
|
|
: _plan{plan}, _tracker{tracker}, _stop{false} {}
|
|
|
|
bool isSafeForOptimization() const {
|
|
// we have found something in the execution plan that will
|
|
// render the optimization unsafe
|
|
return (!_stop && !_plan->getAst()->functionsMayAccessDocuments());
|
|
}
|
|
|
|
std::string getShard(arangodb::aql::Variable const* variable) const {
|
|
auto const& it = _shardsCleared.find(variable);
|
|
if (it == _shardsCleared.end()) {
|
|
return "";
|
|
}
|
|
|
|
auto set = it->second;
|
|
if (set.size() != 1 || *set.begin() == "all") {
|
|
return "";
|
|
}
|
|
|
|
return *set.begin();
|
|
}
|
|
|
|
bool isSafeForOptimization(arangodb::aql::Collection const* collection) const {
|
|
auto it = _unsafe.find(collection);
|
|
if (it == _unsafe.end()) {
|
|
return true;
|
|
}
|
|
return !it->second;
|
|
}
|
|
|
|
bool isSafeForOptimization(arangodb::aql::Variable const* variable) const {
|
|
auto it = _shardsCleared.find(variable);
|
|
if (it == _shardsCleared.end()) {
|
|
return false;
|
|
}
|
|
|
|
if ((*it).second.size() != 1) {
|
|
// more than one shard
|
|
return false;
|
|
}
|
|
|
|
// check for "all" marker
|
|
auto it2 = (*it).second.find("all");
|
|
if (it2 != (*it).second.end()) {
|
|
// "all" included
|
|
return false;
|
|
}
|
|
|
|
// all good -> safe to optimize
|
|
return true;
|
|
}
|
|
|
|
bool enterSubquery(arangodb::aql::ExecutionNode*, arangodb::aql::ExecutionNode*) override final {
|
|
return true;
|
|
}
|
|
|
|
bool before(arangodb::aql::ExecutionNode* en) override final {
|
|
using EN = arangodb::aql::ExecutionNode;
|
|
using arangodb::aql::ExecutionNode;
|
|
|
|
switch (en->getType()) {
|
|
case EN::TRAVERSAL:
|
|
case EN::K_SHORTEST_PATHS:
|
|
case EN::SHORTEST_PATH: {
|
|
_stop = true;
|
|
return true; // abort enumerating, we are done already!
|
|
}
|
|
|
|
case EN::FILTER: {
|
|
auto node = ExecutionNode::castTo<arangodb::aql::FilterNode const*>(en);
|
|
arangodb::aql::Variable const* inputVariable = node->inVariable();
|
|
handleInputVariable(en, inputVariable);
|
|
break;
|
|
}
|
|
|
|
case EN::ENUMERATE_COLLECTION: {
|
|
handleSourceNode(en);
|
|
break;
|
|
}
|
|
|
|
case EN::INDEX: {
|
|
handleIndexNode(en);
|
|
handleSourceNode(en);
|
|
break;
|
|
}
|
|
|
|
case EN::INSERT:
|
|
case EN::REPLACE:
|
|
case EN::UPDATE:
|
|
case EN::REMOVE: {
|
|
auto node = ExecutionNode::castTo<arangodb::aql::ModificationNode const*>(en);
|
|
// make sure we don't restrict this collection via a lower filter
|
|
_shardsUsed.clear();
|
|
std::string shardId = ::getSingleShardId(_plan, en, node->collection());
|
|
if (shardId.empty()) {
|
|
// mark the collection unsafe to restrict
|
|
_unsafe[node->collection()] = true;
|
|
}
|
|
// no need to track the shardId, we'll find it again later
|
|
break;
|
|
}
|
|
|
|
default: {
|
|
// we don't care about other execution node types here
|
|
break;
|
|
}
|
|
}
|
|
|
|
return false; // go on
|
|
}
|
|
|
|
private:
|
|
void handleInputVariable(arangodb::aql::ExecutionNode const* en,
|
|
arangodb::aql::Variable const* inputVariable) {
|
|
auto dependencies = _tracker.getDependencies(inputVariable);
|
|
for (auto dep : dependencies) {
|
|
auto variable = dep.first;
|
|
auto collection = dep.second;
|
|
auto shardId = ::getSingleShardId(_plan, en, collection, variable);
|
|
if (shardId.empty()) {
|
|
if (_shardsUsed[variable].empty()) {
|
|
_shardsUsed[variable].emplace("all");
|
|
}
|
|
} else {
|
|
if (1 == _shardsUsed[variable].size() && "all" == *_shardsUsed[variable].begin()) {
|
|
_shardsUsed[variable].clear();
|
|
}
|
|
_shardsUsed[variable].emplace(shardId);
|
|
}
|
|
}
|
|
}
|
|
|
|
void handleIndexNode(arangodb::aql::ExecutionNode const* en) {
|
|
auto collection = ::getCollection(en);
|
|
auto variable = ::getOutVariable(en);
|
|
auto shardId = ::getSingleShardId(_plan, en, collection, variable);
|
|
if (shardId.empty()) {
|
|
if (_shardsUsed[variable].empty()) {
|
|
_shardsUsed[variable].emplace("all");
|
|
}
|
|
} else {
|
|
if (1 == _shardsUsed[variable].size() && "all" == *_shardsUsed[variable].begin()) {
|
|
_shardsUsed[variable].clear();
|
|
}
|
|
_shardsUsed[variable].emplace(shardId);
|
|
}
|
|
}
|
|
|
|
void handleSourceNode(arangodb::aql::ExecutionNode const* en) {
|
|
auto variable = ::getOutVariable(en);
|
|
// now move all shards for this variable to the cleared list
|
|
_shardsCleared[variable] = std::move(_shardsUsed[variable]);
|
|
}
|
|
};
|
|
|
|
/// @brief inspect a binary == comparison. if one operand is an attribute
/// access on `inputVariable` naming one of the still-missing shard key
/// attributes in `toFind`, and the other operand is a constant, append the
/// attribute/value pair to `builder` and remove the attribute from `toFind`.
void findShardKeyInComparison(arangodb::aql::AstNode const* root,
                              arangodb::aql::Variable const* inputVariable,
                              std::unordered_set<std::string>& toFind,
                              arangodb::velocypack::Builder& builder) {
  using arangodb::aql::AstNode;
  using arangodb::aql::Variable;
  TRI_ASSERT(root->type == arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_EQ);

  AstNode const* value = nullptr;
  std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> pair;

  auto lhs = root->getMember(0);
  auto rhs = root->getMember(1);
  std::string result;

  // handle both operand orders: `var.attr == const` and `const == var.attr`
  if (lhs->isAttributeAccessForVariable(pair, false) &&
      pair.first == inputVariable && rhs->isConstant()) {
    TRI_AttributeNamesToString(pair.second, result, true);
    value = rhs;
  } else if (rhs->isAttributeAccessForVariable(pair, false) &&
             pair.first == inputVariable && lhs->isConstant()) {
    TRI_AttributeNamesToString(pair.second, result, true);
    value = lhs;
  }

  if (value != nullptr) {
    TRI_ASSERT(!result.empty());
    auto it = toFind.find(result);

    if (it != toFind.end()) {
      // found one of the shard key attributes: record its constant value
      builder.add(VPackValue(result));
      value->toVelocyPackValue(builder);

      // this shard key is now accounted for
      toFind.erase(it);
    }
  }
}
|
|
|
|
/// @brief walk an AND/OR condition tree and collect `var.key == const`
/// comparisons against shard key attributes of `inputVariable`. found
/// key/value pairs are appended to `builder` and erased from `toFind`.
void findShardKeysInExpression(arangodb::aql::AstNode const* root,
                               arangodb::aql::Variable const* inputVariable,
                               std::unordered_set<std::string>& toFind,
                               arangodb::velocypack::Builder& builder) {
  if (root == nullptr) {
    return;
  }

  switch (root->type) {
    case arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_NARY_OR: {
      // only an OR with a single AND branch can be analyzed: peel off the
      // branch and continue as if it were the AND condition itself
      if (root->numMembers() != 1) {
        return;
      }
      root = root->getMember(0);
      if (root == nullptr || root->type != arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_NARY_AND) {
        return;
      }
    } // falls through
    case arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_AND:
    case arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_NARY_AND: {
      // inspect every == comparison in the conjunction
      for (size_t i = 0; i < root->numMembers(); ++i) {
        if (root->getMember(i) != nullptr &&
            root->getMember(i)->type == arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_EQ) {
          findShardKeyInComparison(root->getMember(i), inputVariable, toFind, builder);
        }
      }
      break;
    }
    case arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_EQ: {
      // single == comparison at the top level
      findShardKeyInComparison(root, inputVariable, toFind, builder);
      break;
    }
    default:
      break;
  }
}
|
|
|
|
// static node types used by some optimizer rules
// having them statically available avoids having to build the vectors over
// and over for each AQL query
std::vector<arangodb::aql::ExecutionNode::NodeType> const removeUnnecessaryCalculationsNodeTypes{
    arangodb::aql::ExecutionNode::CALCULATION, arangodb::aql::ExecutionNode::SUBQUERY};
std::vector<arangodb::aql::ExecutionNode::NodeType> const interchangeAdjacentEnumerationsNodeTypes{
    arangodb::aql::ExecutionNode::ENUMERATE_COLLECTION,
    arangodb::aql::ExecutionNode::ENUMERATE_LIST};
// node types that access collection data and may need scatter/gather wrapping
std::vector<arangodb::aql::ExecutionNode::NodeType> const scatterInClusterNodeTypes{
    arangodb::aql::ExecutionNode::ENUMERATE_COLLECTION,
    arangodb::aql::ExecutionNode::INDEX,
    arangodb::aql::ExecutionNode::INSERT,
    arangodb::aql::ExecutionNode::UPDATE,
    arangodb::aql::ExecutionNode::REPLACE,
    arangodb::aql::ExecutionNode::REMOVE,
    arangodb::aql::ExecutionNode::UPSERT};
// data-modification node types (these can produce OLD/NEW out variables)
std::vector<arangodb::aql::ExecutionNode::NodeType> const removeDataModificationOutVariablesNodeTypes{
    arangodb::aql::ExecutionNode::REMOVE, arangodb::aql::ExecutionNode::INSERT,
    arangodb::aql::ExecutionNode::UPDATE, arangodb::aql::ExecutionNode::REPLACE,
    arangodb::aql::ExecutionNode::UPSERT};
std::vector<arangodb::aql::ExecutionNode::NodeType> const patchUpdateStatementsNodeTypes{
    arangodb::aql::ExecutionNode::UPDATE, arangodb::aql::ExecutionNode::REPLACE};
std::vector<arangodb::aql::ExecutionNode::NodeType> const patchUpdateRemoveStatementsNodeTypes{
    arangodb::aql::ExecutionNode::UPDATE, arangodb::aql::ExecutionNode::REPLACE,
    arangodb::aql::ExecutionNode::REMOVE};
|
|
|
|
/// @brief find the single shard id for the node to restrict an operation to
/// this will check the conditions of an IndexNode or a data-modification node
/// (excluding UPSERT) and check if all shard keys are used in it. If all
/// shard keys are present and their values are fixed (constants), this
/// function will try to figure out the target shard. If the operation cannot
/// be restricted to a single shard, this function will return an empty string
std::string getSingleShardId(arangodb::aql::ExecutionPlan const* plan,
                             arangodb::aql::ExecutionNode const* node,
                             arangodb::aql::Collection const* collection,
                             arangodb::aql::Variable const* collectionVariable) {
  using EN = arangodb::aql::ExecutionNode;
  using arangodb::aql::ExecutionNode;

  if (collection->isSmart() && collection->getCollection()->type() == TRI_COL_TYPE_EDGE) {
    // no support for smart edge collections
    return std::string();
  }

  TRI_ASSERT(node->getType() == EN::INDEX || node->getType() == EN::FILTER ||
             node->getType() == EN::INSERT || node->getType() == EN::UPDATE ||
             node->getType() == EN::REPLACE || node->getType() == EN::REMOVE);

  // determine the variable that carries the lookup value / document data,
  // depending on the node type
  arangodb::aql::Variable const* inputVariable = nullptr;
  if (node->getType() == EN::INDEX) {
    inputVariable =
        ExecutionNode::castTo<arangodb::aql::IndexNode const*>(node)->outVariable();
  } else if (node->getType() == EN::FILTER) {
    inputVariable =
        ExecutionNode::castTo<arangodb::aql::FilterNode const*>(node)->inVariable();
  } else if (node->getType() == EN::INSERT) {
    inputVariable =
        ExecutionNode::castTo<arangodb::aql::InsertNode const*>(node)->inVariable();
  } else if (node->getType() == EN::REMOVE) {
    inputVariable =
        ExecutionNode::castTo<arangodb::aql::RemoveNode const*>(node)->inVariable();
  } else if (node->getType() == EN::REPLACE || node->getType() == EN::UPDATE) {
    auto updateReplaceNode =
        ExecutionNode::castTo<arangodb::aql::UpdateReplaceNode const*>(node);
    // prefer the key variable if present, otherwise use the document variable
    if (updateReplaceNode->inKeyVariable() != nullptr) {
      inputVariable = updateReplaceNode->inKeyVariable();
    } else {
      inputVariable = updateReplaceNode->inDocVariable();
    }
  } else {
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error");
  }

  TRI_ASSERT(inputVariable != nullptr);

  // check if we can easily find out the setter of the input variable
  // (and if we can find it, check if the data is constant so we can look
  // up the shard key attribute values)
  auto setter = plan->getVarSetBy(inputVariable->id);

  if (setter == nullptr) {
    // oops!
    TRI_ASSERT(false);
    return std::string();
  }

  // note for which shard keys we need to look for
  auto shardKeys = collection->shardKeys(true);
  std::unordered_set<std::string> toFind;
  for (auto const& it : shardKeys) {
    if (it.find('.') != std::string::npos) {
      // shard key containing a "." (sub-attribute). this is not yet supported
      return std::string();
    }
    toFind.emplace(it);
  }

  // builder collects the shard key attribute/value pairs as an object,
  // which is later passed to getResponsibleShard()
  VPackBuilder builder;
  builder.openObject();

  if (setter->getType() == EN::CALCULATION) {
    arangodb::aql::CalculationNode const* c =
        ExecutionNode::castTo<arangodb::aql::CalculationNode const*>(setter);
    auto ex = c->expression();

    if (ex == nullptr) {
      return std::string();
    }

    auto n = ex->node();
    if (n == nullptr) {
      return std::string();
    }

    if (n->isStringValue()) {
      if (!n->isConstant() || toFind.size() != 1 ||
          toFind.find(arangodb::StaticStrings::KeyString) == toFind.end()) {
        return std::string();
      }

      // the lookup value is a string, and the only shard key is _key: so we
      // can use it
      builder.add(VPackValue(arangodb::StaticStrings::KeyString));
      n->toVelocyPackValue(builder);
      toFind.clear();
    } else if (n->isObject()) {
      // go through the input object attribute by attribute
      // and look for our shard keys
      for (size_t i = 0; i < n->numMembers(); ++i) {
        auto sub = n->getMember(i);

        if (sub->type != arangodb::aql::AstNodeType::NODE_TYPE_OBJECT_ELEMENT) {
          continue;
        }

        auto it = toFind.find(sub->getString());

        if (it != toFind.end()) {
          // we found one of the shard keys!
          auto v = sub->getMember(0);
          if (v->isConstant()) {
            // if the attribute value is a constant, we copy it into our
            // builder
            builder.add(VPackValue(sub->getString()));
            v->toVelocyPackValue(builder);
            // remove the attribute from our to-do list
            toFind.erase(it);
          }
        }
      }
    } else {
      // any other expression: scan it for `var.shardKey == constant`
      // comparisons
      if (nullptr != collectionVariable) {
        ::findShardKeysInExpression(n, collectionVariable, toFind, builder);
      } else {
        ::findShardKeysInExpression(n, inputVariable, toFind, builder);
      }
    }
  } else if (setter->getType() == ExecutionNode::INDEX && setter == node) {
    auto const* c = ExecutionNode::castTo<arangodb::aql::IndexNode const*>(setter);

    if (c->getIndexes().size() != 1) {
      // we can only handle a single index here
      return std::string();
    }
    auto const* condition = c->condition();

    if (condition == nullptr) {
      return std::string();
    }

    arangodb::aql::AstNode const* root = condition->root();
    ::findShardKeysInExpression(root, inputVariable, toFind, builder);
  }

  builder.close();

  if (!toFind.empty()) {
    // at least one shard key attribute could not be resolved to a constant
    return std::string();
  }

  // all shard keys found!!

  // find the responsible shard for the data
  std::string shardId;

  int res = collection->getCollection()->getResponsibleShard(builder.slice(), true, shardId);

  if (res != TRI_ERROR_NO_ERROR) {
    // some error occurred. better do not use the
    // single shard optimization here
    return std::string();
  }

  // we will only need a single shard!
  return shardId;
}
|
|
|
|
/// @brief heuristic deciding whether a SORT followed by a LIMIT should use a
/// bounded heap (constrained sort) instead of sorting the full input
bool shouldApplyHeapOptimization(arangodb::aql::ExecutionNode* node,
                                 arangodb::aql::LimitNode* limit) {
  TRI_ASSERT(node != nullptr);
  TRI_ASSERT(limit != nullptr);
  // input: estimated number of rows fed into the sort;
  // output: number of rows that must survive (limit + offset)
  size_t input = node->getCost().estimatedNrItems;
  size_t output = limit->limit() + limit->offset();

  // first check an easy case
  if (input < 100) {  // TODO fine-tune this cut-off
    // no reason to complicate things for such a small input
    return false;
  }

  // now check something a little more sophisticated, comparing best estimate of
  // cost of heap sort to cost of regular sort (ignoring some variables)
  double N = static_cast<double>(input);
  double M = static_cast<double>(output);
  double lgN = std::log2(N);
  double lgM = std::log2(M);
  // NOTE(review): if limit + offset == 0, lgM is -inf and the comparison
  // below yields true - confirm that a zero LIMIT cannot reach this point

  // the 0.25 here comes from some experiments, may need to be tweaked;
  // should kick in if output is roughly at most 3/4 of input
  return (0.25 * N * lgM + M * lgM) < (N * lgN);
}
|
|
|
|
} // namespace
|
|
|
|
using namespace arangodb;
|
|
using namespace arangodb::aql;
|
|
using EN = arangodb::aql::ExecutionNode;
|
|
|
|
namespace arangodb {
|
|
namespace aql {
|
|
|
|
// TODO cleanup this f-ing aql::Collection(s) mess
/// @brief register a collection with the query's collection list (READ
/// access). outside a coordinator, also resolves the underlying collection
/// object and registers it with the running transaction.
/// returns nullptr when `cname` is empty; when `assert` is true, a nullptr
/// result triggers a maintainer-mode assertion.
/// NOTE(review): the parameter name `assert` collides with the <cassert>
/// macro name - this only compiles while <cassert> is not included here
Collection* addCollectionToQuery(Query* query, std::string const& cname, bool assert) {
  aql::Collection* coll = nullptr;

  if (!cname.empty()) {
    coll = query->addCollection(cname, AccessMode::Type::READ);

    if (!ServerState::instance()->isCoordinator()) {
      // on a single server / DB server we can resolve the physical collection
      TRI_ASSERT(coll != nullptr);
      auto cptr = query->trx()->vocbase().lookupCollection(cname);

      coll->setCollection(cptr.get());
      query->trx()->addCollectionAtRuntime(cname);
    }
  }

  if (assert) {
    TRI_ASSERT(coll != nullptr);
  }

  return coll;
}
|
|
|
|
} // namespace aql
|
|
} // namespace arangodb
|
|
|
|
/// @brief adds a SORT operation for IN right-hand side operands
/// for `x IN values` / `x NOT IN values` filters, wraps the right-hand side
/// in SORTED_UNIQUE(...) (either in place for inline arrays, or via an
/// injected CalculationNode) and marks the IN operator as sorted, so the
/// lookup can use binary search instead of a linear scan
void arangodb::aql::sortInValuesRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                     OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::FILTER, true);

  bool modified = false;

  for (auto const& n : nodes) {
    // now check who introduced our variable
    auto variable = ExecutionNode::castTo<FilterNode const*>(n)->inVariable();
    auto setter = plan->getVarSetBy(variable->id);

    if (setter == nullptr || setter->getType() != EN::CALCULATION) {
      // filter variable was not introduced by a calculation.
      continue;
    }

    // filter variable was introduced a CalculationNode. now check the
    // expression
    auto s = ExecutionNode::castTo<CalculationNode*>(setter);
    auto filterExpression = s->expression();
    auto* inNode = filterExpression->nodeForModification();

    TRI_ASSERT(inNode != nullptr);

    // check the filter condition
    if ((inNode->type != NODE_TYPE_OPERATOR_BINARY_IN && inNode->type != NODE_TYPE_OPERATOR_BINARY_NIN) ||
        !inNode->isDeterministic()) {
      // we better not tamper with this filter
      continue;
    }

    auto rhs = inNode->getMember(1);

    // only variable references and inline arrays can be handled
    if (rhs->type != NODE_TYPE_REFERENCE && rhs->type != NODE_TYPE_ARRAY) {
      continue;
    }

    auto loop = n->getLoop();

    if (loop == nullptr) {
      // FILTER is not used inside a loop. so it will be used at most once
      // not need to sort the IN values then
      continue;
    }

    if (rhs->type == NODE_TYPE_ARRAY) {
      if (rhs->numMembers() < AstNode::SortNumberThreshold || rhs->isSorted()) {
        // number of values is below threshold or array is already sorted
        continue;
      }

      // inline array: wrap it in SORTED_UNIQUE(...) directly
      auto ast = plan->getAst();
      auto args = ast->createNodeArray();
      args->addMember(rhs);
      auto sorted =
          ast->createNodeFunctionCall(TRI_CHAR_LENGTH_PAIR("SORTED_UNIQUE"), args);
      inNode->changeMember(1, sorted);
      modified = true;
      continue;
    }

    // rhs is a reference to another variable; find that variable's setter
    variable = static_cast<Variable const*>(rhs->getData());
    setter = plan->getVarSetBy(variable->id);

    if (setter == nullptr ||
        (setter->getType() != EN::CALCULATION && setter->getType() != EN::SUBQUERY)) {
      // variable itself was not introduced by a calculation.
      continue;
    }

    if (loop == setter->getLoop()) {
      // the FILTER and its value calculation are contained in the same loop
      // this means the FILTER will be executed as many times as its value
      // calculation. sorting the IN values will not provide a benefit here
      continue;
    }

    auto ast = plan->getAst();
    AstNode const* originalArg = nullptr;

    if (setter->getType() == EN::CALCULATION) {
      AstNode const* originalNode =
          ExecutionNode::castTo<CalculationNode*>(setter)->expression()->node();
      TRI_ASSERT(originalNode != nullptr);

      AstNode const* testNode = originalNode;

      if (originalNode->type == NODE_TYPE_FCALL &&
          static_cast<Function const*>(originalNode->getData())->hasFlag(Function::Flags::NoEval)) {
        // bypass NOOPT(...) for testing
        TRI_ASSERT(originalNode->numMembers() == 1);
        auto args = originalNode->getMember(0);

        if (args->numMembers() > 0) {
          testNode = args->getMember(0);
        }
      }

      if (testNode->type == NODE_TYPE_VALUE || testNode->type == NODE_TYPE_OBJECT) {
        // not really usable...
        continue;
      }

      if (testNode->type == NODE_TYPE_ARRAY &&
          testNode->numMembers() < AstNode::SortNumberThreshold) {
        // number of values is below threshold
        continue;
      }

      if (testNode->isSorted()) {
        // already sorted
        continue;
      }

      originalArg = originalNode;
    } else {
      TRI_ASSERT(setter->getType() == EN::SUBQUERY);
      auto sub = ExecutionNode::castTo<SubqueryNode*>(setter);

      // estimate items in subquery
      CostEstimate estimate = sub->getSubquery()->getCost();

      if (estimate.estimatedNrItems < AstNode::SortNumberThreshold) {
        continue;
      }

      originalArg = ast->createNodeReference(sub->outVariable());
    }

    TRI_ASSERT(originalArg != nullptr);

    // build a new calculation: <tmpVar> = SORTED_UNIQUE(<original values>)
    auto args = ast->createNodeArray();
    args->addMember(originalArg);
    auto sorted =
        ast->createNodeFunctionCall(TRI_CHAR_LENGTH_PAIR("SORTED_UNIQUE"), args);

    auto outVar = ast->variables()->createTemporaryVariable();
    ExecutionNode* calculationNode = nullptr;
    auto expression = new Expression(plan.get(), ast, sorted);
    try {
      calculationNode =
          new CalculationNode(plan.get(), plan->nextId(), expression, outVar);
    } catch (...) {
      // CalculationNode takes ownership of the expression only on success;
      // on failure we must free it ourselves
      delete expression;
      throw;
    }
    plan->registerNode(calculationNode);

    // make the new node a parent of the original calculation node
    TRI_ASSERT(setter != nullptr);
    calculationNode->addDependency(setter);
    auto oldParent = setter->getFirstParent();
    TRI_ASSERT(oldParent != nullptr);
    calculationNode->addParent(oldParent);

    // splice the new calculation between setter and its former parent
    oldParent->removeDependencies();
    oldParent->addDependency(calculationNode);
    setter->setParent(calculationNode);

    AstNode* clone = ast->clone(inNode);
    // set sortedness bit for the IN operator
    clone->setBoolValue(true);
    // finally adjust the variable inside the IN calculation
    clone->changeMember(1, ast->createNodeReference(outVar));
    filterExpression->replaceNode(clone);

    modified = true;
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief remove redundant sorts
/// this rule modifies the plan in place:
/// - sorts that are covered by earlier sorts will be removed
void arangodb::aql::removeRedundantSortsRule(Optimizer* opt,
                                             std::unique_ptr<ExecutionPlan> plan,
                                             OptimizerRule const* rule) {
  // collect all SORT nodes in the plan (including those inside subqueries)
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::SORT, true);

  if (nodes.empty()) {
    // quick exit
    opt->addPlan(std::move(plan), rule, false);
    return;
  }

  // nodes scheduled for removal; checked on each iteration so we do not
  // process a SORT that an earlier iteration already decided to drop
  arangodb::HashSet<ExecutionNode*> toUnlink;
  // scratch buffer reused by all getSortInformation() calls below
  arangodb::basics::StringBuffer buffer;

  for (auto const& n : nodes) {
    if (toUnlink.find(n) != toUnlink.end()) {
      // encountered a sort node that we already deleted
      continue;
    }

    auto const sortNode = ExecutionNode::castTo<SortNode*>(n);

    auto sortInfo = sortNode->getSortInformation(plan.get(), &buffer);

    if (sortInfo.isValid && !sortInfo.criteria.empty()) {
      // we found a sort that we can understand
      std::vector<ExecutionNode*> stack;

      // walk upwards (towards the dependencies) from this SORT node
      sortNode->dependencies(stack);

      // number of nodes encountered between the two sorts whose results may
      // depend on the sort order (e.g. non-deterministic calculations,
      // enumerations). if > 0, we must not merge/remove the sorts.
      int nodesRelyingOnSort = 0;

      while (!stack.empty()) {
        auto current = stack.back();
        stack.pop_back();

        if (current->getType() == EN::SORT) {
          // we found another sort. now check if they are compatible!

          auto other =
              ExecutionNode::castTo<SortNode*>(current)->getSortInformation(plan.get(), &buffer);

          switch (sortInfo.isCoveredBy(other)) {
            case SortInformation::unequal: {
              // different sort criteria
              if (nodesRelyingOnSort == 0) {
                // a sort directly followed by another sort: now remove one of
                // them

                if (!other.isDeterministic) {
                  // if the sort is non-deterministic, we must not remove it
                  break;
                }

                if (sortNode->isStable()) {
                  // we should not optimize predecessors of a stable sort (used
                  // in a COLLECT node)
                  // the stable sort is for a reason, and removing any
                  // predecessors sorts might
                  // change the result
                  break;
                }

                // remove sort that is a direct predecessor of a sort
                toUnlink.emplace(current);
              }
              break;
            }

            case SortInformation::otherLessAccurate: {
              // the earlier sort is less accurate than ours: remove it
              toUnlink.emplace(current);
              break;
            }

            case SortInformation::ourselvesLessAccurate: {
              // the sort at the start of the pipeline makes the sort at the end
              // superfluous, so we'll remove it
              toUnlink.emplace(n);
              break;
            }

            case SortInformation::allEqual: {
              // the sort at the end of the pipeline makes the sort at the start
              // superfluous, so we'll remove it
              toUnlink.emplace(current);
              break;
            }
          }
        } else if (current->getType() == EN::FILTER) {
          // ok: a filter does not depend on sort order
        } else if (current->getType() == EN::CALCULATION) {
          // ok: a calculation does not depend on sort order only if it is
          // deterministic
          if (!current->isDeterministic()) {
            ++nodesRelyingOnSort;
          }
        } else if (current->getType() == EN::ENUMERATE_LIST ||
                   current->getType() == EN::ENUMERATE_COLLECTION ||
                   current->getType() == EN::TRAVERSAL ||
                   current->getType() == EN::K_SHORTEST_PATHS ||
                   current->getType() == EN::SHORTEST_PATH) {
          // ok, but we cannot remove two different sorts if one of these node
          // types is between them
          // example: in the following query, the one sort will be optimized
          // away:
          //   FOR i IN [ { a: 1 }, { a: 2 } , { a: 3 } ] SORT i.a ASC SORT i.a
          //   DESC RETURN i
          // but in the following query, the sorts will stay:
          //   FOR i IN [ { a: 1 }, { a: 2 } , { a: 3 } ] SORT i.a ASC LET a =
          //   i.a SORT i.a DESC RETURN i
          ++nodesRelyingOnSort;
        } else {
          // abort at all other type of nodes. we cannot remove a sort beyond
          // them
          // this includes COLLECT and LIMIT
          break;
        }

        if (!current->hasDependency()) {
          // node either has no or more than one dependency. we don't know what
          // to do and must abort
          // note: this will also handle Singleton nodes
          break;
        }

        // continue walking upwards
        current->dependencies(stack);
      }

      if (toUnlink.find(n) == toUnlink.end() && sortNode->simplify(plan.get())) {
        // sort node had only constant expressions. it will make no difference
        // if we execute it or not
        // so we can remove it
        toUnlink.emplace(n);
      }
    }
  }

  if (!toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(std::move(plan), rule, !toUnlink.empty());
}
|
|
|
|
/// @brief remove all unnecessary filters
|
|
/// this rule modifies the plan in place:
|
|
/// - filters that are always true are removed completely
|
|
/// - filters that are always false will be replaced by a NoResults node
|
|
void arangodb::aql::removeUnnecessaryFiltersRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
plan->findNodesOfType(nodes, EN::FILTER, true);
|
|
|
|
bool modified = false;
|
|
arangodb::HashSet<ExecutionNode*> toUnlink;
|
|
|
|
for (auto const& n : nodes) {
|
|
// now check who introduced our variable
|
|
auto variable = ExecutionNode::castTo<FilterNode const*>(n)->inVariable();
|
|
auto setter = plan->getVarSetBy(variable->id);
|
|
|
|
if (setter == nullptr || setter->getType() != EN::CALCULATION) {
|
|
// filter variable was not introduced by a calculation.
|
|
continue;
|
|
}
|
|
|
|
// filter variable was introduced a CalculationNode. now check the
|
|
// expression
|
|
auto s = ExecutionNode::castTo<CalculationNode*>(setter);
|
|
auto root = s->expression()->node();
|
|
|
|
TRI_ASSERT(root != nullptr);
|
|
|
|
if (!root->isDeterministic()) {
|
|
// we better not tamper with this filter
|
|
continue;
|
|
}
|
|
|
|
// filter expression is constant and thus cannot throw
|
|
// we can now evaluate it safely
|
|
|
|
if (root->isTrue()) {
|
|
// filter is always true
|
|
// remove filter node and merge with following node
|
|
toUnlink.emplace(n);
|
|
modified = true;
|
|
} else if (root->isFalse()) {
|
|
// filter is always false
|
|
// now insert a NoResults node below it
|
|
auto noResults = new NoResultsNode(plan.get(), plan->nextId());
|
|
plan->registerNode(noResults);
|
|
plan->replaceNode(n, noResults);
|
|
modified = true;
|
|
}
|
|
}
|
|
|
|
if (!toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
}
|
|
|
|
opt->addPlan(std::move(plan), rule, modified);
|
|
}
|
|
|
|
/// @brief remove INTO of a COLLECT if not used
/// additionally remove all unused aggregate calculations from a COLLECT
void arangodb::aql::removeCollectVariablesRule(Optimizer* opt,
                                               std::unique_ptr<ExecutionPlan> plan,
                                               OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::COLLECT, true);

  bool modified = false;

  for (auto const& n : nodes) {
    auto collectNode = ExecutionNode::castTo<CollectNode*>(n);
    TRI_ASSERT(collectNode != nullptr);

    auto const& varsUsedLater = n->getVarsUsedLater();
    auto outVariable = collectNode->outVariable();

    if (outVariable != nullptr &&
        varsUsedLater.find(outVariable) == varsUsedLater.end()) {
      // outVariable not used later
      if (!collectNode->count()) {
        collectNode->clearOutVariable();
      }
      modified = true;
    } else if (outVariable != nullptr && !collectNode->count() &&
               !collectNode->hasExpressionVariable() && !collectNode->hasKeepVariables()) {
      // outVariable used later, no count, no INTO expression, no KEEP
      // e.g. COLLECT something INTO g
      // we will now check how many part of "g" are used later
      std::unordered_set<std::string> keepAttributes;

      // walk the parents (downstream nodes) and gather the attribute names of
      // the INTO variable that are actually referenced. if any usage is not
      // safely analyzable, we must not restrict the INTO variable.
      bool stop = false;
      auto p = collectNode->getFirstParent();
      while (p != nullptr) {
        if (p->getType() == EN::CALCULATION) {
          auto cc = ExecutionNode::castTo<CalculationNode const*>(p);
          Expression const* exp = cc->expression();
          if (exp != nullptr && exp->node() != nullptr) {
            bool isSafeForOptimization;
            auto usedThere =
                Ast::getReferencedAttributesForKeep(exp->node(), outVariable,
                                                    isSafeForOptimization);
            if (isSafeForOptimization) {
              for (auto const& it : usedThere) {
                keepAttributes.emplace(it);
              }
            } else {
              stop = true;
            }
          }
        }
        if (stop) {
          break;
        }
        p = p->getFirstParent();
      }

      if (!stop) {
        std::vector<Variable const*> keepVariables;
        // we are allowed to do the optimization:
        // match the collected attribute names against variables set by the
        // dependency chain above the COLLECT, turning them into KEEP variables
        auto current = n->getFirstDependency();
        while (current != nullptr) {
          for (auto const& var : current->getVariablesSetHere()) {
            for (auto it = keepAttributes.begin(); it != keepAttributes.end();
                 /* no hoisting */) {
              if ((*it) == var->name) {
                keepVariables.emplace_back(var);
                it = keepAttributes.erase(it);
              } else {
                ++it;
              }
            }
          }
          if (keepAttributes.empty()) {
            // done
            break;
          }
          current = current->getFirstDependency();
        }

        // only apply if every referenced attribute was resolved to a variable
        if (keepAttributes.empty() && !keepVariables.empty()) {
          collectNode->setKeepVariables(std::move(keepVariables));
          modified = true;
        }
      }
    }

    // drop aggregate calculations whose result variable is never used later
    collectNode->clearAggregates(
        [&varsUsedLater, &modified](
            std::pair<Variable const*, std::pair<Variable const*, std::string>> const& aggregate) -> bool {
          if (varsUsedLater.find(aggregate.first) == varsUsedLater.end()) {
            // result of aggregate function not used later
            modified = true;
            return true;
          }
          return false;
        });
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief helper that finds FILTER conditions of the form `x.a == constant`
/// and replaces other occurrences of `x.a` in filter conditions with the
/// constant value. only applies when the attribute is compared against a
/// single, consistent constant value across all filters.
class PropagateConstantAttributesHelper {
 public:
  explicit PropagateConstantAttributesHelper(ExecutionPlan* plan)
      : _plan(plan), _modified(false) {}

  /// @brief whether any expression in the plan was actually patched
  bool modified() const { return _modified; }

  /// @brief inspects a plan and propagates constant values in expressions
  void propagateConstants() {
    SmallVector<ExecutionNode*>::allocator_type::arena_type a;
    SmallVector<ExecutionNode*> nodes{a};
    _plan->findNodesOfType(nodes, EN::FILTER, true);

    // pass 1: collect constant attribute values from all filter conditions
    for (auto const& node : nodes) {
      auto fn = ExecutionNode::castTo<FilterNode const*>(node);
      auto setter = _plan->getVarSetBy(fn->inVariable()->id);
      if (setter != nullptr && setter->getType() == EN::CALCULATION) {
        auto cn = ExecutionNode::castTo<CalculationNode*>(setter);
        auto expression = cn->expression();

        if (expression != nullptr) {
          collectConstantAttributes(const_cast<AstNode*>(expression->node()));
        }
      }
    }

    // pass 2: patch the filter conditions with the collected constants
    if (!_constants.empty()) {
      for (auto const& node : nodes) {
        auto fn = ExecutionNode::castTo<FilterNode const*>(node);
        auto setter = _plan->getVarSetBy(fn->inVariable()->id);
        if (setter != nullptr && setter->getType() == EN::CALCULATION) {
          auto cn = ExecutionNode::castTo<CalculationNode*>(setter);
          auto expression = cn->expression();

          if (expression != nullptr) {
            insertConstantAttributes(const_cast<AstNode*>(expression->node()));
          }
        }
      }
    }
  }

 private:
  /// @brief looks up the recorded constant value for (variable, attribute),
  /// returning nullptr if none was recorded (or it was invalidated)
  AstNode const* getConstant(Variable const* variable, std::string const& attribute) const {
    auto it = _constants.find(variable);

    if (it == _constants.end()) {
      return nullptr;
    }

    auto it2 = (*it).second.find(attribute);

    if (it2 == (*it).second.end()) {
      return nullptr;
    }

    return (*it2).second;
  }

  /// @brief inspects an expression (recursively) and notes constant attribute
  /// values so they can be propagated later
  void collectConstantAttributes(AstNode* node) {
    if (node == nullptr) {
      return;
    }

    if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
      // recurse into both sides of the AND
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      collectConstantAttributes(lhs);
      collectConstantAttributes(rhs);
    } else if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      // record `constant == x.a` as well as `x.a == constant`
      if (lhs->isConstant() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
        inspectConstantAttribute(rhs, lhs);
      } else if (rhs->isConstant() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
        inspectConstantAttribute(lhs, rhs);
      }
    }
  }

  /// @brief traverses an AST part recursively and patches it by inserting
  /// constant values
  void insertConstantAttributes(AstNode* node) {
    if (node == nullptr) {
      return;
    }

    if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      insertConstantAttributes(lhs);
      insertConstantAttributes(rhs);
    } else if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      // replace the attribute-access side only when the other side is not
      // already constant
      if (!lhs->isConstant() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
        insertConstantAttribute(node, 1);
      }
      if (!rhs->isConstant() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
        insertConstantAttribute(node, 0);
      }
    }
  }

  /// @brief extract an attribute and its variable from an attribute access
  /// (e.g. `a.b.c` will return variable `a` and attribute name `.b.c`)
  /// returns false if the access does not bottom out in a variable reference
  bool getAttribute(AstNode const* attribute, Variable const*& variable, std::string& name) {
    TRI_ASSERT(attribute != nullptr && attribute->type == NODE_TYPE_ATTRIBUTE_ACCESS);
    TRI_ASSERT(name.empty());

    // walk down the attribute-access chain, building the dotted name
    // from right to left
    while (attribute->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
      name = std::string(".") + attribute->getString() + name;
      attribute = attribute->getMember(0);
    }

    if (attribute->type != NODE_TYPE_REFERENCE) {
      return false;
    }

    variable = static_cast<Variable const*>(attribute->getData());
    TRI_ASSERT(variable != nullptr);

    return true;
  }

  /// @brief inspect the constant value assigned to an attribute
  /// the attribute value will be stored so it can be inserted for the attribute
  /// later
  void inspectConstantAttribute(AstNode const* attribute, AstNode const* value) {
    Variable const* variable = nullptr;
    std::string name;

    if (!getAttribute(attribute, variable, name)) {
      return;
    }

    auto it = _constants.find(variable);

    if (it == _constants.end()) {
      // first attribute seen for this variable
      _constants.emplace(variable,
                         std::unordered_map<std::string, AstNode const*>{{name, value}});
      return;
    }

    auto it2 = (*it).second.find(name);

    if (it2 == (*it).second.end()) {
      // first value for the attribute
      (*it).second.emplace(name, value);
    } else {
      auto previous = (*it2).second;

      if (previous == nullptr) {
        // we have multiple different values for the attribute. better not use
        // this attribute
        return;
      }

      if (!value->computeValue().equals(previous->computeValue())) {
        // different value found for an already tracked attribute. better not
        // use this attribute
        // nullptr marks the attribute as invalidated
        (*it2).second = nullptr;
      }
    }
  }

  /// @brief patches an AstNode by inserting a constant value into it
  void insertConstantAttribute(AstNode* parentNode, size_t accessIndex) {
    Variable const* variable = nullptr;
    std::string name;

    AstNode* member = parentNode->getMember(accessIndex);

    if (!getAttribute(member, variable, name)) {
      return;
    }

    auto constantValue = getConstant(variable, name);

    if (constantValue != nullptr) {
      // first check if we would optimize away a join condition that uses a smartJoinAttribute...
      // we must not do that, because that would otherwise disable smart join functionality
      if (arangodb::ServerState::instance()->isCoordinator() &&
          parentNode->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
        // look at the other side of the equality comparison
        AstNode const* current = parentNode->getMember(accessIndex == 0 ? 1 : 0);
        if (current->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
          AstNode const* nameAttribute = current;
          current = current->getMember(0);
          if (current->type == NODE_TYPE_REFERENCE) {
            auto setter = _plan->getVarSetBy(static_cast<Variable const*>(current->getData())->id);
            if (setter != nullptr &&
                (setter->getType() == EN::ENUMERATE_COLLECTION || setter->getType() == EN::INDEX)) {
              auto collection = ::getCollection(setter);
              if (collection != nullptr) {
                auto logical = collection->getCollection();
                if (logical->hasSmartJoinAttribute() &&
                    logical->smartJoinAttribute() == nameAttribute->getString()) {
                  // don't remove a smart join attribute access!
                  return;
                } else {
                  std::vector<std::string> shardKeys = collection->shardKeys(true);
                  if (std::find(shardKeys.begin(), shardKeys.end(), nameAttribute->getString()) != shardKeys.end()) {
                    // don't remove equality lookups on shard keys, as this may prevent
                    // the restrict-to-single-shard rule from being applied later!
                    return;
                  }
                }
              }
            }
          }
        }
      }

      parentNode->changeMember(accessIndex, const_cast<AstNode*>(constantValue));
      _modified = true;
    }
  }

  ExecutionPlan* _plan;
  // per variable: attribute name -> constant value (nullptr = invalidated)
  std::unordered_map<Variable const*, std::unordered_map<std::string, AstNode const*>> _constants;
  bool _modified;
};
|
|
|
|
/// @brief propagate constant attributes in FILTERs
|
|
void arangodb::aql::propagateConstantAttributesRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
PropagateConstantAttributesHelper helper(plan.get());
|
|
helper.propagateConstants();
|
|
|
|
opt->addPlan(std::move(plan), rule, helper.modified());
|
|
}
|
|
|
|
/// @brief move calculations up in the plan
/// this rule modifies the plan in place
/// it aims to move up calculations as far up in the plan as possible, to
/// avoid redundant calculations in inner loops
void arangodb::aql::moveCalculationsUpRule(Optimizer* opt,
                                           std::unique_ptr<ExecutionPlan> plan,
                                           OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::CALCULATION, true);

  bool modified = false;
  // scratch sets, cleared/reused per node to avoid reallocations
  arangodb::HashSet<Variable const*> neededVars;
  arangodb::HashSet<Variable const*> vars;

  for (auto const& n : nodes) {
    auto nn = ExecutionNode::castTo<CalculationNode*>(n);

    if (!nn->expression()->isDeterministic()) {
      // we will only move expressions up that cannot throw and that are
      // deterministic
      continue;
    }

    // variables the calculation reads; we cannot move above their setters
    neededVars.clear();
    n->getVariablesUsedHere(neededVars);

    auto current = n->getFirstDependency();

    while (current != nullptr) {
      auto dep = current->getFirstDependency();

      if (dep == nullptr) {
        // node either has no or more than one dependency. we don't know what to
        // do and must abort
        // note: this will also handle Singleton nodes
        break;
      }

      if (current->setsVariable(neededVars)) {
        // shared variable, cannot move up any more
        // done with optimizing this calculation node
        break;
      }

      if (current->getType() == EN::LIMIT) {
        if (!arangodb::ServerState::instance()->isCoordinator()) {
          // do not move calculations beyond a LIMIT on a single server,
          // as this would mean carrying out potentially unnecessary
          // calculations
          break;
        }

        // coordinator case
        // now check if the calculation uses data from any collection. if so,
        // we expect that it is cheaper to execute the calculation close to the
        // origin of data (e.g. IndexNode, EnumerateCollectionNode) on a DB
        // server than on a coordinator. though executing the calculation will
        // have the same costs on DB server and coordinator, the assumption is
        // that we can reduce the amount of data we need to transfer between the
        // two if we can execute the calculation on the DB server and only
        // transfer the calculation result to the coordinator instead of the
        // full documents

        if (!::accessesCollectionVariable(plan.get(), nn, vars)) {
          // not accessing any collection data
          break;
        }
        // accessing collection data.
        // allow the calculation to be moved beyond the LIMIT,
        // in the hope that this reduces the amount of data we have
        // to transfer between the DB server and the coordinator
      }

      // first, unlink the calculation from the plan
      plan->unlinkNode(n);

      // and re-insert into before the current node
      plan->insertDependency(current, n);

      modified = true;
      // keep climbing: try to move past the next dependency as well
      current = dep;
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief move calculations down in the plan
|
|
/// this rule modifies the plan in place
|
|
/// it aims to move calculations as far down in the plan as possible, beyond
|
|
/// FILTER and LIMIT operations
|
|
void arangodb::aql::moveCalculationsDownRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
plan->findNodesOfType(nodes, EN::CALCULATION, true);
|
|
|
|
std::vector<ExecutionNode*> stack;
|
|
arangodb::HashSet<Variable const*> vars;
|
|
arangodb::HashSet<Variable const*> usedHere;
|
|
bool modified = false;
|
|
|
|
for (auto const& n : nodes) {
|
|
auto nn = ExecutionNode::castTo<CalculationNode*>(n);
|
|
if (!nn->expression()->isDeterministic()) {
|
|
// we will only move expressions down that cannot throw and that are
|
|
// deterministic
|
|
continue;
|
|
}
|
|
|
|
// this is the variable that the calculation will set
|
|
auto variable = nn->outVariable();
|
|
|
|
stack.clear();
|
|
n->parents(stack);
|
|
|
|
ExecutionNode* lastNode = nullptr;
|
|
|
|
while (!stack.empty()) {
|
|
auto current = stack.back();
|
|
stack.pop_back();
|
|
|
|
bool done = false;
|
|
|
|
usedHere.clear();
|
|
current->getVariablesUsedHere(usedHere);
|
|
for (auto const& v : usedHere) {
|
|
if (v == variable) {
|
|
// the node we're looking at needs the variable we're setting.
|
|
// can't push further!
|
|
done = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (done) {
|
|
// done with optimizing this calculation node
|
|
break;
|
|
}
|
|
|
|
auto const currentType = current->getType();
|
|
|
|
if (currentType == EN::FILTER || currentType == EN::SORT ||
|
|
currentType == EN::LIMIT || currentType == EN::SUBQUERY) {
|
|
// we found something interesting that justifies moving our node down
|
|
if (currentType == EN::LIMIT && arangodb::ServerState::instance()->isCoordinator()) {
|
|
// in a cluster, we do not want to move the calculations as far down
|
|
// as possible, because this will mean we may need to transfer a lot
|
|
// more data between DB servers and the coordinator
|
|
|
|
// assume first that we want to move the node past the LIMIT
|
|
|
|
// however, if our calculation uses any data from a
|
|
// collection/index/view, it probably makes sense to not move it,
|
|
// because the result set may be huge
|
|
if (::accessesCollectionVariable(plan.get(), nn, vars)) {
|
|
done = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!done) {
|
|
lastNode = current;
|
|
}
|
|
} else if (currentType == EN::INDEX || currentType == EN::ENUMERATE_COLLECTION ||
|
|
currentType == EN::ENUMERATE_IRESEARCH_VIEW ||
|
|
currentType == EN::ENUMERATE_LIST ||
|
|
currentType == EN::TRAVERSAL ||
|
|
currentType == EN::SHORTEST_PATH ||
|
|
currentType == EN::K_SHORTEST_PATHS ||
|
|
currentType == EN::COLLECT || currentType == EN::NORESULTS) {
|
|
// we will not push further down than such nodes
|
|
done = true;
|
|
break;
|
|
}
|
|
|
|
if (done || !current->hasParent()) {
|
|
break;
|
|
}
|
|
|
|
current->parents(stack);
|
|
}
|
|
|
|
if (lastNode != nullptr && lastNode->getFirstParent() != nullptr) {
|
|
// first, unlink the calculation from the plan
|
|
plan->unlinkNode(n);
|
|
|
|
// and re-insert into after the last "good" node
|
|
plan->insertDependency(lastNode->getFirstParent(), n);
|
|
modified = true;
|
|
}
|
|
}
|
|
|
|
opt->addPlan(std::move(plan), rule, modified);
|
|
}
|
|
|
|
/// @brief determine the "right" type of CollectNode and
/// add a sort node for each COLLECT (note: the sort may be removed later)
/// this rule cannot be turned off (otherwise, the query result might be wrong!)
void arangodb::aql::specializeCollectRule(Optimizer* opt,
                                          std::unique_ptr<ExecutionPlan> plan,
                                          OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::COLLECT, true);

  bool modified = false;

  for (auto const& n : nodes) {
    auto collectNode = ExecutionNode::castTo<CollectNode*>(n);

    if (collectNode->isSpecialized()) {
      // already specialized this node
      continue;
    }

    auto const& groupVariables = collectNode->groupVariables();

    // test if we can use an alternative version of COLLECT with a hash table
    bool const canUseHashAggregation =
        (!groupVariables.empty() &&
         (!collectNode->hasOutVariable() || collectNode->count()) &&
         collectNode->getOptions().canUseMethod(CollectOptions::CollectMethod::HASH));

    if (canUseHashAggregation && !opt->runOnlyRequiredRules(1)) {
      if (collectNode->getOptions().shouldUseMethod(CollectOptions::CollectMethod::HASH)) {
        // user has explicitly asked for hash method
        // specialize existing the CollectNode so it will become a
        // HashedCollectBlock later. additionally, add a SortNode BEHIND the
        // CollectNode (to sort the final result)
        collectNode->aggregationMethod(CollectOptions::CollectMethod::HASH);
        collectNode->specialized();

        if (!collectNode->isDistinctCommand()) {
          // add the post-SORT
          SortElementVector sortElements;
          for (auto const& v : collectNode->groupVariables()) {
            sortElements.emplace_back(v.first, true);
          }

          auto sortNode = new SortNode(plan.get(), plan->nextId(), sortElements, false);
          plan->registerNode(sortNode);

          TRI_ASSERT(collectNode->hasParent());
          auto parent = collectNode->getFirstParent();
          TRI_ASSERT(parent != nullptr);

          // splice the SortNode between the COLLECT and its parent
          sortNode->addDependency(collectNode);
          parent->replaceDependency(collectNode, sortNode);
        }

        modified = true;
        continue;
      }

      // no explicit method requested: produce an ALTERNATIVE plan using the
      // hashed variant, and keep the original plan for the sorted variant

      // create a new plan with the adjusted COLLECT node
      std::unique_ptr<ExecutionPlan> newPlan(plan->clone());

      // use the cloned COLLECT node
      auto newCollectNode =
          ExecutionNode::castTo<CollectNode*>(newPlan->getNodeById(collectNode->id()));
      TRI_ASSERT(newCollectNode != nullptr);

      // specialize the CollectNode so it will become a HashedCollectBlock
      // later
      // additionally, add a SortNode BEHIND the CollectNode (to sort the
      // final result)
      newCollectNode->aggregationMethod(CollectOptions::CollectMethod::HASH);
      newCollectNode->specialized();

      if (!collectNode->isDistinctCommand()) {
        // add the post-SORT
        SortElementVector sortElements;
        for (auto const& v : newCollectNode->groupVariables()) {
          sortElements.emplace_back(v.first, true);
        }

        auto sortNode =
            new SortNode(newPlan.get(), newPlan->nextId(), sortElements, false);
        newPlan->registerNode(sortNode);

        TRI_ASSERT(newCollectNode->hasParent());
        auto parent = newCollectNode->getFirstParent();
        TRI_ASSERT(parent != nullptr);

        sortNode->addDependency(newCollectNode);
        parent->replaceDependency(newCollectNode, sortNode);
      }

      if (nodes.size() > 1) {
        // this will tell the optimizer to optimize the cloned plan with this
        // specific rule again
        opt->addPlan(std::move(newPlan), rule, true, static_cast<int>(rule->level - 1));
      } else {
        // no need to run this specific rule again on the cloned plan
        opt->addPlan(std::move(newPlan), rule, true);
      }
    } else if (groupVariables.empty() &&
               collectNode->aggregateVariables().empty() && collectNode->count()) {
      // no grouping, no aggregates, just RETURN LENGTH(...): use plain COUNT
      collectNode->aggregationMethod(CollectOptions::CollectMethod::COUNT);
      collectNode->specialized();
      modified = true;
      continue;
    }

    // mark node as specialized, so we do not process it again
    collectNode->specialized();

    // finally, adjust the original plan and create a sorted version of COLLECT

    // specialize the CollectNode so it will become a SortedCollectBlock
    // later
    collectNode->aggregationMethod(CollectOptions::CollectMethod::SORTED);

    // insert a SortNode IN FRONT OF the CollectNode
    if (!groupVariables.empty()) {
      SortElementVector sortElements;
      for (auto const& v : groupVariables) {
        sortElements.emplace_back(v.second, true);
      }

      // stable sort (last arg true), as required by the sorted COLLECT
      auto sortNode = new SortNode(plan.get(), plan->nextId(), sortElements, true);
      plan->registerNode(sortNode);

      TRI_ASSERT(collectNode->hasDependency());
      auto dep = collectNode->getFirstDependency();
      TRI_ASSERT(dep != nullptr);
      sortNode->addDependency(dep);
      collectNode->replaceDependency(dep, sortNode);

      modified = true;
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief split and-combined filters and break them into smaller parts
void arangodb::aql::splitFiltersRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                     OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::FILTER, true);

  bool modified = false;

  for (auto const& n : nodes) {
    auto fn = ExecutionNode::castTo<FilterNode const*>(n);
    auto setter = plan->getVarSetBy(fn->inVariable()->id);

    if (setter == nullptr || setter->getType() != EN::CALCULATION) {
      // filter input is not produced by a calculation; cannot split
      continue;
    }

    auto cn = ExecutionNode::castTo<CalculationNode*>(setter);
    auto const expression = cn->expression();

    // only split deterministic, top-level AND conditions
    if (!expression->isDeterministic() ||
        expression->node()->type != NODE_TYPE_OPERATOR_BINARY_AND) {
      continue;
    }

    // walk the AND tree; every non-AND leaf becomes its own
    // CALCULATION + FILTER pair inserted before the original filter
    std::vector<AstNode*> stack{expression->nodeForModification()};

    while (!stack.empty()) {
      auto current = stack.back();
      stack.pop_back();

      if (current->type == NODE_TYPE_OPERATOR_BINARY_AND) {
        stack.emplace_back(current->getMember(0));
        stack.emplace_back(current->getMember(1));
      } else {
        modified = true;

        ExecutionNode* calculationNode = nullptr;
        auto outVar = plan->getAst()->variables()->createTemporaryVariable();
        // ownership of the expression passes to the CalculationNode on
        // success; delete manually if its construction throws
        auto expression = new Expression(plan.get(), plan->getAst(), current);
        try {
          calculationNode =
              new CalculationNode(plan.get(), plan->nextId(), expression, outVar);
        } catch (...) {
          delete expression;
          throw;
        }
        plan->registerNode(calculationNode);

        plan->insertDependency(n, calculationNode);

        auto filterNode = new FilterNode(plan.get(), plan->nextId(), outVar);
        plan->registerNode(filterNode);

        plan->insertDependency(n, filterNode);
      }
    }

    if (modified) {
      // remove the original combined filter; its parts now filter separately
      plan->unlinkNode(n, false);
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief move filters up in the plan
/// this rule modifies the plan in place
/// filters are moved as far up in the plan as possible to make result sets
/// as small as possible as early as possible
/// filters are not pushed beyond limits
void arangodb::aql::moveFiltersUpRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                      OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::FILTER, true);

  // scratch stack for walking upwards, reused per filter node
  std::vector<ExecutionNode*> stack;
  bool modified = false;

  for (auto const& n : nodes) {
    auto fn = ExecutionNode::castTo<FilterNode const*>(n);
    auto inVar = fn->inVariable();

    stack.clear();
    n->dependencies(stack);

    while (!stack.empty()) {
      auto current = stack.back();
      stack.pop_back();

      if (current->getType() == EN::LIMIT) {
        // cannot push a filter beyond a LIMIT node
        break;
      }

      if (!current->isDeterministic()) {
        // TODO: validate if this is actually necessary
        // must not move a filter beyond a node that is non-deterministic
        break;
      }

      if (current->isModificationNode()) {
        // must not move a filter beyond a modification node
        break;
      }

      if (current->getType() == EN::CALCULATION) {
        // must not move a filter beyond a node with a non-deterministic result
        auto calculation = ExecutionNode::castTo<CalculationNode const*>(current);
        if (!calculation->expression()->isDeterministic()) {
          break;
        }
      }

      // stop if this node is the one that sets the filter's input variable
      bool found = false;

      for (auto const& v : current->getVariablesSetHere()) {
        if (inVar == v) {
          // shared variable, cannot move up any more
          found = true;
          break;
        }
      }

      if (found) {
        // done with optimizing this calculation node
        break;
      }

      if (!current->hasDependency()) {
        // node either has no or more than one dependency. we don't know what to
        // do and must abort
        // note: this will also handle Singleton nodes
        break;
      }

      // continue climbing from here next iteration
      current->dependencies(stack);

      // first, unlink the filter from the plan
      plan->unlinkNode(n);
      // and re-insert into plan in front of the current node
      plan->insertDependency(current, n);
      modified = true;
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief plan walker that replaces variable references according to a
/// replacement map (old variable id -> replacement variable). Used by the
/// redundant/unnecessary-calculation rules after they have determined that
/// two variables always hold the same value. Accesses private members of
/// several node classes (friend access), so each node type gets its own
/// replacement path in before().
class arangodb::aql::RedundantCalculationsReplacer final
    : public WalkerWorker<ExecutionNode> {
 public:
  /// @param ast          the query AST (needed for cloning view conditions)
  /// @param replacements map of variable id -> replacement variable; kept
  ///                     by reference, so it must outlive the walker
  explicit RedundantCalculationsReplacer(
      Ast* ast, std::unordered_map<VariableId, Variable const*> const& replacements)
      : _ast(ast), _replacements(replacements) {}

  /// @brief replace start/target variables of (k-)shortest-path nodes
  template <typename T>
  void replaceStartTargetVariables(ExecutionNode* en) {
    auto node = static_cast<T*>(en);
    if (node->_inStartVariable != nullptr) {
      node->_inStartVariable = Variable::replace(node->_inStartVariable, _replacements);
    }
    if (node->_inTargetVariable != nullptr) {
      node->_inTargetVariable = Variable::replace(node->_inTargetVariable, _replacements);
    }
  }

  /// @brief replace the single input variable of a node type that has one
  template <typename T>
  void replaceInVariable(ExecutionNode* en) {
    auto node = ExecutionNode::castTo<T*>(en);
    node->_inVariable = Variable::replace(node->_inVariable, _replacements);
  }

  /// @brief replace variables used inside a calculation's expression
  void replaceInCalculation(ExecutionNode* en) {
    auto node = ExecutionNode::castTo<CalculationNode*>(en);
    arangodb::HashSet<Variable const*> variables;
    node->expression()->variables(variables);

    // check if the calculation uses any of the variables that we want to
    // replace
    for (auto const& it : variables) {
      if (_replacements.find(it->id) != _replacements.end()) {
        // calculation uses a to-be-replaced variable
        node->expression()->replaceVariables(_replacements);
        return;
      }
    }
  }

  /// @brief replace variables inside an ArangoSearch view's filter condition.
  /// The condition AST is cloned (once) before modification because the view
  /// node does not own the original nodes exclusively.
  void replaceInView(ExecutionNode* en) {
    auto view = ExecutionNode::castTo<arangodb::iresearch::IResearchViewNode*>(en);
    if (view->filterConditionIsEmpty()) {
      // nothing to do
      return;
    }
    AstNode const& search = view->filterCondition();
    arangodb::HashSet<Variable const*> variables;
    Ast::getReferencedVariables(&search, variables);

    // check if the search condition uses any of the variables that we want to
    // replace
    AstNode* cloned = nullptr;
    for (auto const& it : variables) {
      if (_replacements.find(it->id) != _replacements.end()) {
        if (cloned == nullptr) {
          // only clone the original search condition once
          cloned = _ast->clone(&search);
        }
        // calculation uses a to-be-replaced variable
        _ast->replaceVariables(cloned, _replacements);
      }
    }

    if (cloned != nullptr) {
      // exchange the filter condition
      view->filterCondition(cloned);
    }
  }

  /// @brief dispatch on node type and apply the appropriate replacement.
  /// Always returns false so the walk continues over the whole plan.
  bool before(ExecutionNode* en) override final {
    switch (en->getType()) {
      case EN::ENUMERATE_LIST: {
        replaceInVariable<EnumerateListNode>(en);
        break;
      }

      case EN::ENUMERATE_IRESEARCH_VIEW: {
        replaceInView(en);
        break;
      }

      case EN::RETURN: {
        replaceInVariable<ReturnNode>(en);
        break;
      }

      case EN::CALCULATION: {
        replaceInCalculation(en);
        break;
      }

      case EN::FILTER: {
        replaceInVariable<FilterNode>(en);
        break;
      }

      case EN::TRAVERSAL: {
        replaceInVariable<TraversalNode>(en);
        break;
      }

      case EN::K_SHORTEST_PATHS: {
        replaceStartTargetVariables<KShortestPathsNode>(en);
        break;
      }

      case EN::SHORTEST_PATH: {
        replaceStartTargetVariables<ShortestPathNode>(en);
        break;
      }

      case EN::COLLECT: {
        // COLLECT has several variable slots: group inputs, kept variables,
        // aggregate inputs and an optional expression variable
        auto node = ExecutionNode::castTo<CollectNode*>(en);
        for (auto& variable : node->_groupVariables) {
          variable.second = Variable::replace(variable.second, _replacements);
        }
        for (auto& variable : node->_keepVariables) {
          auto old = variable;
          variable = Variable::replace(old, _replacements);
        }
        for (auto& variable : node->_aggregateVariables) {
          variable.second.first = Variable::replace(variable.second.first, _replacements);
        }
        if (node->_expressionVariable != nullptr) {
          node->_expressionVariable =
              Variable::replace(node->_expressionVariable, _replacements);
        }
        // register replacement variables in the node's name map so they can
        // be resolved later
        for (auto const& it : _replacements) {
          node->_variableMap.emplace(it.second->id, it.second->name);
        }
        // node->_keepVariables does not need to be updated at the moment as the
        // "remove-redundant-calculations" rule will stop when it finds a
        // COLLECT with an INTO, and the "inline-subqueries" rule will abort
        // there as well
        break;
      }

      case EN::SORT: {
        auto node = ExecutionNode::castTo<SortNode*>(en);
        for (auto& variable : node->_elements) {
          variable.var = Variable::replace(variable.var, _replacements);
        }
        break;
      }

      case EN::GATHER: {
        auto node = ExecutionNode::castTo<GatherNode*>(en);
        for (auto& variable : node->_elements) {
          auto v = Variable::replace(variable.var, _replacements);
          if (v != variable.var) {
            variable.var = v;
          }
          // attribute path refers to the old variable; drop it
          variable.attributePath.clear();
        }
        break;
      }

      case EN::DISTRIBUTE: {
        auto node = ExecutionNode::castTo<DistributeNode*>(en);
        node->_variable = Variable::replace(node->_variable, _replacements);
        node->_alternativeVariable =
            Variable::replace(node->_alternativeVariable, _replacements);
        break;
      }

      case EN::REMOVE: {
        replaceInVariable<RemoveNode>(en);
        break;
      }

      case EN::INSERT: {
        replaceInVariable<InsertNode>(en);
        break;
      }

      case EN::UPSERT: {
        // UPSERT has three optional input variables
        auto node = ExecutionNode::castTo<UpsertNode*>(en);

        if (node->_inDocVariable != nullptr) {
          node->_inDocVariable = Variable::replace(node->_inDocVariable, _replacements);
        }
        if (node->_insertVariable != nullptr) {
          node->_insertVariable = Variable::replace(node->_insertVariable, _replacements);
        }
        if (node->_updateVariable != nullptr) {
          node->_updateVariable = Variable::replace(node->_updateVariable, _replacements);
        }
        break;
      }

      case EN::UPDATE: {
        auto node = ExecutionNode::castTo<UpdateNode*>(en);

        if (node->_inDocVariable != nullptr) {
          node->_inDocVariable = Variable::replace(node->_inDocVariable, _replacements);
        }
        if (node->_inKeyVariable != nullptr) {
          node->_inKeyVariable = Variable::replace(node->_inKeyVariable, _replacements);
        }
        break;
      }

      case EN::REPLACE: {
        auto node = ExecutionNode::castTo<ReplaceNode*>(en);

        if (node->_inDocVariable != nullptr) {
          node->_inDocVariable = Variable::replace(node->_inDocVariable, _replacements);
        }
        if (node->_inKeyVariable != nullptr) {
          node->_inKeyVariable = Variable::replace(node->_inKeyVariable, _replacements);
        }
        break;
      }

#if 0
      // TODO: figure out if this does any harm
      case EN::REMOTESINGLE: {
        replaceInVariable<SingleRemoteOperationNode>(en);
        break;
      }
#endif
      default: {
        // ignore all other types of nodes
      }
    }

    // always continue
    return false;
  }

 private:
  // query AST; used to clone/modify view filter conditions
  Ast* _ast;
  // variable id -> replacement variable; not owned
  std::unordered_map<VariableId, Variable const*> const& _replacements;
};
|
|
|
|
/// @brief simplify conditions in CalculationNodes
///
/// Statically resolves attribute accesses (a.b), and indexed accesses
/// (a['b'], a[1]) against constant OBJECT/ARRAY AST nodes, following
/// references through other deterministic CalculationNodes. Accesses that
/// provably miss (attribute not present, index out of bounds) are folded
/// into a `null` value, matching AQL's runtime semantics for such accesses.
void arangodb::aql::simplifyConditionsRule(Optimizer* opt,
                                           std::unique_ptr<ExecutionPlan> plan,
                                           OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::CALCULATION, true);

  if (nodes.empty()) {
    opt->addPlan(std::move(plan), rule, false);
    return;
  }

  // set to true by the visitor whenever it rewrites a node; reset per
  // calculation below
  bool modifiedNode = false;
  auto p = plan.get();

  // visitor applied bottom-up by traverseAndModify; returns the (possibly
  // replaced) node. Uses a goto-based loop to re-simplify the result of a
  // successful resolution step until a fixpoint is reached.
  auto visitor = [p, &modifiedNode](AstNode* node) {
    AstNode* original = node;

  again:
    if (node->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
      auto const* accessed = node->getMemberUnchecked(0);

      if (accessed->type == NODE_TYPE_REFERENCE) {
        // accessed object is a variable reference; look through the
        // calculation that produces it
        Variable const* v = static_cast<Variable const*>(accessed->getData());
        TRI_ASSERT(v != nullptr);

        auto setter = p->getVarSetBy(v->id);

        if (setter == nullptr || setter->getType() != EN::CALCULATION) {
          return node;
        }

        accessed = ExecutionNode::castTo<CalculationNode*>(setter)->expression()->node();
        if (accessed == nullptr) {
          return node;
        }
      }

      TRI_ASSERT(accessed != nullptr);

      if (accessed->type == NODE_TYPE_OBJECT) {
        arangodb::velocypack::StringRef const attributeName(node->getStringValue(), node->getStringLength());
        bool isDynamic = false;
        size_t const n = accessed->numMembers();
        for (size_t i = 0; i < n; ++i) {
          auto member = accessed->getMemberUnchecked(i);

          if (member->type == NODE_TYPE_OBJECT_ELEMENT &&
              arangodb::velocypack::StringRef(member->getStringValue(), member->getStringLength()) == attributeName) {
            // found the attribute!
            AstNode* next = member->getMember(0);
            if (!next->isDeterministic()) {
              // do not descend into non-deterministic nodes
              return node;
            }
            // descend further
            node = next;
            // now try optimizing the simplified condition
            // time for a goto...!
            goto again;
          } else if (member->type == NODE_TYPE_CALCULATED_OBJECT_ELEMENT) {
            // dynamic attribute name; cannot prove the attribute is absent
            isDynamic = true;
          }
        }

        // attribute not found
        if (!isDynamic) {
          modifiedNode = true;
          return Ast::createNodeValueNull();
        }
      }
    } else if (node->type == NODE_TYPE_INDEXED_ACCESS) {
      auto const* accessed = node->getMember(0);

      if (accessed->type == NODE_TYPE_REFERENCE) {
        // accessed value is a variable reference; look through the
        // calculation that produces it
        Variable const* v = static_cast<Variable const*>(accessed->getData());
        TRI_ASSERT(v != nullptr);

        auto setter = p->getVarSetBy(v->id);

        if (setter == nullptr || setter->getType() != EN::CALCULATION) {
          return node;
        }

        accessed = ExecutionNode::castTo<CalculationNode*>(setter)->expression()->node();
        if (accessed == nullptr) {
          return node;
        }
      }

      auto indexValue = node->getMember(1);

      if (!indexValue->isConstant() ||
          !(indexValue->isStringValue() || indexValue->isNumericValue())) {
        // can't handle this type of index statically
        return node;
      }

      if (accessed->type == NODE_TYPE_OBJECT) {
        arangodb::velocypack::StringRef attributeName;
        // backing storage for a stringified numeric index; must outlive
        // attributeName below
        std::string indexString;

        if (indexValue->isStringValue()) {
          // string index, e.g. ['123']
          attributeName =
              arangodb::velocypack::StringRef(indexValue->getStringValue(), indexValue->getStringLength());
        } else {
          // numeric index, e.g. [123]
          TRI_ASSERT(indexValue->isNumericValue());
          // convert the numeric index into a string
          indexString = std::to_string(indexValue->getIntValue());
          attributeName = arangodb::velocypack::StringRef(indexString);
        }

        bool isDynamic = false;
        size_t const n = accessed->numMembers();
        for (size_t i = 0; i < n; ++i) {
          auto member = accessed->getMemberUnchecked(i);

          if (member->type == NODE_TYPE_OBJECT_ELEMENT &&
              arangodb::velocypack::StringRef(member->getStringValue(), member->getStringLength()) == attributeName) {
            // found the attribute!
            AstNode* next = member->getMember(0);
            if (!next->isDeterministic()) {
              // do not descend into non-deterministic nodes
              return node;
            }
            // descend further
            node = next;
            // now try optimizing the simplified condition
            // time for a goto...!
            goto again;
          } else if (member->type == NODE_TYPE_CALCULATED_OBJECT_ELEMENT) {
            // dynamic attribute name; cannot prove the attribute is absent
            isDynamic = true;
          }
        }

        // attribute not found
        if (!isDynamic) {
          modifiedNode = true;
          return Ast::createNodeValueNull();
        }
      } else if (accessed->type == NODE_TYPE_ARRAY) {
        int64_t position;
        if (indexValue->isStringValue()) {
          // string index, e.g. ['123'] -> convert to a numeric index
          bool valid;
          position = NumberUtils::atoi<int64_t>(indexValue->getStringValue(),
                                                indexValue->getStringValue() +
                                                    indexValue->getStringLength(),
                                                valid);
          if (!valid) {
            // invalid index
            modifiedNode = true;
            return Ast::createNodeValueNull();
          }
        } else {
          // numeric index, e.g. [123]
          TRI_ASSERT(indexValue->isNumericValue());
          position = indexValue->getIntValue();
        }
        int64_t const n = accessed->numMembers();
        if (position < 0) {
          // a negative position is allowed, and counts from the array's end
          position = n + position;
        }
        if (position >= 0 && position < n) {
          AstNode* next = accessed->getMember(static_cast<size_t>(position));
          if (!next->isDeterministic()) {
            // do not descend into non-deterministic nodes
            return node;
          }
          // descend further
          node = next;
          // now try optimizing the simplified condition
          // time for a goto...!
          goto again;
        }

        // index out of bounds
        modifiedNode = true;
        return Ast::createNodeValueNull();
      }
    }

    if (node != original) {
      // we come out with a different node, so we changed something...
      modifiedNode = true;
    }
    return node;
  };

  bool modified = false;

  for (auto const& n : nodes) {
    auto nn = ExecutionNode::castTo<CalculationNode*>(n);

    if (!nn->expression()->isDeterministic()) {
      // If this node is non-deterministic, we must not touch it!
      continue;
    }

    AstNode* root = nn->expression()->nodeForModification();

    if (root != nullptr) {
      // reset for every round. can be modified by the visitor function!
      modifiedNode = false;
      AstNode* simplified = plan->getAst()->traverseAndModify(root, visitor);
      if (simplified != root) {
        nn->expression()->replaceNode(simplified);
      }
      if (modifiedNode) {
        // expression structure changed; invalidate cached state
        nn->expression()->invalidateAfterReplacements();
        modified = true;
      }
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief fuse filter conditions that follow each other
///
/// Detects chains of the pattern CALC -> FILTER -> CALC -> FILTER ... where
/// each calculation feeds exactly the following filter, and merges all the
/// conditions into a single calculation combined with logical AND, removing
/// the now-redundant calculation/filter pairs.
void arangodb::aql::fuseFiltersRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                    OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::FILTER, true);

  if (nodes.size() < 2) {
    // fewer than two filters: nothing to fuse
    opt->addPlan(std::move(plan), rule, false);
    return;
  }

  // filters already visited as part of an earlier chain
  arangodb::HashSet<ExecutionNode*> seen;
  // candidates of CalculationNode, FilterNode
  std::vector<std::pair<ExecutionNode*, ExecutionNode*>> candidates;

  bool modified = false;

  for (auto const& n : nodes) {
    if (seen.find(n) != seen.end()) {
      // already processed
      continue;
    }

    // the variable the next calculation (moving upwards) must produce for
    // the chain to continue
    Variable const* nextExpectedVariable = nullptr;
    ExecutionNode* lastFilter = nullptr;
    candidates.clear();

    // walk upwards from the filter, alternating FILTER/CALCULATION nodes
    ExecutionNode* current = n;
    while (current != nullptr) {
      if (current->getType() == EN::CALCULATION) {
        auto cn = ExecutionNode::castTo<CalculationNode*>(current);
        if (!cn->isDeterministic() || cn->outVariable() != nextExpectedVariable) {
          break;
        }
        TRI_ASSERT(lastFilter != nullptr);
        candidates.emplace_back(current, lastFilter);
        nextExpectedVariable = nullptr;
      } else if (current->getType() == EN::FILTER) {
        seen.emplace(current);

        if (nextExpectedVariable != nullptr) {
          // an unexpected order of nodes
          break;
        }
        nextExpectedVariable =
            ExecutionNode::castTo<FilterNode const*>(current)->inVariable();
        TRI_ASSERT(nextExpectedVariable != nullptr);
        if (current->isVarUsedLater(nextExpectedVariable)) {
          // filter input variable is also used for other things. we must not
          // remove it or the corresponding calculation
          break;
        }
        lastFilter = current;
      } else {
        // all other types of nodes we cannot optimize
        break;
      }
      current = current->getFirstDependency();
    }

    if (candidates.size() >= 2) {
      modified = true;
      // fold all conditions into the topmost calculation's expression,
      // AND-ing from bottom (candidates[0]) to top
      AstNode* root = ExecutionNode::castTo<CalculationNode*>(candidates[0].first)
                          ->expression()
                          ->nodeForModification();
      for (size_t i = 1; i < candidates.size(); ++i) {
        root = plan->getAst()->createNodeBinaryOperator(
            NODE_TYPE_OPERATOR_BINARY_AND,
            ExecutionNode::castTo<CalculationNode const*>(candidates[i].first)
                ->expression()
                ->node(),
            root);

        // throw away all now-unused filters and calculations
        plan->unlinkNode(candidates[i - 1].second);
        plan->unlinkNode(candidates[i - 1].first);
      }

      // the last (topmost) calculation survives and receives the combined
      // condition; its filter stays in the plan
      ExecutionNode* en = candidates.back().first;
      TRI_ASSERT(en->getType() == EN::CALCULATION);
      ExecutionNode::castTo<CalculationNode*>(en)->expression()->replaceNode(root);
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief remove CalculationNode(s) that are repeatedly used in a query
/// (i.e. common expressions)
///
/// Compares calculations by their stringified expression; when two
/// deterministic calculations stringify identically, the later variable is
/// mapped onto the earlier one, and all references are rewritten via
/// RedundantCalculationsReplacer.
void arangodb::aql::removeRedundantCalculationsRule(Optimizer* opt,
                                                    std::unique_ptr<ExecutionPlan> plan,
                                                    OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::CALCULATION, true);

  if (nodes.size() < 2) {
    // quick exit
    opt->addPlan(std::move(plan), rule, false);
    return;
  }

  // scratch buffer for stringified expressions; reused throughout
  arangodb::basics::StringBuffer buffer;
  // variable id -> replacement variable, applied at the end
  std::unordered_map<VariableId, Variable const*> replacements;

  for (auto const& n : nodes) {
    auto nn = ExecutionNode::castTo<CalculationNode*>(n);

    if (!nn->expression()->isDeterministic()) {
      // If this node is non-deterministic, we must not touch it!
      continue;
    }

    arangodb::aql::Variable const* outvar = nn->outVariable();

    try {
      nn->expression()->stringifyIfNotTooLong(&buffer);
    } catch (...) {
      // expression could not be stringified (maybe because not all node types
      // are supported). this is not an error, we just skip the optimization
      buffer.reset();
      continue;
    }

    std::string const referenceExpression(buffer.c_str(), buffer.length());
    buffer.reset();

    // walk upwards from the calculation, comparing against every other
    // calculation we encounter
    std::vector<ExecutionNode*> stack;
    n->dependencies(stack);

    while (!stack.empty()) {
      auto current = stack.back();
      stack.pop_back();

      if (current->getType() == EN::CALCULATION) {
        try {
          // ExecutionNode::castTo<CalculationNode*>(current)->expression()->node()->dump(0);
          ExecutionNode::castTo<CalculationNode*>(current)->expression()->stringifyIfNotTooLong(
              &buffer);
        } catch (...) {
          // expression could not be stringified (maybe because not all node
          // types are supported). this is not an error, we just skip the
          // optimization
          buffer.reset();
          continue;
        }

        bool const isEqual =
            (buffer.length() == referenceExpression.size() &&
             memcmp(buffer.c_str(), referenceExpression.c_str(), buffer.length()) == 0);
        buffer.reset();

        if (isEqual) {
          // expressions are identical
          // check if target variable is already registered as a replacement
          // this covers the following case:
          // - replacements is set to B => C
          // - we're now inserting a replacement A => B
          // the goal now is to enter a replacement A => C instead of A => B
          auto target = ExecutionNode::castTo<CalculationNode const*>(current)->outVariable();
          while (target != nullptr) {
            auto it = replacements.find(target->id);

            if (it != replacements.end()) {
              target = (*it).second;
            } else {
              break;
            }
          }
          replacements.emplace(outvar->id, target);

          // also check if the insertion enables further shortcuts
          // this covers the following case:
          // - replacements is set to A => B
          // - we have just inserted a replacement B => C
          // the goal now is to change the replacement A => B to A => C
          for (auto it = replacements.begin(); it != replacements.end(); ++it) {
            if ((*it).second == outvar) {
              (*it).second = target;
            }
          }
        }
      }

      if (current->getType() == EN::COLLECT) {
        if (ExecutionNode::castTo<CollectNode*>(current)->hasOutVariable()) {
          // COLLECT ... INTO is evil (tm): it needs to keep all already defined
          // variables
          // we need to abort optimization here
          break;
        }
      }

      if (!current->hasDependency()) {
        // node either has no or more than one dependency. we don't know what to
        // do and must abort
        // note: this will also handle Singleton nodes
        break;
      }

      current->dependencies(stack);
    }
  }

  if (!replacements.empty()) {
    // finally replace the variables
    RedundantCalculationsReplacer finder(plan->getAst(), replacements);
    plan->root()->walk(finder);
  }

  opt->addPlan(std::move(plan), rule, !replacements.empty());
}
|
|
|
|
/// @brief remove CalculationNodes and SubqueryNodes that are never needed
/// this modifies an existing plan in place
///
/// Three kinds of removals happen here:
/// 1. a calculation/subquery whose output variable is never used later is
///    unlinked entirely
/// 2. a calculation that is just `LET a = b` has all references to `a`
///    rewritten to `b`, then is unlinked
/// 3. a calculation used by exactly one other calculation has its expression
///    inlined into that other calculation, then is unlinked
void arangodb::aql::removeUnnecessaryCalculationsRule(Optimizer* opt,
                                                      std::unique_ptr<ExecutionPlan> plan,
                                                      OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, ::removeUnnecessaryCalculationsNodeTypes, true);

  // nodes collected for removal; unlinked in one batch at the end
  arangodb::HashSet<ExecutionNode*> toUnlink;

  for (auto const& n : nodes) {
    arangodb::aql::Variable const* outVariable = nullptr;

    if (n->getType() == EN::CALCULATION) {
      auto nn = ExecutionNode::castTo<CalculationNode*>(n);

      if (!nn->isDeterministic()) {
        // If this node is non-deterministic, we must not optimize it away!
        continue;
      }

      outVariable = nn->outVariable();
      // will remove calculation when we get here
    } else if (n->getType() == EN::SUBQUERY) {
      auto nn = ExecutionNode::castTo<SubqueryNode*>(n);

      if (!nn->isDeterministic()) {
        // subqueries that are non-deterministic must not be optimized away
        continue;
      }

      if (nn->isModificationSubquery()) {
        // subqueries that modify data must not be optimized away
        continue;
      }
      // will remove subquery when we get here
      outVariable = nn->outVariable();
    } else {
      // findNodesOfType should only return CALCULATION/SUBQUERY nodes here
      TRI_ASSERT(false);
      continue;
    }

    TRI_ASSERT(outVariable != nullptr);

    if (!n->isVarUsedLater(outVariable)) {
      // The variable whose value is calculated here is not used at
      // all further down the pipeline! We remove the whole
      // calculation node,
      toUnlink.emplace(n);
    } else if (n->getType() == EN::CALCULATION) {
      // variable is still used later, but...
      // ...if it's used exactly once later by another calculation,
      // it's a temporary variable that we can fuse with the other
      // calculation easily

      if (!ExecutionNode::castTo<CalculationNode*>(n)->expression()->isDeterministic()) {
        continue;
      }

      AstNode const* rootNode =
          ExecutionNode::castTo<CalculationNode*>(n)->expression()->node();

      if (rootNode->type == NODE_TYPE_REFERENCE) {
        // if the LET is a simple reference to another variable, e.g. LET a = b
        // then replace all references to a with references to b
        bool hasCollectWithOutVariable = false;
        auto current = n->getFirstParent();

        // check first if we have a COLLECT with an INTO later in the query
        // in this case we must not perform the replacements
        while (current != nullptr) {
          if (current->getType() == EN::COLLECT) {
            if (ExecutionNode::castTo<CollectNode const*>(current)->hasOutVariableButNoCount()) {
              hasCollectWithOutVariable = true;
              break;
            }
          }
          current = current->getFirstParent();
        }

        if (!hasCollectWithOutVariable) {
          // no COLLECT found, now replace
          std::unordered_map<VariableId, Variable const*> replacements;
          replacements.emplace(outVariable->id,
                               static_cast<Variable const*>(rootNode->getData()));

          RedundantCalculationsReplacer finder(plan->getAst(), replacements);
          plan->root()->walk(finder);
          toUnlink.emplace(n);
          continue;
        }
      }

      arangodb::HashSet<Variable const*> vars;

      // count how many downstream nodes use our variable; abort (count 0)
      // when a user is not a plain calculation
      size_t usageCount = 0;
      CalculationNode* other = nullptr;
      auto current = n->getFirstParent();

      while (current != nullptr) {
        current->getVariablesUsedHere(vars);
        if (vars.find(outVariable) != vars.end()) {
          if (current->getType() == EN::COLLECT) {
            if (ExecutionNode::castTo<CollectNode const*>(current)->hasOutVariableButNoCount()) {
              // COLLECT with an INTO variable will collect all variables from
              // the scope, so we shouldn't try to remove or change the meaning
              // of variables
              usageCount = 0;
              break;
            }
          }
          if (current->getType() != EN::CALCULATION) {
            // don't know how to replace the variable in a non-LET node
            // abort the search
            usageCount = 0;
            break;
          }

          // got a LET. we can replace the variable reference in it by
          // something else
          ++usageCount;
          other = ExecutionNode::castTo<CalculationNode*>(current);
        }

        if (usageCount > 1) {
          break;
        }

        current = current->getFirstParent();
        vars.clear();
      }

      if (usageCount == 1) {
        // our variable is used by exactly one other calculation
        // now we can replace the reference to our variable in the other
        // calculation with the variable's expression directly
        auto otherExpression = other->expression();
        TRI_ASSERT(otherExpression != nullptr);

        if (rootNode->type != NODE_TYPE_ATTRIBUTE_ACCESS &&
            Ast::countReferences(otherExpression->node(), outVariable) > 1) {
          // used more than once... better give up
          continue;
        }

        if (rootNode->isSimple() != otherExpression->node()->isSimple()) {
          // expression types (V8 vs. non-V8) do not match. give up
          continue;
        }

        if (!n->isInInnerLoop() && rootNode->callsFunction() && other->isInInnerLoop()) {
          // original expression calls a function and is not contained in a loop
          // we're about to move this expression into a loop, but we don't want
          // to move (expensive) function calls into loops
          continue;
        }

        TRI_ASSERT(other != nullptr);
        otherExpression->replaceVariableReference(outVariable, rootNode);

        toUnlink.emplace(n);
      }
    }
  }

  if (!toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(std::move(plan), rule, !toUnlink.empty());
}
|
|
|
|
/// @brief useIndex, try to use an index for filtering
|
|
void arangodb::aql::useIndexesRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
// These are all the nodes where we start traversing (including all
|
|
// subqueries)
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
plan->findEndNodes(nodes, true);
|
|
|
|
std::unordered_map<size_t, ExecutionNode*> changes;
|
|
|
|
auto cleanupChanges = [&changes]() -> void {
|
|
for (auto& v : changes) {
|
|
delete v.second;
|
|
}
|
|
changes.clear();
|
|
};
|
|
|
|
TRI_DEFER(cleanupChanges());
|
|
bool hasEmptyResult = false;
|
|
for (auto const& n : nodes) {
|
|
ConditionFinder finder(plan.get(), &changes, &hasEmptyResult, false);
|
|
n->walk(finder);
|
|
}
|
|
|
|
if (!changes.empty()) {
|
|
for (auto& it : changes) {
|
|
plan->registerNode(it.second);
|
|
plan->replaceNode(plan->getNodeById(it.first), it.second);
|
|
|
|
// prevent double deletion by cleanupChanges()
|
|
it.second = nullptr;
|
|
}
|
|
opt->addPlan(std::move(plan), rule, true);
|
|
} else {
|
|
opt->addPlan(std::move(plan), rule, hasEmptyResult);
|
|
}
|
|
}
|
|
|
|
struct SortToIndexNode final : public WalkerWorker<ExecutionNode> {
|
|
ExecutionPlan* _plan;
|
|
SortNode* _sortNode;
|
|
std::vector<std::pair<Variable const*, bool>> _sorts;
|
|
std::unordered_map<VariableId, AstNode const*> _variableDefinitions;
|
|
bool _modified;
|
|
|
|
public:
|
|
explicit SortToIndexNode(ExecutionPlan* plan)
|
|
: _plan(plan), _sortNode(nullptr), _modified(false) {}
|
|
|
|
bool handleEnumerateCollectionNode(EnumerateCollectionNode* enumerateCollectionNode) {
|
|
if (_sortNode == nullptr) {
|
|
return true;
|
|
}
|
|
|
|
if (enumerateCollectionNode->isInInnerLoop()) {
|
|
// index node contained in an outer loop. must not optimize away the sort!
|
|
return true;
|
|
}
|
|
|
|
SortCondition sortCondition(_plan, _sorts,
|
|
std::vector<std::vector<arangodb::basics::AttributeName>>(),
|
|
_variableDefinitions);
|
|
|
|
if (!sortCondition.isEmpty() && sortCondition.isOnlyAttributeAccess() &&
|
|
sortCondition.isUnidirectional()) {
|
|
// we have found a sort condition, which is unidirectionl
|
|
// now check if any of the collection's indexes covers it
|
|
|
|
Variable const* outVariable = enumerateCollectionNode->outVariable();
|
|
std::vector<transaction::Methods::IndexHandle> usedIndexes;
|
|
auto trx = _plan->getAst()->query()->trx();
|
|
size_t coveredAttributes = 0;
|
|
bool canBeUsed = trx->getIndexForSortCondition(
|
|
enumerateCollectionNode->collection()->name(), &sortCondition,
|
|
outVariable, enumerateCollectionNode->collection()->count(trx),
|
|
enumerateCollectionNode->hint(), usedIndexes, coveredAttributes);
|
|
if (canBeUsed) {
|
|
// If this bit is set, then usedIndexes has length exactly one
|
|
// and contains the best index found.
|
|
auto condition = std::make_unique<Condition>(_plan->getAst());
|
|
condition->normalize(_plan);
|
|
|
|
IndexIteratorOptions opts;
|
|
opts.ascending = sortCondition.isAscending();
|
|
auto newNode =
|
|
std::make_unique<IndexNode>(_plan, _plan->nextId(),
|
|
enumerateCollectionNode->collection(), outVariable,
|
|
usedIndexes, std::move(condition), opts);
|
|
|
|
auto n = newNode.release();
|
|
|
|
_plan->registerNode(n);
|
|
_plan->replaceNode(enumerateCollectionNode, n);
|
|
_modified = true;
|
|
|
|
if (coveredAttributes == sortCondition.numAttributes()) {
|
|
// if the index covers the complete sort condition, we can also remove
|
|
// the sort node
|
|
n->needsGatherNodeSort(true);
|
|
_plan->unlinkNode(_plan->getNodeById(_sortNode->id()));
|
|
}
|
|
}
|
|
}
|
|
|
|
return true; // always abort further searching here
|
|
}
|
|
|
|
// Checks whether the SORT node recorded by this finder can be satisfied by
// the given IndexNode, and if so removes (or shrinks) the SORT node.
// Returns true in all cases, which aborts the walk: once an IndexNode has
// been reached there is nothing further up the plan to inspect.
bool handleIndexNode(IndexNode* indexNode) {
  if (_sortNode == nullptr) {
    // no SORT node seen yet; nothing to optimize
    return true;
  }

  if (indexNode->isInInnerLoop()) {
    // index node contained in an outer loop. must not optimize away the sort!
    return true;
  }

  auto const& indexes = indexNode->getIndexes();
  auto cond = indexNode->condition();
  TRI_ASSERT(cond != nullptr);

  Variable const* outVariable = indexNode->outVariable();
  TRI_ASSERT(outVariable != nullptr);

  // representative index; if there are several, they must all be identical
  // (checked below) for the sort optimization to be applicable
  auto index = indexes[0];
  transaction::Methods* trx = _plan->getAst()->query()->trx();
  bool isSorted = false;
  bool isSparse = false;
  std::vector<std::vector<arangodb::basics::AttributeName>> fields =
      trx->getIndexFeatures(index, isSorted, isSparse);
  if (indexes.size() != 1) {
    // can only use this index node if it uses exactly one index or multiple
    // indexes on exactly the same attributes

    if (!cond->isSorted()) {
      // index conditions do not guarantee sortedness
      return true;
    }

    if (isSparse) {
      // sparse indexes may omit documents with null attributes, so their
      // output cannot be trusted to be a complete sorted result
      return true;
    }

    for (auto& idx : indexes) {
      if (idx != index) {
        // Can only be sorted iff only one index is used.
        return true;
      }
    }

    // all indexes use the same attributes and index conditions guarantee
    // sorted output
  }

  TRI_ASSERT(indexes.size() == 1 || cond->isSorted());

  // if we get here, we either have one index or multiple indexes on the same
  // attributes
  bool handled = false;

  if (indexes.size() == 1 && isSorted) {
    // if we have just a single index and we can use it for the filtering
    // condition, then we can use the index for sorting, too. regardless of it
    // the index is sparse or not. because the index would only return
    // non-null attributes anyway, so we do not need to care about null values
    // when sorting here
    isSparse = false;
  }

  SortCondition sortCondition(_plan, _sorts,
                              cond->getConstAttributes(outVariable, !isSparse),
                              _variableDefinitions);

  bool const isOnlyAttributeAccess =
      (!sortCondition.isEmpty() && sortCondition.isOnlyAttributeAccess());

  // case 1: the index is sorted and its order matches the SORT node's order
  if (isOnlyAttributeAccess && isSorted && !isSparse && sortCondition.isUnidirectional() &&
      sortCondition.isAscending() == indexNode->options().ascending) {
    // we have found a sort condition, which is unidirectional and in the same
    // order as the IndexNode...
    // now check if the sort attributes match the ones of the index
    size_t const numCovered = sortCondition.coveredAttributes(outVariable, fields);

    if (numCovered >= sortCondition.numAttributes()) {
      // sort condition is fully covered by index... now we can remove the
      // sort node from the plan
      _plan->unlinkNode(_plan->getNodeById(_sortNode->id()));
      // we need to have a sorted result later on, so we will need a sorted
      // GatherNode in the cluster
      indexNode->needsGatherNodeSort(true);
      _modified = true;
      handled = true;
    }
  }

  // case 2: the index itself cannot sort, but the lookup condition is
  // equality-only, so the output order can still match the SORT node
  if (!handled && isOnlyAttributeAccess && indexes.size() == 1) {
    // special case... the index cannot be used for sorting, but we only
    // compare with equality
    // lookups. now check if the equality lookup attributes are the same as
    // the index attributes
    auto root = cond->root();

    if (root != nullptr) {
      auto condNode = root->getMember(0);

      if (condNode->isOnlyEqualityMatch()) {
        // now check if the index fields are the same as the sort condition
        // fields e.g. FILTER c.value1 == 1 && c.value2 == 42 SORT c.value1,
        // c.value2
        auto i = index.getIndex();
        // some special handling for the MMFiles edge index here, which to the
        // outside world is an index on attributes _from and _to at the same
        // time, but only one can be queried at a time this special handling
        // is required in order to prevent lookups by one of the index
        // attributes (e.g. _from) and a sort clause on the other index
        // attribte (e.g. _to) to be treated as the same index attribute, e.g.
        // FOR doc IN edgeCol FILTER doc._from == ... SORT doc._to ...
        // can use the index either for lookup or for sorting, but not for
        // both at the same time. this is because if we do the lookup by
        // _from, the results will be sorted by _from, and not by _to.
        if (i->type() == arangodb::Index::IndexType::TRI_IDX_TYPE_EDGE_INDEX &&
            fields.size() == 2) {
          // looks like MMFiles edge index
          if (condNode->type == NODE_TYPE_OPERATOR_NARY_AND) {
            // check all conditions of the index node, and check if we can
            // find _from or _to
            for (size_t j = 0; j < condNode->numMembers(); ++j) {
              auto sub = condNode->getMemberUnchecked(j);
              if (sub->type != NODE_TYPE_OPERATOR_BINARY_EQ) {
                continue;
              }
              // the variable reference may appear on either side of the
              // equality, so inspect both operands
              auto lhs = sub->getMember(0);
              if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
                  lhs->getMember(0)->type == NODE_TYPE_REFERENCE &&
                  lhs->getMember(0)->getData() == outVariable) {
                // check if this is either _from or _to
                std::string attr = lhs->getString();
                if (attr == StaticStrings::FromString || attr == StaticStrings::ToString) {
                  // reduce index fields to just the attribute we found in the
                  // index lookup condition
                  fields = {{arangodb::basics::AttributeName(attr, false)}};
                }
              }

              auto rhs = sub->getMember(1);
              if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
                  rhs->getMember(0)->type == NODE_TYPE_REFERENCE &&
                  rhs->getMember(0)->getData() == outVariable) {
                // check if this is either _from or _to
                std::string attr = rhs->getString();
                if (attr == StaticStrings::FromString || attr == StaticStrings::ToString) {
                  // reduce index fields to just the attribute we found in the
                  // index lookup condition
                  fields = {{arangodb::basics::AttributeName(attr, false)}};
                }
              }
            }
          }
        }

        size_t const numCovered = sortCondition.coveredAttributes(outVariable, fields);

        if (numCovered == sortCondition.numAttributes() &&
            sortCondition.isUnidirectional() &&
            (isSorted || fields.size() >= sortCondition.numAttributes())) {
          // no need to sort
          _plan->unlinkNode(_plan->getNodeById(_sortNode->id()));
          indexNode->setAscending(sortCondition.isAscending());
          // we need to have a sorted result later on, so we will need a
          // sorted GatherNode in the cluster
          indexNode->needsGatherNodeSort(true);
          _modified = true;
        } else if (numCovered > 0 && sortCondition.isUnidirectional()) {
          // remove the first few attributes if they are constant
          SortNode* sortNode =
              ExecutionNode::castTo<SortNode*>(_plan->getNodeById(_sortNode->id()));
          sortNode->removeConditions(numCovered);
          _modified = true;
        }
      }
    }
  }

  return true;  // always abort after we found an IndexNode
}
|
|
|
|
// never descend into subqueries: a SORT in the outer query cannot be
// satisfied by an index used inside a subquery (and vice versa)
bool enterSubquery(ExecutionNode*, ExecutionNode*) override final {
  return false;
}
|
|
|
|
// Walker callback, invoked for each node from the starting SORT node
// towards the plan's start. Return value follows the walker protocol used
// here: true aborts the search, false continues with the next dependency.
bool before(ExecutionNode* en) override final {
  switch (en->getType()) {
    case EN::TRAVERSAL:
    case EN::K_SHORTEST_PATHS:
    case EN::SHORTEST_PATH:
    case EN::ENUMERATE_LIST:
    case EN::ENUMERATE_IRESEARCH_VIEW:
      // found some other FOR loop
      return true;

    case EN::SUBQUERY:
    case EN::FILTER:
      return false;  // skip. we don't care.

    case EN::CALCULATION: {
      // remember which expression sets this variable, so the SortCondition
      // can later resolve sort criteria referring to calculated variables
      _variableDefinitions.emplace(
          ExecutionNode::castTo<CalculationNode const*>(en)->outVariable()->id,
          ExecutionNode::castTo<CalculationNode const*>(en)->expression()->node());
      return false;
    }

    case EN::SINGLETON:
    case EN::COLLECT:
    case EN::INSERT:
    case EN::REMOVE:
    case EN::REPLACE:
    case EN::UPDATE:
    case EN::UPSERT:
    case EN::RETURN:
    case EN::NORESULTS:
    case EN::SCATTER:
    case EN::DISTRIBUTE:
    case EN::GATHER:
    case EN::REMOTE:
    case EN::LIMIT:  // LIMIT is criterion to stop
      return true;   // abort.

    case EN::SORT:  // pulling two sorts together is done elsewhere.
      if (!_sorts.empty() || _sortNode != nullptr) {
        return true;  // a different SORT node. abort
      }
      // first SORT node encountered: record it together with its sort
      // variables and directions for later matching against an index
      _sortNode = ExecutionNode::castTo<SortNode*>(en);
      for (auto& it : _sortNode->elements()) {
        _sorts.emplace_back(it.var, it.ascending);
      }
      return false;

    case EN::INDEX:
      return handleIndexNode(ExecutionNode::castTo<IndexNode*>(en));

    case EN::ENUMERATE_COLLECTION:
      return handleEnumerateCollectionNode(
          ExecutionNode::castTo<EnumerateCollectionNode*>(en));

    default: {
      // should not reach this point
      TRI_ASSERT(false);
    }
  }
  return true;
}
|
|
};
|
|
|
|
/// @brief try to replace SORT nodes by using an index that already delivers
/// the documents in the requested order
void arangodb::aql::useIndexForSortRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                        OptimizerRule const* rule) {
  // collect every SORT node in the plan, including those in subqueries
  SmallVector<ExecutionNode*>::allocator_type::arena_type arena;
  SmallVector<ExecutionNode*> sortNodes{arena};
  plan->findNodesOfType(sortNodes, EN::SORT, true);

  bool planChanged = false;

  for (auto const& current : sortNodes) {
    // walk upwards from each SORT node, looking for an index that can
    // produce the requested order; the finder mutates the plan in place
    SortToIndexNode finder(plan.get());
    ExecutionNode::castTo<SortNode*>(current)->walk(finder);

    planChanged = planChanged || finder._modified;
  }

  opt->addPlan(std::move(plan), rule, planChanged);
}
|
|
|
|
/// @brief try to remove filters which are covered by indexes
/// For every FILTER node, this rule looks downwards (towards the plan's
/// start) for an IndexNode whose lookup condition already implies the filter
/// condition. A fully covered FILTER is removed; a partially covered one has
/// its calculation replaced by the remaining (smaller) condition.
void arangodb::aql::removeFiltersCoveredByIndexRule(Optimizer* opt,
                                                    std::unique_ptr<ExecutionPlan> plan,
                                                    OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::FILTER, true);

  arangodb::HashSet<ExecutionNode*> toUnlink;
  bool modified = false;
  // this rule may modify the plan in place, but the new plan
  // may not yet be optimal. so we may pass it into this same
  // rule again. the default is to continue with the next rule
  // however
  int newLevel = 0;

  for (auto const& node : nodes) {
    auto fn = ExecutionNode::castTo<FilterNode const*>(node);
    // find the node with the filter expression
    auto setter = plan->getVarSetBy(fn->inVariable()->id);

    if (setter == nullptr || setter->getType() != EN::CALCULATION) {
      // filter input is not produced by a calculation; cannot analyze it
      continue;
    }

    auto calculationNode = ExecutionNode::castTo<CalculationNode*>(setter);
    auto conditionNode = calculationNode->expression()->node();

    // build the filter condition
    Condition condition(plan->getAst());
    condition.andCombine(conditionNode);
    condition.normalize(plan.get());

    if (condition.root() == nullptr) {
      continue;
    }

    size_t const n = condition.root()->numMembers();

    if (n != 1) {
      // either no condition or multiple ORed conditions...
      continue;
    }

    bool handled = false;
    auto current = node;
    // walk down the dependency chain until we hit an IndexNode, a LIMIT,
    // or the start of the plan
    while (current != nullptr) {
      if (current->getType() == EN::INDEX) {
        auto indexNode = ExecutionNode::castTo<IndexNode const*>(current);

        // found an index node, now check if the expression is covered by the
        // index
        auto indexCondition = indexNode->condition();

        if (indexCondition != nullptr && !indexCondition->isEmpty()) {
          auto const& indexesUsed = indexNode->getIndexes();

          if (indexesUsed.size() == 1) {
            // single index. this is something that we can handle
            auto newNode =
                condition.removeIndexCondition(plan.get(), indexNode->outVariable(),
                                               indexCondition->root(),
                                               indexesUsed[0].getIndex().get());

            if (newNode == nullptr) {
              // no condition left...
              // FILTER node can be completely removed
              toUnlink.emplace(node);
              // note: we must leave the calculation node intact, in case it is
              // still used by other nodes in the plan
              modified = true;
              handled = true;
            } else if (newNode != condition.root()) {
              // some condition is left, but it is a different one than
              // the one from the FILTER node
              auto expr = std::make_unique<Expression>(plan.get(), plan->getAst(), newNode);
              // the CalculationNode takes ownership of the expression,
              // hence the release() after construction
              CalculationNode* cn =
                  new CalculationNode(plan.get(), plan->nextId(), expr.get(),
                                      calculationNode->outVariable());
              expr.release();
              plan->registerNode(cn);
              plan->replaceNode(setter, cn);
              modified = true;
              handled = true;
              // pass the new plan into this rule again, to optimize even
              // further
              newLevel = static_cast<int>(rule->level - 1);
            }
          }
        }

        if (handled) {
          break;
        }
      }

      if (handled || current->getType() == EN::LIMIT || !current->hasDependency()) {
        break;
      }

      current = current->getFirstDependency();
    }
  }

  if (!toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(std::move(plan), rule, modified, newLevel);
}
|
|
|
|
/// @brief helper to compute lots of permutation tuples
/// a permutation tuple is represented as a single vector together with
/// another vector describing the boundaries of the tuples.
/// Example:
///   data:   0,1,2, 3,4, 5,6
///   starts: 0, 3, 5  (indices where each section begins)
/// means a tuple of 3 permutations of 3, 2 and 2 points respectively.
/// Advances to the next tuple in the lexicographically sorted list of all
/// such tuples. Returns true if a next tuple exists; returns false when the
/// input already was the lexicographically largest tuple, in which case the
/// tuple has wrapped around back to the first (fully sorted) one.
static bool NextPermutationTuple(std::vector<size_t>& data, std::vector<size_t>& starts) {
  // process the sections from the rightmost (least significant) to the
  // leftmost, like incrementing a multi-digit counter with carry
  for (size_t i = starts.size(); i > 0; --i) {
    auto from = data.begin() + starts[i - 1];
    auto to = (i == starts.size()) ? data.end() : data.begin() + starts[i];

    if (std::next_permutation(from, to)) {
      // this section advanced; the overall tuple is now the next one
      return true;
    }
    // this section wrapped around to its smallest permutation;
    // carry over into the section to its left
  }

  // every section wrapped: the tuple was the largest one
  return false;
}
|
|
|
|
/// @brief interchange adjacent EnumerateCollectionNodes in all possible ways
/// Finds maximal runs of directly adjacent enumeration nodes and, for every
/// permutation of the orders within those runs, emits a cloned plan, so the
/// optimizer can cost-compare the different join orders. The original plan
/// is always added (unmodified) as well.
void arangodb::aql::interchangeAdjacentEnumerationsRule(Optimizer* opt,
                                                        std::unique_ptr<ExecutionPlan> plan,
                                                        OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};

  plan->findNodesOfType(nodes, ::interchangeAdjacentEnumerationsNodeTypes, true);

  // set of nodes not yet assigned to a run
  arangodb::HashSet<ExecutionNode*> nodesSet;
  for (auto const& n : nodes) {
    TRI_ASSERT(nodesSet.find(n) == nodesSet.end());
    nodesSet.emplace(n);
  }

  std::vector<ExecutionNode*> nodesToPermute;
  std::vector<size_t> permTuple;
  std::vector<size_t> starts;
  std::vector<ExecutionNode*> nn;

  // We use that the order of the nodes is such that a node B that is among the
  // recursive dependencies of a node A is later in the vector.
  for (auto const& n : nodes) {
    if (nodesSet.find(n) != nodesSet.end()) {
      nn.clear();
      nn.emplace_back(n);
      nodesSet.erase(n);

      // Now follow the dependencies as long as we see further such nodes:
      auto nwalker = n;

      while (true) {
        if (!nwalker->hasDependency()) {
          break;
        }

        auto dep = nwalker->getFirstDependency();

        if (dep->getType() != EN::ENUMERATE_COLLECTION && dep->getType() != EN::ENUMERATE_LIST) {
          // run ends at the first non-enumeration node
          break;
        }

        if (n->getType() == EN::ENUMERATE_LIST && dep->getType() == EN::ENUMERATE_LIST) {
          // do not combine two adjacent list enumerations into one run
          break;
        }

        nwalker = dep;
        nn.emplace_back(nwalker);
        nodesSet.erase(nwalker);
      }

      if (nn.size() > 1) {
        // Move it into the permutation tuple:
        starts.emplace_back(permTuple.size());

        for (auto const& nnn : nn) {
          nodesToPermute.emplace_back(nnn);
          permTuple.emplace_back(permTuple.size());
        }
      }
    }
  }

  // Now we have collected all the runs of EnumerateCollectionNodes in the
  // plan, we need to compute all possible permutations of all of them,
  // independently. This is why we need to compute all permutation tuples.

  if (!starts.empty()) {
    NextPermutationTuple(permTuple, starts);  // will never return false

    do {
      // check if we already have enough plans (plus the one plan that we will
      // add at the end of this function)
      if (opt->runOnlyRequiredRules(1)) {
        // have enough plans. stop permutations
        break;
      }

      // Clone the plan:
      std::unique_ptr<ExecutionPlan> newPlan(plan->clone());

      // Find the nodes in the new plan corresponding to the ones in the
      // old plan that we want to permute:
      std::vector<ExecutionNode*> newNodes;
      newNodes.reserve(nodesToPermute.size());
      for (size_t j = 0; j < nodesToPermute.size(); j++) {
        newNodes.emplace_back(newPlan->getNodeById(nodesToPermute[j]->id()));
      }

      // Now get going with the permutations:
      for (size_t i = 0; i < starts.size(); i++) {
        size_t lowBound = starts[i];
        size_t highBound = (i < starts.size() - 1) ? starts[i + 1] : permTuple.size();
        // We need to remove the nodes
        // newNodes[lowBound..highBound-1] in newPlan and replace
        // them by the same ones in a different order, given by
        // permTuple[lowBound..highBound-1].
        auto parent = newNodes[lowBound]->getFirstParent();

        TRI_ASSERT(parent != nullptr);

        // Unlink all those nodes:
        for (size_t j = lowBound; j < highBound; j++) {
          newPlan->unlinkNode(newNodes[j]);
        }

        // And insert them in the new order:
        for (size_t j = highBound; j-- != lowBound;) {
          newPlan->insertDependency(parent, newNodes[permTuple[j]]);
        }
      }

      // OK, the new plan is ready, let's report it:
      opt->addPlan(std::move(newPlan), rule, true);
    } while (NextPermutationTuple(permTuple, starts));
  }

  // finally add the original, unpermuted plan
  opt->addPlan(std::move(plan), rule, false);
}
|
|
|
|
/// @brief optimize queries in the cluster so that the entire query gets pushed
/// to a single server
/// NOTE(review): this rule is currently compiled out via "#if 0" below, i.e.
/// it is dead code kept for reference — confirm before re-enabling.
#if 0
void arangodb::aql::optimizeClusterSingleShardRule(Optimizer* opt,
                                                   std::unique_ptr<ExecutionPlan> plan,
                                                   OptimizerRule const* rule) {
  TRI_ASSERT(arangodb::ServerState::instance()->isCoordinator());
  bool wasModified = false;
  bool done = false;

  std::unordered_set<std::string> responsibleServers;
  auto collections = plan->getAst()->query()->collections();

  // check that every collection in the query has exactly one shard and
  // exactly one responsible server
  for (auto const& it : *(collections->collections())) {
    Collection* c = it.second;
    TRI_ASSERT(c != nullptr);

    if (c->numberOfShards() != 1) {
      // more than one shard for this collection
      done = true;
      break;
    }

    size_t n = c->responsibleServers(responsibleServers);

    if (n != 1) {
      // more than one responsible server for this collection
      done = true;
      break;
    }
  }

  if (done || responsibleServers.size() != 1) {
    // cannot apply the optimization; return the plan unchanged
    opt->addPlan(std::move(plan), rule, wasModified);
    return;
  }

  // we only found a single responsible server, and all collections involved
  // have exactly one shard
  // that means we can move the entire query onto that server

  // TODO: handle Traversals and ShortestPaths here!
  // TODO: properly handle subqueries here
  SmallVector<ExecutionNode*>::allocator_type::arena_type s;
  SmallVector<ExecutionNode*> nodes{s};
  std::vector<ExecutionNode::NodeType> types = {ExecutionNode::TRAVERSAL,
                                                ExecutionNode::K_SHORTEST_PATHS,
                                                ExecutionNode::SHORTEST_PATH,
                                                ExecutionNode::SUBQUERY};
  plan->findNodesOfType(nodes, types, true);

  bool hasIncompatibleNodes = !nodes.empty();

  nodes.clear();
  types = {ExecutionNode::INDEX, ExecutionNode::ENUMERATE_COLLECTION, ExecutionNode::TRAVERSAL};
  plan->findNodesOfType(nodes, types, false);

  if (!nodes.empty() && !hasIncompatibleNodes) {
    // turn off all other cluster optimization rules now as they are superfluous
    //opt->disableRule(OptimizerRule::optimizeClusterJoinsRule);
    opt->disableRule(OptimizerRule::distributeInClusterRule);
    opt->disableRule(OptimizerRule::scatterInClusterRule);
    opt->disableRule(OptimizerRule::distributeFilternCalcToClusterRule);
    opt->disableRule(OptimizerRule::distributeSortToClusterRule);
    opt->disableRule(OptimizerRule::removeUnnecessaryRemoteScatterRule);
#ifdef USE_ENTERPRISE
    opt->disableRule(OptimizerRule::removeSatelliteJoinsRule);
#endif
    opt->disableRule(OptimizerRule::undistributeRemoveAfterEnumCollRule);

    // get first collection from query
    Collection const* c = ::getCollection(nodes[0]);
    TRI_ASSERT(c != nullptr);

    auto& vocbase = plan->getAst()->query()->vocbase();
    ExecutionNode* rootNode = plan->root();

    // insert a remote node
    ExecutionNode* remoteNode =
        new RemoteNode(plan.get(), plan->nextId(), &vocbase, "", "", "");

    plan->registerNode(remoteNode);
    remoteNode->addDependency(rootNode);

    // insert a gather node
    auto const sortMode = GatherNode::evaluateSortMode(c->numberOfShards());

    auto* gatherNode = new GatherNode(plan.get(), plan->nextId(), sortMode);

    plan->registerNode(gatherNode);
    gatherNode->addDependency(remoteNode);
    plan->root(gatherNode, true);
    wasModified = true;
  }

  opt->addPlan(std::move(plan), rule, wasModified);
}
#endif
|
|
|
|
/// @brief scatter operations in cluster
/// this rule inserts scatter, gather and remote nodes so operations on sharded
/// collections actually work
/// it will change plans in place
/// For every matching node N the rule rewrites the chain
///   dep -> N -> parent
/// into
///   dep -> SCATTER -> REMOTE -> N -> REMOTE -> GATHER -> parent
void arangodb::aql::scatterInClusterRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                         OptimizerRule const* rule) {
  TRI_ASSERT(arangodb::ServerState::instance()->isCoordinator());
  bool wasModified = false;

  // find subqueries
  // maps the root node of each subquery to its SubqueryNode, so we can fix
  // up the subquery root if we replace it below
  std::unordered_map<ExecutionNode*, ExecutionNode*> subqueries;

  SmallVector<ExecutionNode*>::allocator_type::arena_type s;
  SmallVector<ExecutionNode*> subs{s};
  plan->findNodesOfType(subs, ExecutionNode::SUBQUERY, true);

  for (auto& it : subs) {
    subqueries.emplace(ExecutionNode::castTo<SubqueryNode const*>(it)->getSubquery(), it);
  }

  // we are a coordinator. now look in the plan for nodes of type
  // EnumerateCollectionNode, IndexNode and modification nodes
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, ::scatterInClusterNodeTypes, true);

  TRI_ASSERT(plan->getAst() && plan->getAst()->query() &&
             plan->getAst()->query()->trx());
  auto* resolver = plan->getAst()->query()->trx()->resolver();
  TRI_ASSERT(resolver);

  for (auto& node : nodes) {
    // found a node we need to replace in the plan

    auto const& parents = node->getParents();
    // intentional copy of the dependencies, as we will be modifying
    // dependencies later on
    auto const deps = node->getDependencies();
    TRI_ASSERT(deps.size() == 1);

    // don't do this if we are already distributing!
    if (deps[0]->getType() == ExecutionNode::REMOTE &&
        deps[0]->getFirstDependency()->getType() == ExecutionNode::DISTRIBUTE) {
      continue;
    }

    if (plan->shouldExcludeFromScatterGather(node)) {
      continue;
    }

    bool const isRootNode = plan->isRoot(node);
    // take the node out of the plan; it is re-linked between the two
    // REMOTE nodes created below
    plan->unlinkNode(node, true);

    auto const nodeType = node->getType();

    // extract database and collection from plan node
    TRI_vocbase_t* vocbase = nullptr;
    Collection const* collection = nullptr;

    // sort criteria for the GatherNode, if the node's index delivers a
    // usable order (filled in the INDEX branch below)
    SortElementVector elements;

    if (nodeType == ExecutionNode::ENUMERATE_COLLECTION) {
      vocbase = ExecutionNode::castTo<EnumerateCollectionNode const*>(node)->vocbase();
      collection = ExecutionNode::castTo<EnumerateCollectionNode const*>(node)->collection();
    } else if (nodeType == ExecutionNode::INDEX) {
      auto idxNode = ExecutionNode::castTo<IndexNode const*>(node);
      vocbase = idxNode->vocbase();
      collection = idxNode->collection();
      TRI_ASSERT(collection != nullptr);
      Variable const* sortVariable = idxNode->outVariable();
      bool isSortAscending = idxNode->options().ascending;
      auto allIndexes = idxNode->getIndexes();
      TRI_ASSERT(!allIndexes.empty());

      // Using Index for sort only works if all indexes are equal.
      auto first = allIndexes[0].getIndex();
      // also check if we actually need to bother about the sortedness of the
      // result, or if we use the index for filtering only
      if (first->isSorted() && idxNode->needsGatherNodeSort()) {
        for (auto const& path : first->fieldNames()) {
          elements.emplace_back(sortVariable, isSortAscending, path);
        }
        for (auto const& it : allIndexes) {
          if (first != it.getIndex()) {
            // mixed indexes: cannot rely on a common sort order
            elements.clear();
            break;
          }
        }
      }
    } else if (nodeType == ExecutionNode::INSERT || nodeType == ExecutionNode::UPDATE ||
               nodeType == ExecutionNode::REPLACE || nodeType == ExecutionNode::REMOVE ||
               nodeType == ExecutionNode::UPSERT) {
      vocbase = ExecutionNode::castTo<ModificationNode*>(node)->vocbase();
      collection = ExecutionNode::castTo<ModificationNode*>(node)->collection();
      if (nodeType == ExecutionNode::REMOVE || nodeType == ExecutionNode::UPDATE) {
        // Note that in the REPLACE or UPSERT case we are not getting here,
        // since the distributeInClusterRule fires and a DistributionNode is
        // used.
        auto* modNode = ExecutionNode::castTo<ModificationNode*>(node);
        modNode->getOptions().ignoreDocumentNotFound = true;
      }
    } else {
      TRI_ASSERT(false);
    }

    // insert a scatter node
    auto* scatterNode = new ScatterNode(plan.get(), plan->nextId());
    plan->registerNode(scatterNode);
    TRI_ASSERT(!deps.empty());
    scatterNode->addDependency(deps[0]);

    // insert a remote node
    ExecutionNode* remoteNode =
        new RemoteNode(plan.get(), plan->nextId(), vocbase, "", "", "");
    plan->registerNode(remoteNode);
    TRI_ASSERT(scatterNode);
    remoteNode->addDependency(scatterNode);

    // re-link with the remote node
    node->addDependency(remoteNode);

    // insert another remote node
    remoteNode = new RemoteNode(plan.get(), plan->nextId(), vocbase, "", "", "");
    plan->registerNode(remoteNode);
    TRI_ASSERT(node);
    remoteNode->addDependency(node);

    // insert a gather node
    auto const sortMode = GatherNode::evaluateSortMode(collection->numberOfShards());
    auto* gatherNode = new GatherNode(plan.get(), plan->nextId(), sortMode);
    plan->registerNode(gatherNode);
    TRI_ASSERT(remoteNode);
    gatherNode->addDependency(remoteNode);
    // On SmartEdge collections we have 0 shards and we need the elements
    // to be injected here as well. So do not replace it with > 1
    if (!elements.empty() && collection->numberOfShards() != 1) {
      gatherNode->elements(elements);
    }

    // and now link the gather node with the rest of the plan
    if (parents.size() == 1) {
      parents[0]->replaceDependency(deps[0], gatherNode);
    }

    // check if the node that we modified was at the end of a subquery
    auto it = subqueries.find(node);

    if (it != subqueries.end()) {
      ExecutionNode::castTo<SubqueryNode*>((*it).second)->setSubquery(gatherNode, true);
    }

    if (isRootNode) {
      // if we replaced the root node, set a new root node
      plan->root(gatherNode);
    }
    wasModified = true;
  }

  opt->addPlan(std::move(plan), rule, wasModified);
}
|
|
|
|
/// @brief distribute operations in cluster
|
|
///
|
|
/// this rule inserts distribute, remote nodes so operations on sharded
|
|
/// collections actually work, this differs from scatterInCluster in that every
|
|
/// incoming row is only sent to one shard and not all as in scatterInCluster
|
|
///
|
|
/// it will change plans in place
|
|
void arangodb::aql::distributeInClusterRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
TRI_ASSERT(arangodb::ServerState::instance()->isCoordinator());
|
|
bool wasModified = false;
|
|
// we are a coordinator, we replace the root if it is a modification node
|
|
|
|
// only replace if it is the last node in the plan
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> subqueryNodes{a};
|
|
// inspect each return node and work upwards to SingletonNode
|
|
subqueryNodes.push_back(plan->root());
|
|
plan->findNodesOfType(subqueryNodes, ExecutionNode::SUBQUERY, true);
|
|
|
|
for (ExecutionNode* subqueryNode : subqueryNodes) {
|
|
SubqueryNode* snode = nullptr;
|
|
ExecutionNode* root = nullptr; // only used for asserts
|
|
bool reachedEnd = false;
|
|
if (subqueryNode == plan->root()) {
|
|
snode = nullptr;
|
|
root = plan->root();
|
|
} else {
|
|
snode = ExecutionNode::castTo<SubqueryNode*>(subqueryNode);
|
|
root = snode->getSubquery();
|
|
}
|
|
ExecutionNode* node = root;
|
|
TRI_ASSERT(node != nullptr);
|
|
|
|
while (node != nullptr) {
|
|
auto nodeType = node->getType();
|
|
|
|
// loop until we find a modification node or the end of the plan
|
|
while (node != nullptr) {
|
|
// update type
|
|
nodeType = node->getType();
|
|
|
|
// check if there is a node type that needs distribution
|
|
if (nodeType == ExecutionNode::INSERT || nodeType == ExecutionNode::REMOVE ||
|
|
nodeType == ExecutionNode::UPDATE || nodeType == ExecutionNode::REPLACE ||
|
|
nodeType == ExecutionNode::UPSERT) {
|
|
// found a node!
|
|
break;
|
|
}
|
|
|
|
// there is nothing above us
|
|
if (!node->hasDependency()) {
|
|
// reached the end
|
|
reachedEnd = true;
|
|
break;
|
|
}
|
|
|
|
// go further up the tree
|
|
node = node->getFirstDependency();
|
|
}
|
|
|
|
if (reachedEnd) {
|
|
// break loop for subqyery
|
|
break;
|
|
}
|
|
|
|
TRI_ASSERT(node != nullptr);
|
|
if (node == nullptr) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error");
|
|
}
|
|
|
|
// when we get here, we have found a matching data-modification node!
|
|
TRI_ASSERT(nodeType == ExecutionNode::INSERT || nodeType == ExecutionNode::REMOVE ||
|
|
nodeType == ExecutionNode::UPDATE || nodeType == ExecutionNode::REPLACE ||
|
|
nodeType == ExecutionNode::UPSERT);
|
|
|
|
ExecutionNode* originalParent = nullptr;
|
|
if (node->hasParent()) {
|
|
auto const& parents = node->getParents();
|
|
originalParent = parents[0];
|
|
TRI_ASSERT(originalParent != nullptr);
|
|
TRI_ASSERT(node != root);
|
|
} else {
|
|
TRI_ASSERT(node == root);
|
|
}
|
|
|
|
Collection const* collection =
|
|
ExecutionNode::castTo<ModificationNode*>(node)->collection();
|
|
|
|
#ifdef USE_ENTERPRISE
|
|
auto ci = ClusterInfo::instance();
|
|
auto collInfo =
|
|
ci->getCollection(collection->vocbase()->name(), collection->name());
|
|
// Throws if collection is not found!
|
|
if (collInfo->isSmart() && collInfo->type() == TRI_COL_TYPE_EDGE) {
|
|
node = distributeInClusterRuleSmartEdgeCollection(plan.get(), snode, node,
|
|
originalParent, wasModified);
|
|
continue;
|
|
}
|
|
#endif
|
|
bool const defaultSharding = collection->usesDefaultSharding();
|
|
|
|
if (nodeType == ExecutionNode::REMOVE || nodeType == ExecutionNode::UPDATE) {
|
|
if (!defaultSharding) {
|
|
// We have to use a ScatterNode.
|
|
node = node->getFirstDependency(); // advance node
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// In the INSERT and REPLACE cases we use a DistributeNode...
|
|
|
|
TRI_ASSERT(node->hasDependency());
|
|
// intentional copy of the dependencies, as we will be modifying
|
|
// dependencies later on
|
|
auto const deps = node->getDependencies();
|
|
|
|
bool haveAdjusted = false;
|
|
if (originalParent != nullptr) {
|
|
// nodes below removed node
|
|
originalParent->removeDependency(node);
|
|
plan->unlinkNode(node, true);
|
|
if (snode) {
|
|
if (snode->getSubquery() == node) {
|
|
snode->setSubquery(originalParent, true);
|
|
haveAdjusted = true;
|
|
}
|
|
}
|
|
} else {
|
|
// no nodes below unlinked node
|
|
plan->unlinkNode(node, true);
|
|
if (snode) {
|
|
snode->setSubquery(deps[0], true);
|
|
haveAdjusted = true;
|
|
} else {
|
|
plan->root(deps[0], true);
|
|
}
|
|
}
|
|
|
|
// extract database from plan node
|
|
TRI_vocbase_t* vocbase = ExecutionNode::castTo<ModificationNode*>(node)->vocbase();
|
|
|
|
// insert a distribute node
|
|
ExecutionNode* distNode = nullptr;
|
|
Variable const* inputVariable;
|
|
if (nodeType == ExecutionNode::INSERT || nodeType == ExecutionNode::REMOVE) {
|
|
// in case of an INSERT, the DistributeNode is responsible for
|
|
// generating keys if none present
|
|
bool const createKeys = (nodeType == ExecutionNode::INSERT);
|
|
if (nodeType == ExecutionNode::INSERT) {
|
|
inputVariable = ExecutionNode::castTo<InsertNode const*>(node)->inVariable();
|
|
} else {
|
|
inputVariable = ExecutionNode::castTo<RemoveNode const*>(node)->inVariable();
|
|
}
|
|
distNode = new DistributeNode(plan.get(), plan->nextId(), collection,
|
|
inputVariable, inputVariable, createKeys, true);
|
|
} else if (nodeType == ExecutionNode::REPLACE || nodeType == ExecutionNode::UPDATE) {
|
|
auto updateReplaceNode = ExecutionNode::castTo<UpdateReplaceNode const*>(node);
|
|
if (defaultSharding && updateReplaceNode->inKeyVariable() != nullptr) {
|
|
// We only look into _inKeyVariable
|
|
// This is the _inKeyVariable! This works, since we use a ScatterNode
|
|
// for non-default-sharding attributes.
|
|
inputVariable = updateReplaceNode->inKeyVariable();
|
|
} else {
|
|
// We only look into _inDocVariable
|
|
// was only UPDATE <doc> IN <collection>
|
|
inputVariable = updateReplaceNode->inDocVariable();
|
|
}
|
|
distNode = new DistributeNode(plan.get(), plan->nextId(), collection,
|
|
inputVariable, inputVariable, false,
|
|
updateReplaceNode->inKeyVariable() != nullptr);
|
|
} else if (nodeType == ExecutionNode::UPSERT) {
|
|
// an UPSERT node has two input variables!
|
|
auto upsertNode = ExecutionNode::castTo<UpsertNode const*>(node);
|
|
auto d = new DistributeNode(plan.get(), plan->nextId(), collection,
|
|
upsertNode->inDocVariable(),
|
|
upsertNode->insertVariable(), true, true);
|
|
d->setAllowSpecifiedKeys(true);
|
|
distNode = ExecutionNode::castTo<ExecutionNode*>(d);
|
|
} else {
|
|
TRI_ASSERT(false);
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "logic error");
|
|
}
|
|
|
|
TRI_ASSERT(distNode != nullptr);
|
|
|
|
plan->registerNode(distNode);
|
|
distNode->addDependency(deps[0]);
|
|
|
|
// insert a remote node
|
|
ExecutionNode* remoteNode =
|
|
new RemoteNode(plan.get(), plan->nextId(), vocbase, "", "", "");
|
|
plan->registerNode(remoteNode);
|
|
remoteNode->addDependency(distNode);
|
|
|
|
// re-link with the remote node
|
|
node->addDependency(remoteNode);
|
|
|
|
// insert another remote node
|
|
remoteNode =
|
|
new RemoteNode(plan.get(), plan->nextId(), vocbase, "", "", "");
|
|
plan->registerNode(remoteNode);
|
|
remoteNode->addDependency(node);
|
|
|
|
// insert a gather node
|
|
auto const sortMode = GatherNode::evaluateSortMode(collection->numberOfShards());
|
|
auto* gatherNode = new GatherNode(plan.get(), plan->nextId(), sortMode);
|
|
plan->registerNode(gatherNode);
|
|
gatherNode->addDependency(remoteNode);
|
|
|
|
if (originalParent != nullptr) {
|
|
// we did not replace the root node
|
|
TRI_ASSERT(gatherNode);
|
|
originalParent->addDependency(gatherNode);
|
|
} else {
|
|
// we replaced the root node, set a new root node
|
|
if (snode) {
|
|
if (snode->getSubquery() == node || haveAdjusted) {
|
|
snode->setSubquery(gatherNode, true);
|
|
}
|
|
} else {
|
|
plan->root(gatherNode, true);
|
|
}
|
|
}
|
|
wasModified = true;
|
|
node = distNode;  // will be gatherNode or nullptr
|
|
} // for node in subquery
|
|
} // for end subquery in plan
|
|
opt->addPlan(std::move(plan), rule, wasModified);
|
|
}
|
|
|
|
/// @brief push COLLECT operations from the coordinator down into the DB
/// servers where this is beneficial. For eligible COLLECT nodes (WITH COUNT,
/// DISTINCT, or grouping/aggregation whose aggregate functions can be split
/// into a per-shard part plus a coordinator part), a new CollectNode is
/// inserted below the REMOTE boundary so each DB server pre-aggregates its
/// own shard data, and the original coordinator CollectNode is rewritten to
/// merge the partial results. This rule modifies the plan in place.
void arangodb::aql::collectInClusterRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                         OptimizerRule const* rule) {
  // this rule must only run on a coordinator
  TRI_ASSERT(arangodb::ServerState::instance()->isCoordinator());
  bool wasModified = false;

  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::COLLECT, true);

  // variable sets are hoisted out of the loop and cleared per node to
  // avoid re-allocating them for every COLLECT node
  arangodb::HashSet<Variable const*> allUsed;
  arangodb::HashSet<Variable const*> used;

  for (auto& node : nodes) {
    allUsed.clear();
    used.clear();
    node->getVariablesUsedHere(used);

    // found a node we need to replace in the plan
    TRI_ASSERT(node->getDependencies().size() == 1);

    auto collectNode = ExecutionNode::castTo<CollectNode*>(node);
    // look for next remote node
    GatherNode* gatherNode = nullptr;
    auto current = node->getFirstDependency();

    // walk down the dependency chain from the COLLECT until we hit a
    // REMOTE node (i.e. the coordinator/DB-server boundary) or a node
    // that makes the optimization impossible
    while (current != nullptr) {
      bool eligible = true;

      // check if any of the nodes we pass use a variable that will not be
      // available after we insert a new COLLECT on top of it (note: COLLECT
      // will eliminate all variables from the scope but its own)
      if (current->getType() != EN::GATHER) {
        // Gather nodes are taken care of separately below
        current->getVariablesUsedHere(allUsed);
      }

      for (auto const& it : current->getVariablesSetHere()) {
        if (std::find(used.begin(), used.end(), it) != used.end()) {
          eligible = false;
          break;
        }
      }

      if (!eligible) {
        break;
      }

      if (current->getType() == ExecutionNode::GATHER) {
        // remember the GatherNode so we can later adjust or clear its
        // sort elements
        gatherNode = ExecutionNode::castTo<GatherNode*>(current);
      } else if (current->getType() == ExecutionNode::REMOTE) {
        auto previous = current->getFirstDependency();
        // now we are on a DB server

        {
          // check if we will deal with more than one shard
          // if the remote one has one shard, the optimization will actually
          // be a pessimization and shouldn't be applied
          bool hasFoundMultipleShards = false;
          auto p = previous;
          while (p != nullptr) {
            if (p->getType() == ExecutionNode::REMOTE) {
              hasFoundMultipleShards = true;
            } else if (p->getType() == ExecutionNode::ENUMERATE_COLLECTION || p->getType() == ExecutionNode::INDEX) {
              auto col = getCollection(p);
              if (col->numberOfShards() > 1) {
                hasFoundMultipleShards = true;
              }
            } else if (p->getType() == ExecutionNode::TRAVERSAL) {
              // traversals may touch many shards; treat as multi-shard
              hasFoundMultipleShards = true;
            }
            if (hasFoundMultipleShards) {
              break;
            }
            p = p->getFirstDependency();
          }
          if (!hasFoundMultipleShards) {
            // only a single shard will be contacted - abort the optimization attempt
            // to not make it a pessimization
            break;
          }
        }

        // we may have moved another CollectNode here already. if so, we need to
        // move the new CollectNode to the front of multiple CollectNodes
        ExecutionNode* target = current;
        while (previous != nullptr && previous->getType() == ExecutionNode::COLLECT) {
          target = previous;
          previous = previous->getFirstDependency();
        }

        TRI_ASSERT(eligible);

        if (previous != nullptr) {
          // ensure all variables used by the nodes we passed over are
          // produced by the COLLECT itself, otherwise they would become
          // unavailable after the rewrite
          for (auto const& otherVariable : allUsed) {
            auto const setHere = collectNode->getVariablesSetHere();
            if (std::find(setHere.begin(), setHere.end(), otherVariable) ==
                setHere.end()) {
              eligible = false;
              break;
            }
          }

          if (!eligible) {
            break;
          }

          bool removeGatherNodeSort = false;

          if (collectNode->aggregationMethod() == CollectOptions::CollectMethod::COUNT) {
            // clone a COLLECT WITH COUNT operation from the coordinator to the
            // DB server(s), and leave an aggregate COLLECT node on the
            // coordinator for total aggregation

            // add a new CollectNode on the DB server to do the actual counting
            auto outVariable = plan->getAst()->variables()->createTemporaryVariable();
            auto dbCollectNode =
                new CollectNode(plan.get(), plan->nextId(), collectNode->getOptions(),
                                collectNode->groupVariables(),
                                collectNode->aggregateVariables(), nullptr,
                                outVariable, std::vector<Variable const*>(),
                                collectNode->variableMap(), true, false);

            plan->registerNode(dbCollectNode);

            // splice the new node in between `previous` and `target`
            dbCollectNode->addDependency(previous);
            target->replaceDependency(previous, dbCollectNode);

            dbCollectNode->aggregationMethod(collectNode->aggregationMethod());
            dbCollectNode->specialized();

            // re-use the existing CollectNode on the coordinator to aggregate
            // the counts of the DB servers
            std::vector<std::pair<Variable const*, std::pair<Variable const*, std::string>>> aggregateVariables;
            aggregateVariables.emplace_back(
                std::make_pair(collectNode->outVariable(),
                               std::make_pair(outVariable, "SUM")));

            collectNode->aggregationMethod(CollectOptions::CollectMethod::SORTED);
            collectNode->count(false);
            collectNode->setAggregateVariables(aggregateVariables);
            collectNode->clearOutVariable();

            removeGatherNodeSort = true;
          } else if (collectNode->aggregationMethod() ==
                     CollectOptions::CollectMethod::DISTINCT) {
            // clone a COLLECT DISTINCT operation from the coordinator to the DB
            // server(s), and leave an aggregate COLLECT node on the coordinator
            // for total aggregation

            // create a new result variable
            auto const& groupVars = collectNode->groupVariables();
            TRI_ASSERT(!groupVars.empty());
            auto out = plan->getAst()->variables()->createTemporaryVariable();

            std::vector<std::pair<Variable const*, Variable const*>> const groupVariables{
                std::make_pair(out, groupVars[0].second)};

            auto dbCollectNode =
                new CollectNode(plan.get(), plan->nextId(), collectNode->getOptions(),
                                groupVariables, collectNode->aggregateVariables(),
                                nullptr, nullptr, std::vector<Variable const*>(),
                                collectNode->variableMap(), false, true);

            plan->registerNode(dbCollectNode);

            dbCollectNode->addDependency(previous);
            target->replaceDependency(previous, dbCollectNode);

            dbCollectNode->aggregationMethod(collectNode->aggregationMethod());
            dbCollectNode->specialized();

            // will set the input of the coordinator's collect node to the new
            // variable produced on the DB servers
            auto copy = collectNode->groupVariables();
            TRI_ASSERT(!copy.empty());
            copy[0].second = out;
            collectNode->groupVariables(copy);
          } else if ( //! collectNode->groupVariables().empty() &&
                     (!collectNode->hasOutVariable() || collectNode->count())) {
            // clone a COLLECT v1 = expr, v2 = expr ... operation from the
            // coordinator to the DB server(s), and leave an aggregate COLLECT
            // node on the coordinator for total aggregation

            // every aggregate function must have a DB-server-side counterpart,
            // otherwise the COLLECT cannot be split
            std::vector<std::pair<Variable const*, std::pair<Variable const*, std::string>>> aggregateVariables;
            if (!collectNode->aggregateVariables().empty()) {
              for (auto const& it : collectNode->aggregateVariables()) {
                std::string func = Aggregator::pushToDBServerAs(it.second.second);
                if (func.empty()) {
                  eligible = false;
                  break;
                }
                // eligible!
                auto outVariable = plan->getAst()->variables()->createTemporaryVariable();
                aggregateVariables.emplace_back(
                    std::make_pair(outVariable, std::make_pair(it.second.first, func)));
              }
            }

            if (!eligible) {
              break;
            }

            Variable const* outVariable = nullptr;
            if (collectNode->count()) {
              outVariable = plan->getAst()->variables()->createTemporaryVariable();
            }

            // create new group variables
            auto const& groupVars = collectNode->groupVariables();
            std::vector<std::pair<Variable const*, Variable const*>> outVars;
            outVars.reserve(groupVars.size());
            // maps old input variable -> new DB-server-produced variable;
            // also used below to patch the GatherNode's sort elements
            std::unordered_map<Variable const*, Variable const*> replacements;

            for (auto const& it : groupVars) {
              // create new out variables
              auto out = plan->getAst()->variables()->createTemporaryVariable();
              replacements.emplace(it.second, out);
              outVars.emplace_back(out, it.second);
            }

            auto dbCollectNode =
                new CollectNode(plan.get(), plan->nextId(), collectNode->getOptions(),
                                outVars, aggregateVariables, nullptr,
                                outVariable, std::vector<Variable const*>(),
                                collectNode->variableMap(), collectNode->count(), false);

            plan->registerNode(dbCollectNode);

            dbCollectNode->addDependency(previous);
            target->replaceDependency(previous, dbCollectNode);

            dbCollectNode->aggregationMethod(collectNode->aggregationMethod());
            dbCollectNode->specialized();

            std::vector<std::pair<Variable const*, Variable const*>> copy;
            size_t i = 0;
            for (auto const& it : collectNode->groupVariables()) {
              // replace input variables
              copy.emplace_back(std::make_pair(it.first, outVars[i].first));
              ++i;
            }
            collectNode->groupVariables(copy);

            if (collectNode->count()) {
              // coordinator sums up the per-shard counts
              std::vector<std::pair<Variable const*, std::pair<Variable const*, std::string>>> aggregateVariables;
              aggregateVariables.emplace_back(
                  std::make_pair(collectNode->outVariable(),
                                 std::make_pair(outVariable, "SUM")));

              collectNode->count(false);
              collectNode->setAggregateVariables(aggregateVariables);
              collectNode->clearOutVariable();
            } else {
              // rewire the coordinator's aggregates to consume the
              // DB-server partial results with the coordinator-side function
              size_t i = 0;
              for (auto& it : collectNode->aggregateVariables()) {
                it.second.first = aggregateVariables[i].first;
                it.second.second = Aggregator::runOnCoordinatorAs(it.second.second);
                ++i;
              }
            }

            removeGatherNodeSort = (dbCollectNode->aggregationMethod() !=
                                    CollectOptions::CollectMethod::SORTED);

            // in case we need to keep the sortedness of the GatherNode,
            // we may need to replace some variable references in it due
            // to the changes we made to the COLLECT node
            if (gatherNode != nullptr) {
              SortElementVector& elements = gatherNode->elements();
              if (!removeGatherNodeSort && !replacements.empty() && !elements.empty()) {
                std::string cmp;
                std::string other;
                basics::StringBuffer buffer(128, false);

                // look for all sort elements in the GatherNode and replace them
                // if they match what we have changed
                for (auto& it : elements) {
                  // replace variables
                  auto it2 = replacements.find(it.var);

                  if (it2 != replacements.end()) {
                    // match with our replacement table
                    it.var = (*it2).second;
                    it.attributePath.clear();
                  } else {
                    // no match. now check all our replacements and compare how
                    // their sources are actually calculated (e.g. #2 may mean
                    // "foo.bar")
                    cmp = it.toString();
                    for (auto const& it3 : replacements) {
                      auto setter = plan->getVarSetBy(it3.first->id);
                      if (setter == nullptr || setter->getType() != EN::CALCULATION) {
                        continue;
                      }
                      auto* expr =
                          ExecutionNode::castTo<CalculationNode const*>(setter)->expression();
                      if (expr == nullptr) {
                        continue;
                      }
                      other.clear();
                      try {
                        buffer.clear();
                        expr->stringify(&buffer);
                        other = std::string(buffer.c_str(), buffer.size());
                      } catch (...) {
                        // stringification is best-effort only; on failure
                        // `other` stays empty and simply won't match
                      }
                      if (other == cmp) {
                        // finally a match!
                        it.var = it3.second;
                        it.attributePath.clear();
                        break;
                      }
                    }
                  }
                }
              }
            }
          } else {
            // all other cases cannot be optimized
            break;
          }

          if (gatherNode != nullptr && removeGatherNodeSort) {
            // remove sort(s) from GatherNode if we can
            gatherNode->elements().clear();
          }

          wasModified = true;
        }
        // we handled (or rejected) the REMOTE boundary - stop walking
        break;
      }

      current = current->getFirstDependency();
    }
  }

  opt->addPlan(std::move(plan), rule, wasModified);
}
|
|
|
|
/// @brief move filters up into the cluster distribution part of the plan
/// this rule modifies the plan in place
/// filters are moved as far up in the plan as possible to make result sets
/// as small as possible as early as possible
///
/// starting at each GatherNode, the rule walks towards the plan root and
/// relocates FilterNodes and (DB-server-safe) CalculationNodes to just
/// below the REMOTE node, i.e. onto the DB servers
void arangodb::aql::distributeFilternCalcToClusterRule(Optimizer* opt,
                                                       std::unique_ptr<ExecutionPlan> plan,
                                                       OptimizerRule const* rule) {
  bool modified = false;

  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::GATHER, true);

  arangodb::HashSet<Variable const*> varsSetHere;

  for (auto& n : nodes) {
    auto const& remoteNodeList = n->getDependencies();
    TRI_ASSERT(remoteNodeList.size() > 0);
    // rn is the node directly below the GATHER; moved nodes are re-inserted
    // in front of it (i.e. on the DB-server side)
    auto rn = remoteNodeList[0];

    if (!n->hasParent()) {
      continue;
    }

    // once set, only further FILTER/CALCULATION nodes may be moved
    bool allowOnlyFilterAndCalculation = false;

    varsSetHere.clear();
    auto parents = n->getParents();
    TRI_ASSERT(!parents.empty());

    // walk from the GATHER towards the plan root
    while (true) {
      TRI_ASSERT(!parents.empty());
      bool stopSearching = false;
      auto inspectNode = parents[0];
      TRI_ASSERT(inspectNode != nullptr);

      auto type = inspectNode->getType();
      if (allowOnlyFilterAndCalculation && type != EN::FILTER && type != EN::CALCULATION) {
        stopSearching = true;
        break;
      }

      switch (type) {
        case EN::ENUMERATE_LIST:
        case EN::SINGLETON:
        case EN::INSERT:
        case EN::REMOVE:
        case EN::REPLACE:
        case EN::UPDATE:
        case EN::UPSERT:
        case EN::SORT: {
          // these nodes may be walked over, but remember what variables
          // they produce so we never move a node past its own inputs
          for (auto& v : inspectNode->getVariablesSetHere()) {
            varsSetHere.emplace(v);
          }
          parents = inspectNode->getParents();
          if (type == EN::SORT) {
            allowOnlyFilterAndCalculation = true;
          }
          continue;
        }

        case EN::COLLECT:
        case EN::RETURN:
        case EN::NORESULTS:
        case EN::SCATTER:
        case EN::DISTRIBUTE:
        case EN::GATHER:
        case EN::REMOTE:
        case EN::LIMIT:
        case EN::INDEX:
        case EN::ENUMERATE_COLLECTION:
        case EN::TRAVERSAL:
        case EN::K_SHORTEST_PATHS:
        case EN::SHORTEST_PATH:
        case EN::SUBQUERY:
        case EN::ENUMERATE_IRESEARCH_VIEW:
          // do break
          stopSearching = true;
          break;

        case EN::CALCULATION:
        case EN::FILTER: {
          if (inspectNode->getType() == EN::CALCULATION) {
            // check if the expression can be executed on a DB server safely
            if (!ExecutionNode::castTo<CalculationNode const*>(inspectNode)
                     ->expression()
                     ->canRunOnDBServer()) {
              stopSearching = true;
              break;
            }
            // intentionally falls through
          }
#if 0
          // TODO: this is already prepared to push subqueries on the DB servers.
          // However, the ExecutionEngine's instanciator cannot yet handle subqueries
          // on DB servers. Once it can do this, this part can be finished
          else if (inspectNode->getType() == EN::SUBQUERY) {
            // check if the subquery can be executed on a DB server safely
            SubqueryNode* s = ExecutionNode::castTo<SubqueryNode*>(inspectNode);
            if (!s->isDeterministic() || s->mayAccessCollections()) {
              stopSearching = true;
              break;
            }
            // intentionally falls through
          }
#endif
          // no special handling for filters here

          TRI_ASSERT(inspectNode->getType() == EN::SUBQUERY ||
                     inspectNode->getType() == EN::CALCULATION ||
                     inspectNode->getType() == EN::FILTER);

          // do not move a node below the definitions of variables it uses
          arangodb::HashSet<Variable const*> used;
          inspectNode->getVariablesUsedHere(used);
          for (auto& v : used) {
            if (varsSetHere.find(v) != varsSetHere.end()) {
              // do not move over the definition of variables that we need
              stopSearching = true;
              break;
            }
          }

          if (!stopSearching) {
            // remember our cursor...
            parents = inspectNode->getParents();
            // then unlink the filter/calculator from the plan
            plan->unlinkNode(inspectNode);
            // and re-insert into plan in front of the remoteNode
            plan->insertDependency(rn, inspectNode);

            modified = true;
            // ready to rumble!
          }
          break;
        }

        default: {
          // should not reach this point
          TRI_ASSERT(false);
        }
      }

      if (stopSearching) {
        break;
      }
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief move sorts up into the cluster distribution part of the plan
/// this rule modifies the plan in place
/// sorts are moved as far up in the plan as possible to make result sets
/// as small as possible as early as possible
///
/// filters are not pushed beyond limits
void arangodb::aql::distributeSortToClusterRule(Optimizer* opt,
                                                std::unique_ptr<ExecutionPlan> plan,
                                                OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::GATHER, true);

  bool modified = false;

  for (auto& n : nodes) {
    auto const remoteNodeList = n->getDependencies();
    TRI_ASSERT(remoteNodeList.size() > 0);
    // rn is the node directly below the GATHER; a moved SortNode is
    // re-inserted in front of it (i.e. on the DB-server side)
    auto rn = remoteNodeList[0];

    if (!n->hasParent()) {
      continue;
    }

    auto gatherNode = ExecutionNode::castTo<GatherNode*>(n);

    auto parents = n->getParents();

    // walk from the GATHER towards the plan root, looking for SortNodes
    // that can be pushed below the GATHER
    while (true) {
      TRI_ASSERT(!parents.empty());
      bool stopSearching = false;
      auto inspectNode = parents[0];
      TRI_ASSERT(inspectNode != nullptr);

      switch (inspectNode->getType()) {
        case EN::SINGLETON:
        case EN::ENUMERATE_COLLECTION:
        case EN::ENUMERATE_LIST:
        case EN::COLLECT:
        case EN::INSERT:
        case EN::REMOVE:
        case EN::REPLACE:
        case EN::UPDATE:
        case EN::UPSERT:
        case EN::CALCULATION:
        case EN::FILTER:
        case EN::SUBQUERY:
        case EN::RETURN:
        case EN::NORESULTS:
        case EN::SCATTER:
        case EN::DISTRIBUTE:
        case EN::GATHER:
        case EN::REMOTE:
        case EN::LIMIT:
        case EN::INDEX:
        case EN::TRAVERSAL:
        case EN::K_SHORTEST_PATHS:
        case EN::SHORTEST_PATH:
        case EN::REMOTESINGLE:
        case EN::ENUMERATE_IRESEARCH_VIEW:

          // For all these, we do not want to pull a SortNode further down
          // out to the DBservers, note that potential FilterNodes and
          // CalculationNodes that can be moved to the DBservers have
          // already been moved over by the distribute-filtercalc-to-cluster
          // rule which is done first.
          stopSearching = true;
          break;

        case EN::SORT: {
          auto thisSortNode = ExecutionNode::castTo<SortNode*>(inspectNode);

          // remember our cursor...
          parents = inspectNode->getParents();
          // then unlink the filter/calculator from the plan
          plan->unlinkNode(inspectNode);
          // and re-insert into plan in front of the remoteNode
          if (thisSortNode->_reinsertInCluster) {
            plan->insertDependency(rn, inspectNode);
          }

          auto const* collection = GatherNode::findCollection(*gatherNode);

          // For views (when 'collection == nullptr') we don't need
          // to check number of shards
          // On SmartEdge collections we have 0 shards and we need the elements
          // to be injected here as well. So do not replace it with > 1
          if (!collection || collection->numberOfShards() != 1) {
            // copy the sort criteria into the GatherNode
            gatherNode->elements(thisSortNode->elements());
          }

          modified = true;
          // ready to rumble!
          break;
        }

        case EN::MAX_NODE_TYPE_VALUE: {
          // should not reach this point
          TRI_ASSERT(false);
          stopSearching = true;
          break;
        }
      }

      if (stopSearching) {
        break;
      }
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief try to get rid of a RemoteNode->ScatterNode combination which has
|
|
/// only a SingletonNode and possibly some CalculationNodes as dependencies
|
|
void arangodb::aql::removeUnnecessaryRemoteScatterRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
plan->findNodesOfType(nodes, EN::REMOTE, true);
|
|
|
|
arangodb::HashSet<ExecutionNode*> toUnlink;
|
|
|
|
for (auto& n : nodes) {
|
|
// check if the remote node is preceeded by a scatter node and any number of
|
|
// calculation and singleton nodes. if yes, remove remote and scatter
|
|
if (!n->hasDependency()) {
|
|
continue;
|
|
}
|
|
|
|
auto const dep = n->getFirstDependency();
|
|
if (dep->getType() != EN::SCATTER) {
|
|
continue;
|
|
}
|
|
|
|
bool canOptimize = true;
|
|
auto node = dep;
|
|
while (node != nullptr) {
|
|
auto const& d = node->getDependencies();
|
|
|
|
if (d.size() != 1) {
|
|
break;
|
|
}
|
|
|
|
node = d[0];
|
|
if (!plan->shouldExcludeFromScatterGather(node)) {
|
|
if (node->getType() != EN::SINGLETON &&
|
|
node->getType() != EN::CALCULATION && node->getType() != EN::FILTER) {
|
|
// found some other node type...
|
|
// this disqualifies the optimization
|
|
canOptimize = false;
|
|
break;
|
|
}
|
|
|
|
if (node->getType() == EN::CALCULATION) {
|
|
auto calc = ExecutionNode::castTo<CalculationNode const*>(node);
|
|
// check if the expression can be executed on a DB server safely
|
|
if (!calc->expression()->canRunOnDBServer()) {
|
|
canOptimize = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (canOptimize) {
|
|
toUnlink.emplace(n);
|
|
toUnlink.emplace(dep);
|
|
}
|
|
}
|
|
|
|
if (!toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
}
|
|
|
|
opt->addPlan(std::move(plan), rule, !toUnlink.empty());
|
|
}
|
|
|
|
/// @brief try to restrict fragments to a single shard if possible
/// if a modification or read operation can be proven to affect only one
/// shard, the affected node is restricted to that shard, and superfluous
/// REMOTE/SCATTER/DISTRIBUTE nodes in front of it may be removed
void arangodb::aql::restrictToSingleShardRule(Optimizer* opt,
                                              std::unique_ptr<ExecutionPlan> plan,
                                              OptimizerRule const* rule) {
  // this rule must only run on a coordinator
  TRI_ASSERT(arangodb::ServerState::instance()->isCoordinator());
  bool wasModified = false;

  // first pass: track which variables refer to which collections
  CollectionVariableTracker tracker;
  plan->root()->walk(tracker);
  if (!tracker.isSafeForOptimization()) {
    // encountered errors while working on optimization, do not continue
    opt->addPlan(std::move(plan), rule, wasModified);
    return;
  }

  // second pass: determine single-shard candidates based on the tracker data
  RestrictToSingleShardChecker finder(plan.get(), tracker);
  plan->root()->walk(finder);
  if (!finder.isSafeForOptimization()) {
    // found something in the execution plan that renders the optimization
    // unsafe, so do not optimize
    opt->addPlan(std::move(plan), rule, wasModified);
    return;
  }

  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::REMOTE, true);

  arangodb::HashSet<ExecutionNode*> toUnlink;
  // per-collection set of shards that modification nodes were restricted to
  std::map<Collection const*, std::unordered_set<std::string>> modificationRestrictions;

  // forward a shard key restriction from one collection to the other if the two collections
  // are used in a smart join (and use distributeShardsLike on each other)
  auto forwardRestrictionToPrototype = [&plan](ExecutionNode const* current, std::string const& shardId) {
    auto collectionNode = dynamic_cast<CollectionAccessingNode const*>(current);
    if (collectionNode == nullptr) {
      return;
    }
    auto prototypeOutVariable = collectionNode->prototypeOutVariable();
    if (prototypeOutVariable == nullptr) {
      return;
    }

    auto setter = plan->getVarSetBy(prototypeOutVariable->id);
    if (setter == nullptr ||
        (setter->getType() != EN::INDEX && setter->getType() != EN::ENUMERATE_COLLECTION)) {
      return;
    }

    auto s1 = ::getCollection(current)->shardIds();
    auto s2 = ::getCollection(setter)->shardIds();

    if (s1->size() != s2->size()) {
      // different number of shard ids... should not happen if we have a prototype
      return;
    }

    // find matching shard key
    // note: shards at the same index are assumed to correspond to each
    // other for collections that use distributeShardsLike
    for (size_t i = 0; i < s1->size(); ++i) {
      if ((*s1)[i] == shardId) {
        ::restrictToShard(setter, (*s2)[i]);
        break;
      }
    }
  };

  for (auto& node : nodes) {
    TRI_ASSERT(node->getType() == ExecutionNode::REMOTE);
    ExecutionNode* current = node->getFirstDependency();

    // walk down the dependency chain below the REMOTE node
    while (current != nullptr) {
      auto const currentType = current->getType();
      if (currentType == ExecutionNode::INSERT || currentType == ExecutionNode::UPDATE ||
          currentType == ExecutionNode::REPLACE || currentType == ExecutionNode::REMOVE) {
        auto collection =
            ExecutionNode::castTo<ModificationNode const*>(current)->collection();
        std::string shardId = ::getSingleShardId(plan.get(), current, collection);

        if (!shardId.empty()) {
          wasModified = true;
          // we are on a single shard. we must not ignore not-found documents
          // now
          auto* modNode = ExecutionNode::castTo<ModificationNode*>(current);
          modNode->getOptions().ignoreDocumentNotFound = false;
          modNode->restrictToShard(shardId);
          modificationRestrictions[collection].emplace(shardId);

          auto const& deps = current->getDependencies();
          if (deps.size() && deps[0]->getType() == ExecutionNode::REMOTE) {
            // if we can apply the single-shard optimization, but still have a
            // REMOTE node in front of us, we can probably move the remote parts
            // of the query to our side. this is only the case if the remote
            // part does not call any remote parts itself
            arangodb::HashSet<ExecutionNode*> toRemove;

            auto c = deps[0];
            toRemove.emplace(c);
            while (true) {
              if (c->getType() == EN::SCATTER || c->getType() == EN::DISTRIBUTE) {
                toRemove.emplace(c);
              }
              c = c->getFirstDependency();

              if (c == nullptr) {
                // reached the end
                break;
              }

              if (c->getType() == EN::REMOTE || c->getType() == EN::SUBQUERY) {
                // nested remote parts or subqueries - abandon the removal
                toRemove.clear();
                break;
              }

              if (c->getType() == EN::CALCULATION) {
                auto cn = ExecutionNode::castTo<CalculationNode const*>(c);
                auto expr = cn->expression();
                if (expr != nullptr && !expr->canRunOnDBServer()) {
                  // found something that must not run on a DB server,
                  // but that must run on a coordinator. stop optimization here!
                  toRemove.clear();
                  break;
                }
              }
            }

            for (auto const& it : toRemove) {
              toUnlink.emplace(it);
            }
          }
        }
      } else if (currentType == ExecutionNode::INDEX ||
                 currentType == ExecutionNode::ENUMERATE_COLLECTION) {
        auto collection = ::getCollection(current);
        auto collectionVariable = ::getOutVariable(current);
        std::string shardId = finder.getShard(collectionVariable);

        if (finder.isSafeForOptimization(collectionVariable) && !shardId.empty()) {
          // the read access itself can be pinned to a single shard
          wasModified = true;
          ::restrictToShard(current, shardId);
          forwardRestrictionToPrototype(current, shardId);
        } else if (finder.isSafeForOptimization(collection)) {
          // otherwise, inherit a restriction from a modification node on the
          // same collection, but only if exactly one shard was restricted to
          auto& shards = modificationRestrictions[collection];
          if (shards.size() == 1) {
            wasModified = true;
            shardId = *shards.begin();
            ::restrictToShard(current, shardId);
            forwardRestrictionToPrototype(current, shardId);
          }
        }
      } else if (currentType == ExecutionNode::UPSERT || currentType == ExecutionNode::REMOTE ||
                 currentType == ExecutionNode::DISTRIBUTE ||
                 currentType == ExecutionNode::SINGLETON) {
        // we reached a new snippet or the end of the plan - we can abort
        // searching now. additionally, we cannot yet handle UPSERT well
        break;
      }

      current = current->getFirstDependency();
    }
  }

  if (!toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(std::move(plan), rule, wasModified);
}
|
|
|
|
/// WalkerWorker for undistributeRemoveAfterEnumColl
/// Walks upwards from a REMOVE node through its dependencies and decides
/// whether the intermediate cluster nodes (SCATTER/DISTRIBUTE, GATHER, REMOTE)
/// can be removed so that the REMOVE is executed directly on the shards.
/// All nodes that may be unlinked are collected in _toUnlink; if the walk
/// hits anything unsupported, _toUnlink is cleared and the walk aborts.
class RemoveToEnumCollFinder final : public WalkerWorker<ExecutionNode> {
  ExecutionPlan* _plan;                          // plan being inspected (not owned)
  arangodb::HashSet<ExecutionNode*>& _toUnlink;  // out-param: nodes to remove from the plan
  bool _remove;                                  // true once the REMOVE node has been processed
  bool _scatter;                                 // true once a SCATTER/DISTRIBUTE node was seen
  bool _gather;                                  // true once a GATHER node was seen
  ExecutionNode* _enumColl;                      // the EnumerateCollection/Index node feeding the REMOVE
  ExecutionNode* _setter;                        // node that sets the REMOVE's input variable
  const Variable* _variable;                     // the variable whose documents are removed
  ExecutionNode* _lastNode;                      // previously visited node (walk goes child -> parent)

 public:
  RemoveToEnumCollFinder(ExecutionPlan* plan, arangodb::HashSet<ExecutionNode*>& toUnlink)
      : _plan(plan),
        _toUnlink(toUnlink),
        _remove(false),
        _scatter(false),
        _gather(false),
        _enumColl(nullptr),
        _setter(nullptr),
        _variable(nullptr),
        _lastNode(nullptr) {}

  ~RemoveToEnumCollFinder() {}

  /// @brief inspect one node; returns false to continue walking towards the
  /// dependencies, true to stop (either success at the enumeration node or
  /// abort, in which case _toUnlink is cleared)
  bool before(ExecutionNode* en) override final {
    switch (en->getType()) {
      case EN::REMOVE: {
        if (_remove) {
          // a second REMOVE node: not supported
          break;
        }

        // find the variable we are removing . . .
        auto rn = ExecutionNode::castTo<RemoveNode*>(en);
        Variable const* toRemove = rn->inVariable();

        _setter = _plan->getVarSetBy(rn->inVariable()->id);
        TRI_ASSERT(_setter != nullptr);
        auto enumColl = _setter;

        if (_setter->getType() == EN::CALCULATION) {
          // this should be an attribute access for _key
          auto cn = ExecutionNode::castTo<CalculationNode*>(_setter);

          auto expr = cn->expression();
          if (expr->isAttributeAccess()) {
            // check the variable is the same as the remove variable
            if (cn->outVariable() != rn->inVariable()) {
              break;  // abort . . .
            }
            // check the remove node's collection is sharded over _key
            std::vector<std::string> shardKeys = rn->collection()->shardKeys(false);
            if (shardKeys.size() != 1 || shardKeys[0] != StaticStrings::KeyString) {
              break;  // abort . . .
            }

            // set the varsToRemove to the variable in the expression of this
            // node and also define enumColl
            arangodb::HashSet<Variable const*> varsToRemove;
            cn->getVariablesUsedHere(varsToRemove);
            TRI_ASSERT(varsToRemove.size() == 1);
            toRemove = *(varsToRemove.begin());
            enumColl = _plan->getVarSetBy(toRemove->id);
            TRI_ASSERT(_setter != nullptr);
          } else if (expr->node() && expr->node()->isObject()) {
            auto n = expr->node();

            if (n == nullptr) {
              break;
            }

            // note for which shard keys we need to look for
            auto shardKeys = rn->collection()->shardKeys(false);
            std::unordered_set<std::string> toFind;
            for (auto const& it : shardKeys) {
              toFind.emplace(it);
            }
            // for REMOVE, we must also know the _key value, otherwise
            // REMOVE will not work
            toFind.emplace(StaticStrings::KeyString);

            // go through the input object attribute by attribute
            // and look for our shard keys
            Variable const* lastVariable = nullptr;
            bool doOptimize = true;

            for (size_t i = 0; i < n->numMembers(); ++i) {
              auto sub = n->getMember(i);

              if (sub->type != NODE_TYPE_OBJECT_ELEMENT) {
                continue;
              }

              auto it = toFind.find(sub->getString());

              if (it != toFind.end()) {
                // we found one of the shard keys!
                // remove the attribute from our to-do list
                auto value = sub->getMember(0);

                if (value->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
                  // check if all values for the shard keys are referring to the
                  // same FOR loop variable
                  auto var = value->getMember(0);
                  if (var->type == NODE_TYPE_REFERENCE) {
                    auto accessedVariable = static_cast<Variable const*>(var->getData());

                    if (lastVariable == nullptr) {
                      lastVariable = accessedVariable;
                    } else if (lastVariable != accessedVariable) {
                      doOptimize = false;
                      break;
                    }

                    toFind.erase(it);
                  }
                }
              }
            }

            if (!toFind.empty() || !doOptimize || lastVariable == nullptr) {
              // not all shard keys covered, or different source variables in
              // use
              break;
            }

            TRI_ASSERT(lastVariable != nullptr);
            enumColl = _plan->getVarSetBy(lastVariable->id);
          } else {
            // cannot optimize this type of input
            break;
          }
        }

        // the input must come straight from a collection enumeration
        if (enumColl->getType() != EN::ENUMERATE_COLLECTION &&
            enumColl->getType() != EN::INDEX) {
          break;  // abort . . .
        }

        if (enumColl->getType() == EN::ENUMERATE_COLLECTION &&
            !dynamic_cast<DocumentProducingNode const*>(enumColl)->projections().empty()) {
          // cannot handle projections yet
          break;
        }

        _enumColl = enumColl;

        // enumeration and removal must target the same collection
        if (::getCollection(_enumColl) != rn->collection()) {
          break;  // abort . . .
        }

        _variable = toRemove;  // the variable we'll remove
        _remove = true;
        _lastNode = en;
        return false;  // continue . . .
      }
      case EN::REMOTE: {
        // REMOTE nodes are no longer needed once the REMOVE runs on the shards
        _toUnlink.emplace(en);
        _lastNode = en;
        return false;  // continue . . .
      }
      case EN::DISTRIBUTE:
      case EN::SCATTER: {
        if (_scatter) {  // met more than one scatter node
          break;         // abort . . .
        }
        _scatter = true;
        _toUnlink.emplace(en);
        _lastNode = en;
        return false;  // continue . . .
      }
      case EN::GATHER: {
        if (_gather) {  // met more than one gather node
          break;        // abort . . .
        }
        _gather = true;
        _toUnlink.emplace(en);
        _lastNode = en;
        return false;  // continue . . .
      }
      case EN::FILTER: {
        // tolerated; the matching calculation is validated in the
        // EN::CALCULATION case below
        _lastNode = en;
        return false;  // continue . . .
      }
      case EN::CALCULATION: {
        TRI_ASSERT(_setter != nullptr);
        if (_setter->getType() == EN::CALCULATION && _setter->id() == en->id()) {
          // this is the calculation that feeds the REMOVE itself
          _lastNode = en;
          return false;  // continue . . .
        }
        if (_lastNode == nullptr || _lastNode->getType() != EN::FILTER) {
          // doesn't match the last filter node
          break;  // abort . . .
        }
        auto cn = ExecutionNode::castTo<CalculationNode const*>(en);
        auto fn = ExecutionNode::castTo<FilterNode const*>(_lastNode);

        // check these are a Calc-Filter pair
        if (cn->outVariable() != fn->inVariable()) {
          break;  // abort . . .
        }

        // check that we are filtering/calculating something with the variable
        // we are to remove
        arangodb::HashSet<Variable const*> varsUsedHere;
        cn->getVariablesUsedHere(varsUsedHere);

        if (varsUsedHere.size() != 1 ||
            varsUsedHere.find(_variable) == varsUsedHere.end()) {
          break;  // abort . . .
        }
        _lastNode = en;
        return false;  // continue . . .
      }
      case EN::ENUMERATE_COLLECTION:
      case EN::INDEX: {
        // check that we are enumerating the variable we are to remove
        // and that we have already seen a remove node
        TRI_ASSERT(_enumColl != nullptr);
        if (en->id() != _enumColl->id()) {
          break;
        }
        return true;  // reached the end!
      }
      case EN::SINGLETON:
      case EN::ENUMERATE_LIST:
      case EN::ENUMERATE_IRESEARCH_VIEW:
      case EN::SUBQUERY:
      case EN::COLLECT:
      case EN::INSERT:
      case EN::REPLACE:
      case EN::UPDATE:
      case EN::UPSERT:
      case EN::RETURN:
      case EN::NORESULTS:
      case EN::LIMIT:
      case EN::SORT:
      case EN::TRAVERSAL:
      case EN::K_SHORTEST_PATHS:
      case EN::SHORTEST_PATH: {
        // if we meet any of the above, then we abort . . .
        break;
      }

      default: {
        // should not reach this point
        TRI_ASSERT(false);
      }
    }
    // abort: discard everything collected so far and stop the walk
    _toUnlink.clear();
    return true;
  }
};
|
|
|
|
/// @brief recognizes that a RemoveNode can be moved to the shards.
|
|
void arangodb::aql::undistributeRemoveAfterEnumCollRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
plan->findNodesOfType(nodes, EN::REMOVE, true);
|
|
|
|
arangodb::HashSet<ExecutionNode*> toUnlink;
|
|
|
|
for (auto& n : nodes) {
|
|
RemoveToEnumCollFinder finder(plan.get(), toUnlink);
|
|
n->walk(finder);
|
|
}
|
|
|
|
bool modified = false;
|
|
if (!toUnlink.empty()) {
|
|
plan->unlinkNodes(toUnlink);
|
|
modified = true;
|
|
}
|
|
|
|
opt->addPlan(std::move(plan), rule, modified);
|
|
}
|
|
|
|
/// @brief auxilliary struct for finding common nodes in OR conditions
/// Determines whether all branches of an OR refer to one common
/// attribute-access/reference node, e.g. `x.a == 1 || x.a == 2`. On success
/// the common node and its string representation are returned through the
/// out-parameters of find().
struct CommonNodeFinder {
  // candidate nodes collected from comparison sides while scanning; at most
  // two candidates are kept before one must match
  std::vector<AstNode const*> possibleNodes;

  /// @brief recursively search `node` for a node common to all OR branches.
  /// `condition` is the comparison type looked for (anything other than
  /// BINARY_EQ also admits the relational operators and IN).
  /// Returns true and fills commonNode/commonName when one is found.
  bool find(AstNode const* node, AstNodeType condition,
            AstNode const*& commonNode, std::string& commonName) {
    if (node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
      // both OR branches must agree on the common node
      return (find(node->getMember(0), condition, commonNode, commonName) &&
              find(node->getMember(1), condition, commonNode, commonName));
    }

    if (node->type == NODE_TYPE_VALUE) {
      possibleNodes.clear();
      return true;
    }

    if (node->type == condition ||
        (condition != NODE_TYPE_OPERATOR_BINARY_EQ &&
         (node->type == NODE_TYPE_OPERATOR_BINARY_LE || node->type == NODE_TYPE_OPERATOR_BINARY_LT ||
          node->type == NODE_TYPE_OPERATOR_BINARY_GE || node->type == NODE_TYPE_OPERATOR_BINARY_GT ||
          node->type == NODE_TYPE_OPERATOR_BINARY_IN))) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      bool const isIn = (node->type == NODE_TYPE_OPERATOR_BINARY_IN && rhs->isArray());

      if (node->type == NODE_TYPE_OPERATOR_BINARY_IN && rhs->type == NODE_TYPE_EXPANSION) {
        // ooh, cannot optimize this (yet)
        possibleNodes.clear();
        return false;
      }

      // a constant on one side makes the other side the common candidate
      // (for IN, the left side must stay the attribute)
      if (!isIn && lhs->isConstant()) {
        commonNode = rhs;
        commonName = commonNode->toString();
        possibleNodes.clear();
        return true;
      }

      if (rhs->isConstant()) {
        commonNode = lhs;
        commonName = commonNode->toString();
        possibleNodes.clear();
        return true;
      }

      // function calls / references on one side: treat the other side as
      // the common node
      if (rhs->type == NODE_TYPE_FCALL || rhs->type == NODE_TYPE_FCALL_USER ||
          rhs->type == NODE_TYPE_REFERENCE) {
        commonNode = lhs;
        commonName = commonNode->toString();
        possibleNodes.clear();
        return true;
      }

      if (!isIn && (lhs->type == NODE_TYPE_FCALL || lhs->type == NODE_TYPE_FCALL_USER ||
                    lhs->type == NODE_TYPE_REFERENCE)) {
        commonNode = rhs;
        commonName = commonNode->toString();
        possibleNodes.clear();
        return true;
      }

      // both sides are attribute/indexed accesses: match against previously
      // collected candidates (compared by string representation)
      if (!isIn && (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
                    lhs->type == NODE_TYPE_INDEXED_ACCESS)) {
        if (possibleNodes.size() == 2) {
          for (size_t i = 0; i < 2; i++) {
            if (lhs->toString() == possibleNodes[i]->toString()) {
              commonNode = possibleNodes[i];
              commonName = commonNode->toString();
              possibleNodes.clear();
              return true;
            }
          }
          // don't return, must consider the other side of the condition
        } else {
          possibleNodes.emplace_back(lhs);
        }
      }
      if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || rhs->type == NODE_TYPE_INDEXED_ACCESS) {
        if (possibleNodes.size() == 2) {
          for (size_t i = 0; i < 2; i++) {
            if (rhs->toString() == possibleNodes[i]->toString()) {
              commonNode = possibleNodes[i];
              commonName = commonNode->toString();
              possibleNodes.clear();
              return true;
            }
          }
          return false;
        } else {
          possibleNodes.emplace_back(rhs);
          return true;
        }
      }
    }
    // not a supported comparison; succeed only if a common node was already
    // established by an earlier branch
    possibleNodes.clear();
    return (!commonName.empty());
  }
};
|
|
|
|
/// @brief auxilliary struct for the OR-to-IN conversion
/// Rewrites `attr == v1 || attr == v2` (and combinations with IN) into
/// `attr IN [v1, v2]`, recursing through OR/AND subtrees.
struct OrSimplifier {
  Ast* ast;            // AST used to create replacement nodes (not owned)
  ExecutionPlan* plan; // plan, used to look up variable setters (not owned)

  OrSimplifier(Ast* ast, ExecutionPlan* plan) : ast(ast), plan(plan) {}

  /// @brief stringify a node; swallows errors and returns an empty string
  std::string stringifyNode(AstNode const* node) const {
    try {
      return node->toString();
    } catch (...) {
    }
    return std::string();
  }

  /// @brief true if `node` is a non-constant attribute access, indexed
  /// access or reference; fills attributeName with its string form
  bool qualifies(AstNode const* node, std::string& attributeName) const {
    if (node->isConstant()) {
      return false;
    }

    if (node->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
        node->type == NODE_TYPE_INDEXED_ACCESS || node->type == NODE_TYPE_REFERENCE) {
      attributeName = stringifyNode(node);
      return true;
    }

    return false;
  }

  /// @brief detect an `attr == value` or `attr IN values` pattern.
  /// preferRight forces trying the right-hand side as the attribute first
  /// (used by simplify() to test all side combinations). On success, fills
  /// attributeName, attr and value.
  bool detect(AstNode const* node, bool preferRight, std::string& attributeName,
              AstNode const*& attr, AstNode const*& value) const {
    attributeName.clear();

    if (node->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);
      if (!preferRight && qualifies(lhs, attributeName)) {
        if (rhs->isDeterministic()) {
          attr = lhs;
          value = rhs;
          return true;
        }
      }

      if (qualifies(rhs, attributeName)) {
        if (lhs->isDeterministic()) {
          attr = rhs;
          value = lhs;
          return true;
        }
      }
      // intentionally falls through
    } else if (node->type == NODE_TYPE_OPERATOR_BINARY_IN) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);
      // for IN, the attribute must be on the left and the values an array
      if (rhs->isArray() && qualifies(lhs, attributeName)) {
        if (rhs->isDeterministic()) {
          attr = lhs;
          value = rhs;
          return true;
        }
      }
      // intentionally falls through
    }

    return false;
  }

  /// @brief build `attr IN [...]` from the two matched value sides;
  /// array sides are flattened into the combined values array
  AstNode* buildValues(AstNode const* attr, AstNode const* lhs, bool leftIsArray,
                       AstNode const* rhs, bool rightIsArray) const {
    auto values = ast->createNodeArray();
    if (leftIsArray) {
      size_t const n = lhs->numMembers();
      for (size_t i = 0; i < n; ++i) {
        values->addMember(lhs->getMemberUnchecked(i));
      }
    } else {
      values->addMember(lhs);
    }

    if (rightIsArray) {
      size_t const n = rhs->numMembers();
      for (size_t i = 0; i < n; ++i) {
        values->addMember(rhs->getMemberUnchecked(i));
      }
    } else {
      values->addMember(rhs);
    }

    return ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_IN, attr, values);
  }

  /// @brief recursively simplify the expression; returns the input node
  /// unchanged (modulo const_cast) when nothing could be rewritten
  AstNode* simplify(AstNode const* node) const {
    if (node == nullptr) {
      return nullptr;
    }

    if (node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      auto lhsNew = simplify(lhs);
      auto rhsNew = simplify(rhs);

      if (lhs != lhsNew || rhs != rhsNew) {
        // create a modified node
        node = ast->createNodeBinaryOperator(node->type, lhsNew, rhsNew);
      }

      if ((lhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ || lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN) &&
          (rhsNew->type == NODE_TYPE_OPERATOR_BINARY_EQ ||
           rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN)) {
        std::string leftName;
        std::string rightName;
        AstNode const* leftAttr = nullptr;
        AstNode const* rightAttr = nullptr;
        AstNode const* leftValue = nullptr;
        AstNode const* rightValue = nullptr;

        // try all four left/right side preferences until both branches
        // name the same attribute
        for (size_t i = 0; i < 4; ++i) {
          if (detect(lhsNew, i >= 2, leftName, leftAttr, leftValue) &&
              detect(rhsNew, i % 2 == 0, rightName, rightAttr, rightValue) &&
              leftName == rightName) {
            std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> tmp1;

            if (leftValue->isAttributeAccessForVariable(tmp1)) {
              // do not merge when both values are attribute accesses on the
              // same enumerated variable+path (would change semantics)
              bool qualifies = false;
              auto setter = plan->getVarSetBy(tmp1.first->id);
              if (setter != nullptr && setter->getType() == EN::ENUMERATE_COLLECTION) {
                qualifies = true;
              }

              std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> tmp2;

              if (qualifies && rightValue->isAttributeAccessForVariable(tmp2)) {
                auto setter = plan->getVarSetBy(tmp2.first->id);
                if (setter != nullptr && setter->getType() == EN::ENUMERATE_COLLECTION) {
                  if (tmp1.first != tmp2.first || tmp1.second != tmp2.second) {
                    continue;
                  }
                }
              }
            }

            return buildValues(leftAttr, leftValue,
                               lhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN, rightValue,
                               rhsNew->type == NODE_TYPE_OPERATOR_BINARY_IN);
          }
        }
      }

      // return node as is
      return const_cast<AstNode*>(node);
    }

    if (node->type == NODE_TYPE_OPERATOR_BINARY_AND) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      auto lhsNew = simplify(lhs);
      auto rhsNew = simplify(rhs);

      if (lhs != lhsNew || rhs != rhsNew) {
        // return a modified node
        return ast->createNodeBinaryOperator(node->type, lhsNew, rhsNew);
      }

      // intentionally falls through
    }

    return const_cast<AstNode*>(node);
  }
};
|
|
|
|
/// @brief this rule replaces expressions of the type:
|
|
/// x.val == 1 || x.val == 2 || x.val == 3
|
|
// with
|
|
// x.val IN [1,2,3]
|
|
// when the OR conditions are present in the same FILTER node, and refer to the
|
|
// same (single) attribute.
|
|
void arangodb::aql::replaceOrWithInRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
plan->findNodesOfType(nodes, EN::FILTER, true);
|
|
|
|
bool modified = false;
|
|
for (auto const& n : nodes) {
|
|
TRI_ASSERT(n->hasDependency());
|
|
|
|
auto const dep = n->getFirstDependency();
|
|
|
|
if (dep->getType() != EN::CALCULATION) {
|
|
continue;
|
|
}
|
|
|
|
auto fn = ExecutionNode::castTo<FilterNode const*>(n);
|
|
auto cn = ExecutionNode::castTo<CalculationNode*>(dep);
|
|
auto outVar = cn->outVariable();
|
|
|
|
if (outVar != fn->inVariable()) {
|
|
continue;
|
|
}
|
|
|
|
auto root = cn->expression()->node();
|
|
|
|
OrSimplifier simplifier(plan->getAst(), plan.get());
|
|
auto newRoot = simplifier.simplify(root);
|
|
|
|
if (newRoot != root) {
|
|
ExecutionNode* newNode = nullptr;
|
|
Expression* expr = new Expression(plan.get(), plan->getAst(), newRoot);
|
|
|
|
try {
|
|
TRI_IF_FAILURE("OptimizerRules::replaceOrWithInRuleOom") {
|
|
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
|
|
}
|
|
|
|
newNode = new CalculationNode(plan.get(), plan->nextId(), expr, outVar);
|
|
} catch (...) {
|
|
delete expr;
|
|
throw;
|
|
}
|
|
|
|
plan->registerNode(newNode);
|
|
plan->replaceNode(cn, newNode);
|
|
modified = true;
|
|
}
|
|
}
|
|
|
|
opt->addPlan(std::move(plan), rule, modified);
|
|
}
|
|
|
|
/// @brief auxilliary struct for removing redundant OR branches.
/// Detects conditions like `x < 5 || x < 7` where all OR branches compare the
/// same node against constants with compatible relational operators, and
/// tracks the single weakest bound that subsumes all branches.
struct RemoveRedundantOr {
  AstNode const* bestValue = nullptr;   // constant of the winning bound so far
  AstNodeType comparison;               // operator of the winning bound (attr on the left)
  bool inclusive;                       // NOTE(review): written nowhere in this struct — appears unused
  bool isComparisonSet = false;         // true once a first bound has been recorded
  CommonNodeFinder finder;              // locates the node common to all OR branches
  AstNode const* commonNode = nullptr;  // the common attribute/reference node
  std::string commonName;               // string form of commonNode, used for matching

  /// @brief true if the whole OR condition collapses to a single comparison;
  /// side effect: fills bestValue/comparison for createReplacementNode()
  bool hasRedundantCondition(AstNode const* node) {
    try {
      if (finder.find(node, NODE_TYPE_OPERATOR_BINARY_LT, commonNode, commonName)) {
        return hasRedundantConditionWalker(node);
      }
    } catch (...) {
      // ignore errors and simply return false
    }
    return false;
  }

  /// @brief build `commonNode <op> bestValue`; only valid after
  /// hasRedundantCondition() returned true
  AstNode* createReplacementNode(Ast* ast) {
    TRI_ASSERT(commonNode != nullptr);
    TRI_ASSERT(bestValue != nullptr);
    TRI_ASSERT(isComparisonSet == true);
    return ast->createNodeBinaryOperator(comparison, commonNode->clone(ast), bestValue);
  }

 private:
  bool isInclusiveBound(AstNodeType type) {
    return (type == NODE_TYPE_OPERATOR_BINARY_GE || type == NODE_TYPE_OPERATOR_BINARY_LE);
  }

  /// @brief classify `type` relative to the recorded comparison:
  /// -1 = both are high bounds (<, <=), 1 = both are low bounds (>, >=),
  /// 0 = mixed directions, cannot be merged
  int isCompatibleBound(AstNodeType type, AstNode const* value) {
    if ((comparison == NODE_TYPE_OPERATOR_BINARY_LE || comparison == NODE_TYPE_OPERATOR_BINARY_LT) &&
        (type == NODE_TYPE_OPERATOR_BINARY_LE || type == NODE_TYPE_OPERATOR_BINARY_LT)) {
      return -1;  // high bound
    } else if ((comparison == NODE_TYPE_OPERATOR_BINARY_GE || comparison == NODE_TYPE_OPERATOR_BINARY_GT) &&
               (type == NODE_TYPE_OPERATOR_BINARY_GE || type == NODE_TYPE_OPERATOR_BINARY_GT)) {
      return 1;  // low bound
    }
    return 0;  // incompatible bounds
  }

  // returns false if the existing value is better and true if the input value
  // is better
  bool compareBounds(AstNodeType type, AstNode const* value, int lowhigh) {
    int cmp = CompareAstNodes(bestValue, value, true);

    if (cmp == 0 && (isInclusiveBound(comparison) != isInclusiveBound(type))) {
      // equal constants: the inclusive bound is the weaker (winning) one
      return (isInclusiveBound(type) ? true : false);
    }
    return (cmp * lowhigh == 1);
  }

  /// @brief recursive walker; returns true while the subtree fits the
  /// "common node compared to constants" pattern, updating the best bound
  bool hasRedundantConditionWalker(AstNode const* node) {
    AstNodeType type = node->type;

    if (type == NODE_TYPE_OPERATOR_BINARY_OR) {
      return (hasRedundantConditionWalker(node->getMember(0)) &&
              hasRedundantConditionWalker(node->getMember(1)));
    }

    if (type == NODE_TYPE_OPERATOR_BINARY_LE || type == NODE_TYPE_OPERATOR_BINARY_LT ||
        type == NODE_TYPE_OPERATOR_BINARY_GE || type == NODE_TYPE_OPERATOR_BINARY_GT) {
      auto lhs = node->getMember(0);
      auto rhs = node->getMember(1);

      // constant <op> commonNode: normalize by reversing the operator
      if (hasRedundantConditionWalker(rhs) &&
          !hasRedundantConditionWalker(lhs) && lhs->isConstant()) {
        if (!isComparisonSet) {
          comparison = Ast::ReverseOperator(type);
          bestValue = lhs;
          isComparisonSet = true;
          return true;
        }

        int lowhigh = isCompatibleBound(Ast::ReverseOperator(type), lhs);
        if (lowhigh == 0) {
          return false;
        }

        if (compareBounds(type, lhs, lowhigh)) {
          comparison = Ast::ReverseOperator(type);
          bestValue = lhs;
        }
        return true;
      }
      // commonNode <op> constant
      if (hasRedundantConditionWalker(lhs) &&
          !hasRedundantConditionWalker(rhs) && rhs->isConstant()) {
        if (!isComparisonSet) {
          comparison = type;
          bestValue = rhs;
          isComparisonSet = true;
          return true;
        }

        int lowhigh = isCompatibleBound(type, rhs);
        if (lowhigh == 0) {
          return false;
        }

        if (compareBounds(type, rhs, lowhigh)) {
          comparison = type;
          bestValue = rhs;
        }
        return true;
      }
      // if hasRedundantConditionWalker(lhs) and
      // hasRedundantConditionWalker(rhs), then one of the conditions in the OR
      // statement is of the form x == x intentionally falls through
    } else if (type == NODE_TYPE_REFERENCE || type == NODE_TYPE_ATTRIBUTE_ACCESS ||
               type == NODE_TYPE_INDEXED_ACCESS) {
      // get a string representation of the node for comparisons
      return (node->toString() == commonName);
    }

    return false;
  }
};
|
|
|
|
/// @brief replaces an OR of compatible range comparisons on the same
/// attribute (e.g. `x < 5 || x < 7`) with the single weakest comparison
void arangodb::aql::removeRedundantOrRule(Optimizer* opt,
                                          std::unique_ptr<ExecutionPlan> plan,
                                          OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::FILTER, true);

  bool modified = false;
  for (auto const& n : nodes) {
    TRI_ASSERT(n->hasDependency());

    auto const dep = n->getFirstDependency();

    // only consider FILTERs fed directly by a CALCULATION
    if (dep->getType() != EN::CALCULATION) {
      continue;
    }

    auto fn = ExecutionNode::castTo<FilterNode const*>(n);
    auto cn = ExecutionNode::castTo<CalculationNode*>(dep);
    auto outVar = cn->outVariable();

    // the calculation must produce exactly the filter's input variable
    if (outVar != fn->inVariable()) {
      continue;
    }
    if (cn->expression()->node()->type != NODE_TYPE_OPERATOR_BINARY_OR) {
      continue;
    }

    RemoveRedundantOr remover;
    if (remover.hasRedundantCondition(cn->expression()->node())) {
      auto astNode = remover.createReplacementNode(plan->getAst());

      // use a unique_ptr so the expression is freed automatically if the
      // CalculationNode constructor throws; the node takes ownership on
      // success (same pattern as used elsewhere in this file)
      auto expr = std::make_unique<Expression>(plan.get(), plan->getAst(), astNode);
      ExecutionNode* newNode =
          new CalculationNode(plan.get(), plan->nextId(), expr.get(), outVar);
      expr.release();

      plan->registerNode(newNode);
      plan->replaceNode(cn, newNode);
      modified = true;
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief remove $OLD and $NEW variables from data-modification statements
|
|
/// if not required
|
|
void arangodb::aql::removeDataModificationOutVariablesRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
bool modified = false;
|
|
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
plan->findNodesOfType(nodes, ::removeDataModificationOutVariablesNodeTypes, true);
|
|
|
|
for (auto const& n : nodes) {
|
|
auto node = ExecutionNode::castTo<ModificationNode*>(n);
|
|
TRI_ASSERT(node != nullptr);
|
|
|
|
Variable const* old = node->getOutVariableOld();
|
|
if (!n->isVarUsedLater(old)) {
|
|
// "$OLD" is not used later
|
|
node->clearOutVariableOld();
|
|
modified = true;
|
|
} else {
|
|
switch (n->getType()) {
|
|
case EN::UPDATE:
|
|
case EN::REPLACE: {
|
|
Variable const* inVariable =
|
|
ExecutionNode::castTo<UpdateReplaceNode const*>(n)->inKeyVariable();
|
|
if (inVariable != nullptr) {
|
|
auto setter = plan->getVarSetBy(inVariable->id);
|
|
if (setter != nullptr && (setter->getType() == EN::ENUMERATE_COLLECTION ||
|
|
setter->getType() == EN::INDEX)) {
|
|
std::unordered_map<VariableId, Variable const*> replacements;
|
|
replacements.emplace(old->id, inVariable);
|
|
RedundantCalculationsReplacer finder(plan->getAst(), replacements);
|
|
plan->root()->walk(finder);
|
|
modified = true;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case EN::REMOVE: {
|
|
Variable const* inVariable =
|
|
ExecutionNode::castTo<RemoveNode const*>(n)->inVariable();
|
|
TRI_ASSERT(inVariable != nullptr);
|
|
auto setter = plan->getVarSetBy(inVariable->id);
|
|
if (setter != nullptr && (setter->getType() == EN::ENUMERATE_COLLECTION ||
|
|
setter->getType() == EN::INDEX)) {
|
|
std::unordered_map<VariableId, Variable const*> replacements;
|
|
replacements.emplace(old->id, inVariable);
|
|
RedundantCalculationsReplacer finder(plan->getAst(), replacements);
|
|
plan->root()->walk(finder);
|
|
modified = true;
|
|
}
|
|
break;
|
|
}
|
|
default: {
|
|
// do nothing
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!n->isVarUsedLater(node->getOutVariableNew())) {
|
|
// "$NEW" is not used later
|
|
node->clearOutVariableNew();
|
|
modified = true;
|
|
}
|
|
|
|
if (!n->hasParent()) {
|
|
node->producesResults(false);
|
|
modified = true;
|
|
}
|
|
}
|
|
|
|
opt->addPlan(std::move(plan), rule, modified);
|
|
}
|
|
|
|
/// @brief patch UPDATE statement on single collection that iterates over the
|
|
/// entire collection to operate in batches
|
|
void arangodb::aql::patchUpdateStatementsRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
// no need to dive into subqueries here
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
|
|
StorageEngine* engine = EngineSelectorFeature::ENGINE;
|
|
if (engine->typeName() == "mmfiles") {
|
|
// MMFiles: we can update UPDATE/REPLACE but not REMOVE
|
|
// this is because in MMFiles the iteration over a collection may
|
|
// use the primary index, but a REMOVE may at the same time remove
|
|
// the documents from this index. this would not be safe
|
|
plan->findNodesOfType(nodes, ::patchUpdateStatementsNodeTypes, false);
|
|
} else {
|
|
// other engines: we can update UPDATE/REPLACE as well as REMOVE
|
|
plan->findNodesOfType(nodes, ::patchUpdateRemoveStatementsNodeTypes, false);
|
|
}
|
|
|
|
bool modified = false;
|
|
|
|
for (auto const& n : nodes) {
|
|
auto node = ExecutionNode::castTo<ModificationNode*>(n);
|
|
TRI_ASSERT(node != nullptr);
|
|
|
|
auto& options = node->getOptions();
|
|
if (!options.readCompleteInput) {
|
|
// already ok
|
|
continue;
|
|
}
|
|
|
|
auto const collection = node->collection();
|
|
|
|
auto dep = n->getFirstDependency();
|
|
|
|
while (dep != nullptr) {
|
|
auto const type = dep->getType();
|
|
|
|
if (type == EN::ENUMERATE_LIST ||
|
|
type == EN::ENUMERATE_IRESEARCH_VIEW ||
|
|
type == EN::SUBQUERY) {
|
|
// not suitable
|
|
modified = false;
|
|
break;
|
|
}
|
|
|
|
if (type == EN::ENUMERATE_COLLECTION || type == EN::INDEX) {
|
|
if (::getCollection(dep) == collection) {
|
|
if (modified) {
|
|
// already saw the collection... that means we have seen the same
|
|
// collection two times in two FOR loops
|
|
modified = false;
|
|
// abort
|
|
break;
|
|
}
|
|
TRI_ASSERT(!modified);
|
|
// saw the same collection in FOR as in UPDATE
|
|
if (n->isVarUsedLater(::getOutVariable(dep))) {
|
|
// must abort, because the variable produced by the FOR loop is
|
|
// read after it is updated
|
|
break;
|
|
}
|
|
modified = true;
|
|
}
|
|
} else if (type == EN::TRAVERSAL || type == EN::K_SHORTEST_PATHS || type == EN::SHORTEST_PATH) {
|
|
// unclear what will be read by the traversal
|
|
modified = false;
|
|
break;
|
|
}
|
|
|
|
dep = dep->getFirstDependency();
|
|
}
|
|
|
|
if (modified) {
|
|
options.readCompleteInput = false;
|
|
}
|
|
}
|
|
|
|
// always re-add the original plan, be it modified or not
|
|
// only a flag in the plan will be modified
|
|
opt->addPlan(std::move(plan), rule, modified);
|
|
}
|
|
|
|
/// @brief optimizes away unused traversal output variables and
|
|
/// merges filter nodes into graph traversal nodes
|
|
void arangodb::aql::optimizeTraversalsRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> tNodes{a};
|
|
plan->findNodesOfType(tNodes, EN::TRAVERSAL, true);
|
|
|
|
if (tNodes.empty()) {
|
|
// no traversals present
|
|
opt->addPlan(std::move(plan), rule, false);
|
|
return;
|
|
}
|
|
|
|
bool modified = false;
|
|
|
|
// first make a pass over all traversal nodes and remove unused
|
|
// variables from them
|
|
for (auto const& n : tNodes) {
|
|
TraversalNode* traversal = ExecutionNode::castTo<TraversalNode*>(n);
|
|
|
|
// note that we can NOT optimize away the vertex output variable
|
|
// yet, as many traversal internals depend on the number of vertices
|
|
// found/built
|
|
auto outVariable = traversal->edgeOutVariable();
|
|
std::vector<Variable const*> pruneVars;
|
|
traversal->getPruneVariables(pruneVars);
|
|
|
|
if (outVariable != nullptr && !n->isVarUsedLater(outVariable) &&
|
|
std::find(pruneVars.begin(), pruneVars.end(), outVariable) == pruneVars.end()) {
|
|
// traversal edge outVariable not used later
|
|
traversal->setEdgeOutput(nullptr);
|
|
modified = true;
|
|
}
|
|
|
|
outVariable = traversal->pathOutVariable();
|
|
if (outVariable != nullptr && !n->isVarUsedLater(outVariable) &&
|
|
std::find(pruneVars.begin(), pruneVars.end(), outVariable) == pruneVars.end()) {
|
|
// traversal path outVariable not used later
|
|
traversal->setPathOutput(nullptr);
|
|
modified = true;
|
|
}
|
|
}
|
|
|
|
if (!tNodes.empty()) {
|
|
// These are all the end nodes where we start
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> nodes{a};
|
|
plan->findEndNodes(nodes, true);
|
|
|
|
for (auto const& n : nodes) {
|
|
TraversalConditionFinder finder(plan.get(), &modified);
|
|
n->walk(finder);
|
|
}
|
|
}
|
|
|
|
opt->addPlan(std::move(plan), rule, modified);
|
|
}
|
|
|
|
// remove filter nodes already covered by a traversal
// for each FILTER, normalizes its condition, walks towards the dependencies
// looking for a TRAVERSAL whose own condition subsumes (part of) the filter,
// and then either drops the FILTER entirely or replaces its calculation with
// the remaining, uncovered part of the condition
void arangodb::aql::removeFiltersCoveredByTraversal(Optimizer* opt,
                                                    std::unique_ptr<ExecutionPlan> plan,
                                                    OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> fNodes{a};
  plan->findNodesOfType(fNodes, EN::FILTER, true);
  if (fNodes.empty()) {
    // no filters present
    opt->addPlan(std::move(plan), rule, false);
    return;
  }

  bool modified = false;
  arangodb::HashSet<ExecutionNode*> toUnlink;

  for (auto const& node : fNodes) {
    auto fn = ExecutionNode::castTo<FilterNode const*>(node);
    // find the node with the filter expression
    auto setter = plan->getVarSetBy(fn->inVariable()->id);
    if (setter == nullptr || setter->getType() != EN::CALCULATION) {
      continue;
    }

    auto calculationNode = ExecutionNode::castTo<CalculationNode*>(setter);
    auto conditionNode = calculationNode->expression()->node();

    // build the filter condition
    Condition condition(plan->getAst());
    condition.andCombine(conditionNode);
    condition.normalize(plan.get());

    if (condition.root() == nullptr) {
      continue;
    }

    size_t const n = condition.root()->numMembers();

    if (n != 1) {
      // either no condition or multiple ORed conditions...
      continue;
    }

    bool handled = false;
    auto current = node;
    // walk from the FILTER towards the dependencies, looking for a
    // traversal that covers the condition
    while (current != nullptr) {
      if (current->getType() == EN::TRAVERSAL) {
        auto traversalNode = ExecutionNode::castTo<TraversalNode const*>(current);

        // found a traversal node, now check if the expression
        // is covered by the traversal
        auto traversalCondition = traversalNode->condition();

        if (traversalCondition != nullptr && !traversalCondition->isEmpty()) {
          /*auto const& indexesUsed = traversalNode->get
          //indexNode->getIndexes();

          if (indexesUsed.size() == 1) {*/
          // single index. this is something that we can handle
          Variable const* outVariable = traversalNode->pathOutVariable();
          arangodb::HashSet<Variable const*> varsUsedByCondition;
          Ast::getReferencedVariables(condition.root(), varsUsedByCondition);
          // only conditions that reference the traversal's path output
          // variable can be covered by the traversal
          if (outVariable != nullptr &&
              varsUsedByCondition.find(outVariable) != varsUsedByCondition.end()) {
            auto newNode =
                condition.removeTraversalCondition(plan.get(), outVariable,
                                                   traversalCondition->root());
            if (newNode == nullptr) {
              // no condition left...
              // FILTER node can be completely removed
              toUnlink.emplace(node);
              // note: we must leave the calculation node intact, in case it is
              // still used by other nodes in the plan
              modified = true;
              handled = true;
            } else if (newNode != condition.root()) {
              // some condition is left, but it is a different one than
              // the one from the FILTER node
              auto expr = std::make_unique<Expression>(plan.get(), plan->getAst(), newNode);
              CalculationNode* cn =
                  new CalculationNode(plan.get(), plan->nextId(), expr.get(),
                                      calculationNode->outVariable());
              expr.release();
              plan->registerNode(cn);
              plan->replaceNode(setter, cn);
              modified = true;
              handled = true;
            }
          }
        }

        if (handled) {
          break;
        }
      }

      // stop at LIMIT nodes: filters must not be moved beyond them
      if (handled || current->getType() == EN::LIMIT || !current->hasDependency()) {
        break;
      }
      current = current->getFirstDependency();
    }
  }

  if (!toUnlink.empty()) {
    plan->unlinkNodes(toUnlink);
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief removes redundant path variables, after applying
|
|
/// `removeFiltersCoveredByTraversal`. Should significantly reduce overhead
|
|
void arangodb::aql::removeTraversalPathVariable(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> tNodes{a};
|
|
plan->findNodesOfType(tNodes, EN::TRAVERSAL, true);
|
|
|
|
bool modified = false;
|
|
// first make a pass over all traversal nodes and remove unused
|
|
// variables from them
|
|
for (auto const& n : tNodes) {
|
|
TraversalNode* traversal = ExecutionNode::castTo<TraversalNode*>(n);
|
|
|
|
std::vector<Variable const*> pruneVars;
|
|
traversal->getPruneVariables(pruneVars);
|
|
auto outVariable = traversal->pathOutVariable();
|
|
if (outVariable != nullptr && !n->isVarUsedLater(outVariable) &&
|
|
std::find(pruneVars.begin(), pruneVars.end(), outVariable) == pruneVars.end()) {
|
|
// traversal path outVariable not used later
|
|
traversal->setPathOutput(nullptr);
|
|
modified = true;
|
|
}
|
|
}
|
|
opt->addPlan(std::move(plan), rule, modified);
|
|
}
|
|
|
|
/// @brief prepares traversals for execution (hidden rule)
|
|
void arangodb::aql::prepareTraversalsRule(Optimizer* opt,
|
|
std::unique_ptr<ExecutionPlan> plan,
|
|
OptimizerRule const* rule) {
|
|
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
|
|
SmallVector<ExecutionNode*> tNodes{a};
|
|
plan->findNodesOfType(tNodes, EN::TRAVERSAL, true);
|
|
plan->findNodesOfType(tNodes, EN::K_SHORTEST_PATHS, true);
|
|
plan->findNodesOfType(tNodes, EN::SHORTEST_PATH, true);
|
|
|
|
if (tNodes.empty()) {
|
|
// no traversals present
|
|
opt->addPlan(std::move(plan), rule, false);
|
|
return;
|
|
}
|
|
|
|
// first make a pass over all traversal nodes and remove unused
|
|
// variables from them
|
|
for (auto const& n : tNodes) {
|
|
if (n->getType() == EN::TRAVERSAL) {
|
|
TraversalNode* traversal = ExecutionNode::castTo<TraversalNode*>(n);
|
|
traversal->prepareOptions();
|
|
} else if(n->getType() == EN::K_SHORTEST_PATHS) {
|
|
TRI_ASSERT(n->getType() == EN::K_SHORTEST_PATHS);
|
|
KShortestPathsNode* spn = ExecutionNode::castTo<KShortestPathsNode*>(n);
|
|
spn->prepareOptions();
|
|
} else {
|
|
TRI_ASSERT(n->getType() == EN::SHORTEST_PATH);
|
|
ShortestPathNode* spn = ExecutionNode::castTo<ShortestPathNode*>(n);
|
|
spn->prepareOptions();
|
|
}
|
|
}
|
|
|
|
opt->addPlan(std::move(plan), rule, true);
|
|
}
|
|
|
|
/// @brief pulls out simple subqueries and merges them with the level above
///
/// For example, if we have the input query
///
///   FOR x IN (
///     FOR y IN collection FILTER y.value >= 5 RETURN y.test
///   )
///   RETURN x.a
///
/// then this rule will transform it into:
///
///   FOR tmp IN collection
///   FILTER tmp.value >= 5
///   LET x = tmp.test
///   RETURN x.a
void arangodb::aql::inlineSubqueriesRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                         OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::SUBQUERY, true);

  if (nodes.empty()) {
    // no subqueries present, nothing to do
    opt->addPlan(std::move(plan), rule, false);
    return;
  }

  bool modified = false;
  // scratch buffer for the dependency chain of a subquery, reused per candidate
  std::vector<ExecutionNode*> subNodes;

  for (auto const& n : nodes) {
    auto subqueryNode = ExecutionNode::castTo<SubqueryNode*>(n);

    if (subqueryNode->isModificationSubquery()) {
      // can't modify modifying subqueries
      continue;
    }

    if (!subqueryNode->isDeterministic()) {
      // can't inline non-deterministic subqueries
      continue;
    }

    // check if subquery contains a COLLECT node with an INTO variable
    bool eligible = true;
    bool containsLimitOrSort = false;
    auto current = subqueryNode->getSubquery();
    TRI_ASSERT(current != nullptr);

    // walk the subquery's dependency chain from its root downwards
    while (current != nullptr) {
      if (current->getType() == EN::COLLECT) {
        if (ExecutionNode::castTo<CollectNode const*>(current)->hasOutVariable()) {
          // COLLECT ... INTO makes the subquery non-inlinable
          eligible = false;
          break;
        }
      } else if (current->getType() == EN::LIMIT || current->getType() == EN::SORT) {
        // LIMIT/SORT restrict where the subquery may be inlined (see below)
        containsLimitOrSort = true;
      }
      current = current->getFirstDependency();
    }

    if (!eligible) {
      continue;
    }

    Variable const* out = subqueryNode->outVariable();
    TRI_ASSERT(out != nullptr);
    // the subquery outvariable and all its aliases
    arangodb::HashSet<Variable const*> subqueryVars;
    subqueryVars.emplace(out);

    // the potential calculation nodes that produce the aliases
    std::vector<ExecutionNode*> aliasNodesToRemoveLater;

    arangodb::HashSet<Variable const*> varsUsed;

    current = n->getFirstParent();
    // now check where the subquery is used
    while (current->hasParent()) {
      if (current->getType() == EN::ENUMERATE_LIST) {
        if (current->isInInnerLoop() && containsLimitOrSort) {
          // inlining a LIMIT/SORT-containing subquery inside another loop
          // would change results; exit the loop
          current = nullptr;
          break;
        }

        // we're only interested in FOR loops...
        auto listNode = ExecutionNode::castTo<EnumerateListNode*>(current);

        // ...that use our subquery as its input
        if (subqueryVars.find(listNode->inVariable()) != subqueryVars.end()) {
          // bingo!

          // check if the subquery result variable or any of the aliases are
          // used after the FOR loop
          bool mustAbort = false;
          for (auto const& itSub : subqueryVars) {
            if (listNode->isVarUsedLater(itSub)) {
              // exit the loop
              current = nullptr;
              mustAbort = true;
              break;
            }
          }
          if (mustAbort) {
            break;
          }

          // drop the now-superfluous alias calculations
          for (auto const& toRemove : aliasNodesToRemoveLater) {
            plan->unlinkNode(toRemove, false);
          }

          subNodes.clear();
          subNodes.reserve(4);
          // collect the subquery's nodes from its RETURN node downwards
          subqueryNode->getSubquery()->getDependencyChain(subNodes, true);
          TRI_ASSERT(!subNodes.empty());
          auto returnNode = ExecutionNode::castTo<ReturnNode*>(subNodes[0]);
          TRI_ASSERT(returnNode->getType() == EN::RETURN);

          modified = true;
          auto queryVariables = plan->getAst()->variables();
          auto previous = n->getFirstDependency();
          auto insert = n->getFirstParent();
          TRI_ASSERT(insert != nullptr);

          // unlink the original SubqueryNode
          plan->unlinkNode(n, false);

          // splice the subquery's nodes into the outer query, one by one
          for (auto& it : subNodes) {
            // first unlink them all
            plan->unlinkNode(it, true);

            if (it->getType() == EN::SINGLETON) {
              // reached the singleton node already. that means we can stop
              break;
            }

            // and now insert them one level up
            if (it != returnNode) {
              // we skip over the subquery's return node. we don't need it
              // anymore
              insert->removeDependencies();
              TRI_ASSERT(it != nullptr);
              insert->addDependency(it);
              insert = it;

              // additionally rename the variables from the subquery so they
              // cannot conflict with the ones from the top query
              for (auto const& variable : it->getVariablesSetHere()) {
                queryVariables->renameVariable(variable->id);
              }
            }
          }

          // link the top node in the subquery with the original plan
          if (previous != nullptr) {
            insert->addDependency(previous);
          }

          // remove the list node from the plan
          plan->unlinkNode(listNode, false);

          // the FOR loop's out variable takes over the name of the value
          // the subquery used to RETURN
          queryVariables->renameVariable(returnNode->inVariable()->id,
                                         listNode->outVariable()->name);

          // finally replace the variables
          std::unordered_map<VariableId, Variable const*> replacements;
          replacements.emplace(listNode->outVariable()->id, returnNode->inVariable());
          RedundantCalculationsReplacer finder(plan->getAst(), replacements);
          plan->root()->walk(finder);

          // variable usage info is stale after the restructuring
          plan->clearVarUsageComputed();
          plan->findVarUsage();

          // abort optimization
          current = nullptr;
        }
      } else if (current->getType() == EN::CALCULATION) {
        auto rootNode =
            ExecutionNode::castTo<CalculationNode*>(current)->expression()->node();
        if (rootNode->type == NODE_TYPE_REFERENCE) {
          if (subqueryVars.find(static_cast<Variable const*>(rootNode->getData())) !=
              subqueryVars.end()) {
            // found an alias for the subquery variable
            subqueryVars.emplace(
                ExecutionNode::castTo<CalculationNode*>(current)->outVariable());
            aliasNodesToRemoveLater.emplace_back(current);
            current = current->getFirstParent();

            continue;
          }
        }
      }

      if (current == nullptr) {
        break;
      }

      varsUsed.clear();
      current->getVariablesUsedHere(varsUsed);

      bool mustAbort = false;
      for (auto const& itSub : subqueryVars) {
        if (varsUsed.find(itSub) != varsUsed.end()) {
          // we found another node that uses the subquery variable
          // we need to stop the optimization attempts here
          mustAbort = true;
          break;
        }
      }
      if (mustAbort) {
        break;
      }

      current = current->getFirstParent();
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
// true if the node is a constant value or a variable reference
static bool isValueOrReference(AstNode const* node) {
  switch (node->type) {
    case NODE_TYPE_VALUE:
    case NODE_TYPE_REFERENCE:
      return true;
    default:
      return false;
  }
}
|
|
|
|
/// Essentially mirrors the geo::QueryParams struct, but with
/// AstNode value objects abstracting the concrete parameter values
struct GeoIndexInfo {
  // true iff enough information was collected to replace the
  // collection-enumeration node with a geo IndexNode
  operator bool() const {
    return collectionNodeToReplace != nullptr && collectionNodeOutVar &&
           collection && index && valid;
  }
  void invalidate() { valid = false; }

  /// node that will be replaced by (geo) IndexNode
  ExecutionNode* collectionNodeToReplace = nullptr;
  /// out variable of the node to be replaced
  Variable const* collectionNodeOutVar = nullptr;

  /// accessed collection
  aql::Collection const* collection = nullptr;
  /// selected index
  std::shared_ptr<Index> index;

  /// Filter calculations to modify
  std::map<ExecutionNode*, Expression*> exesToModify;
  /// AST nodes covered by the index, to be stripped from the filter/sort
  std::set<AstNode const*> nodesToRemove;

  // ============ Distance ============
  /// center used by GEO_DISTANCE (single argument form)
  AstNode const* distCenterExpr = nullptr;
  /// center latitude used by legacy DISTANCE(lat, lng, lat, lng)
  AstNode const* distCenterLatExpr = nullptr;
  /// center longitude used by legacy DISTANCE(lat, lng, lat, lng)
  AstNode const* distCenterLngExpr = nullptr;
  // Expression representing minimum distance
  AstNode const* minDistanceExpr = nullptr;
  // Was operator >= (true) or > (false) used for the minimum bound
  bool minInclusive = true;
  // Expression representing maximum distance
  AstNode const* maxDistanceExpr = nullptr;
  // Was operator <= (true) or < (false) used for the maximum bound
  bool maxInclusive = true;

  // ============ Near Info ============
  bool sorted = false;
  /// Default order is from closest to farthest
  bool ascending = true;

  // ============ Filter Info ===========
  geo::FilterType filterMode = geo::FilterType::NONE;
  /// variable using the filter mask
  AstNode const* filterExpr = nullptr;

  // ============ Accessed Fields ============
  AstNode const* locationVar = nullptr;   // access to location field
  AstNode const* latitudeVar = nullptr;   // access path to latitude
  AstNode const* longitudeVar = nullptr;  // access path to longitude

  /// whether this struct still describes a valid condition
  bool valid = true;
};
|
|
|
|
// checks 2 parameters of distance function if they represent a valid access to
// latitude and longitude attribute of the geo index.
// distance(a,b,c,d) - possible pairs are (a,b) and (c,d)
static bool distanceFuncArgCheck(ExecutionPlan* plan, AstNode const* latArg,
                                 AstNode const* lngArg, bool supportLegacy,
                                 GeoIndexInfo& info) {
  // note: this only modifies "info" if the function returns true
  std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> attributeAccess1;
  std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> attributeAccess2;
  // first and second should be based on the same document - need to provide the
  // document in order to see which collection is bound to it and if that
  // collections supports geo-index
  if (!latArg->isAttributeAccessForVariable(attributeAccess1, true) ||
      !lngArg->isAttributeAccessForVariable(attributeAccess2, true)) {
    return false;
  }
  TRI_ASSERT(attributeAccess1.first != nullptr);
  TRI_ASSERT(attributeAccess2.first != nullptr);

  // both attribute accesses must be produced by the same
  // EnumerateCollectionNode
  ExecutionNode* setter1 = plan->getVarSetBy(attributeAccess1.first->id);
  ExecutionNode* setter2 = plan->getVarSetBy(attributeAccess2.first->id);
  if (setter1 == nullptr || setter1 != setter2 || setter1->getType() != EN::ENUMERATE_COLLECTION) {
    return false;  // expect access of doc.lat, doc.lng or doc.loc[0],
                   // doc.loc[1]
  }

  // get logical collection
  auto collNode = ExecutionNode::castTo<EnumerateCollectionNode*>(setter1);
  if (info.collectionNodeToReplace != nullptr && info.collectionNodeToReplace != collNode) {
    return false;  // should probably never happen
  }

  // we should not access the LogicalCollection directly
  Query* query = plan->getAst()->query();
  auto indexes = query->trx()->indexesForCollection(collNode->collection()->name());
  // check for suitable indexes
  for (std::shared_ptr<Index> idx : indexes) {
    // check if current index is a geo-index
    std::size_t fieldNum = idx->fields().size();
    // legacy GEO1/GEO2 index types are only considered when supportLegacy is set
    bool isGeo1 = idx->type() == Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX && supportLegacy;
    bool isGeo2 = idx->type() == Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX && supportLegacy;
    bool isGeo = idx->type() == Index::IndexType::TRI_IDX_TYPE_GEO_INDEX;

    if ((isGeo2 || isGeo) && fieldNum == 2) {  // individual fields
      // check access paths of attributes in ast and those in index match
      if (idx->fields()[0] == attributeAccess1.second &&
          idx->fields()[1] == attributeAccess2.second) {
        if (info.index != nullptr && info.index != idx) {
          // a different index was already selected earlier
          return false;
        }
        info.index = idx;
        info.latitudeVar = latArg;
        info.longitudeVar = lngArg;
        info.collectionNodeToReplace = collNode;
        info.collectionNodeOutVar = collNode->outVariable();
        info.collection = collNode->collection();
        return true;
      }
    } else if ((isGeo1 || isGeo) && fieldNum == 1) {
      // single combined field, e.g. doc.loc[0] / doc.loc[1];
      // the [0]/[1] suffix order depends on the index' geoJson setting
      std::vector<basics::AttributeName> fields1 = idx->fields()[0];
      std::vector<basics::AttributeName> fields2 = idx->fields()[0];

      VPackBuilder builder;
      idx->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
      bool geoJson =
          basics::VelocyPackHelper::getBooleanValue(builder.slice(), "geoJson", false);

      fields1.back().name += geoJson ? "[1]" : "[0]";
      fields2.back().name += geoJson ? "[0]" : "[1]";
      if (fields1 == attributeAccess1.second && fields2 == attributeAccess2.second) {
        if (info.index != nullptr && info.index != idx) {
          // a different index was already selected earlier
          return false;
        }
        info.index = idx;
        info.latitudeVar = latArg;
        info.longitudeVar = lngArg;
        info.collectionNodeToReplace = collNode;
        info.collectionNodeOutVar = collNode->outVariable();
        info.collection = collNode->collection();
        return true;
      }
    }  // if isGeo 1 or 2
  }    // for index in collection
  return false;
}
|
|
|
|
// checks parameter of GEO_* function: does it access a geo-indexed
// attribute (or lat/lng pair) of an enumerated collection?
static bool geoFuncArgCheck(ExecutionPlan* plan, AstNode const* args,
                            bool supportLegacy, GeoIndexInfo& info) {
  // note: this only modifies "info" if the function returns true
  std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> attributeAccess;
  // "arg" is either `[doc.lat, doc.lng]` or `doc.geometry`
  if (args->isArray() && args->numMembers() == 2) {
    // note the reversed member order: a geo pair array is [lng, lat]
    return distanceFuncArgCheck(plan, /*lat*/ args->getMemberUnchecked(1),
                                /*lng*/ args->getMemberUnchecked(0), supportLegacy, info);
  } else if (!args->isAttributeAccessForVariable(attributeAccess, true)) {
    return false;  // no attribute access, no index check
  }
  TRI_ASSERT(attributeAccess.first != nullptr);
  // the accessed variable must be produced by an EnumerateCollectionNode
  ExecutionNode* setter = plan->getVarSetBy(attributeAccess.first->id);
  if (setter == nullptr || setter->getType() != EN::ENUMERATE_COLLECTION) {
    return false;  // expected access of the for doc.attribute
  }

  // get logical collection
  auto collNode = ExecutionNode::castTo<EnumerateCollectionNode*>(setter);
  if (info.collectionNodeToReplace != nullptr && info.collectionNodeToReplace != collNode) {
    return false;  // should probably never happen
  }

  // we should not access the LogicalCollection directly
  Query* query = plan->getAst()->query();
  auto indexes = query->trx()->indexesForCollection(collNode->collection()->name());
  // check for suitable indexes
  for (std::shared_ptr<arangodb::Index> idx : indexes) {
    // check if current index is a geo-index
    bool isGeo = idx->type() == arangodb::Index::IndexType::TRI_IDX_TYPE_GEO_INDEX;
    if (isGeo && idx->fields().size() == 1) {  // individual fields
      // check access paths of attributes in ast and those in index match
      if (idx->fields()[0] == attributeAccess.second) {
        if (info.index != nullptr && info.index != idx) {
          return false;  // different index
        }
        info.index = idx;
        info.locationVar = args;
        info.collectionNodeToReplace = collNode;
        info.collectionNodeOutVar = collNode->outVariable();
        info.collection = collNode->collection();
        return true;
      }
    }
  }  // for index in collection
  return false;
}
|
|
|
|
/// returns true if left side is same as right or lhs is null
|
|
static bool isValidGeoArg(AstNode const* lhs, AstNode const* rhs) {
|
|
if (lhs == nullptr) { // lhs is from the GeoIndexInfo struct
|
|
return true; // if geoindex field is null everything is valid
|
|
} else if (lhs->type != rhs->type) {
|
|
return false;
|
|
} else if (lhs->isArray()) { // expect `[doc.lng, doc.lat]`
|
|
if (lhs->numMembers() >= 2 && rhs->numMembers() >= 2) {
|
|
return isValidGeoArg(lhs->getMemberUnchecked(0), rhs->getMemberUnchecked(0)) &&
|
|
isValidGeoArg(lhs->getMemberUnchecked(1), rhs->getMemberUnchecked(1));
|
|
}
|
|
return false;
|
|
} else if (lhs->type == NODE_TYPE_REFERENCE) {
|
|
return static_cast<Variable const*>(lhs->getData())->id ==
|
|
static_cast<Variable const*>(rhs->getData())->id;
|
|
}
|
|
// CompareAstNodes does not handle non const attribute access
|
|
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> res1, res2;
|
|
bool acc1 = lhs->isAttributeAccessForVariable(res1, true);
|
|
bool acc2 = rhs->isAttributeAccessForVariable(res2, true);
|
|
if (acc1 || acc2) {
|
|
return acc1 && acc2 && res1 == res2; // same variable same path
|
|
}
|
|
return aql::CompareAstNodes(lhs, rhs, false) == 0;
|
|
}
|
|
|
|
// checks whether the expression is a DISTANCE(...) / GEO_DISTANCE(...) call
// (possibly behind a variable reference) that can be served by a geo index
static bool checkDistanceFunc(ExecutionPlan* plan, AstNode const* funcNode,
                              bool legacy, GeoIndexInfo& info) {
  // note: this only modifies "info" if the function returns true
  if (funcNode->type == NODE_TYPE_REFERENCE) {
    // FOR x IN cc LET d = DISTANCE(...) FILTER d > 10 RETURN x
    // follow the reference to the calculation that produced it
    Variable const* var = static_cast<Variable const*>(funcNode->getData());
    TRI_ASSERT(var != nullptr);
    ExecutionNode* setter = plan->getVarSetBy(var->id);
    if (setter == nullptr || setter->getType() != EN::CALCULATION) {
      return false;
    }
    funcNode = ExecutionNode::castTo<CalculationNode*>(setter)->expression()->node();
  }
  // get the ast node of the expression
  if (!funcNode || funcNode->type != NODE_TYPE_FCALL || funcNode->numMembers() != 1) {
    return false;
  }
  AstNode* fargs = funcNode->getMemberUnchecked(0);
  auto func = static_cast<Function const*>(funcNode->getData());
  if (fargs->numMembers() >= 4 && func->name == "DISTANCE") {  // allow DISTANCE(a,b,c,d)
    if (info.distCenterExpr != nullptr) {
      return false;  // do not allow mixing of DISTANCE and GEO_DISTANCE
    }
    // either (a,b) is the indexed attribute pair and (c,d) the query point,
    // or the other way around
    if (isValidGeoArg(info.distCenterLatExpr, fargs->getMemberUnchecked(2)) &&
        isValidGeoArg(info.distCenterLngExpr, fargs->getMemberUnchecked(3)) &&
        distanceFuncArgCheck(plan, fargs->getMemberUnchecked(0),
                             fargs->getMemberUnchecked(1), legacy, info)) {
      info.distCenterLatExpr = fargs->getMemberUnchecked(2);
      info.distCenterLngExpr = fargs->getMemberUnchecked(3);
      return true;
    } else if (isValidGeoArg(info.distCenterLatExpr, fargs->getMemberUnchecked(0)) &&
               isValidGeoArg(info.distCenterLngExpr, fargs->getMemberUnchecked(1)) &&
               distanceFuncArgCheck(plan, fargs->getMemberUnchecked(2),
                                    fargs->getMemberUnchecked(3), legacy, info)) {
      info.distCenterLatExpr = fargs->getMemberUnchecked(0);
      info.distCenterLngExpr = fargs->getMemberUnchecked(1);
      return true;
    }
  } else if (fargs->numMembers() == 2 && func->name == "GEO_DISTANCE") {
    if (info.distCenterLatExpr || info.distCenterLngExpr) {
      return false;  // do not allow mixing of DISTANCE and GEO_DISTANCE
    }
    // either argument may be the indexed attribute, the other is the center
    if (isValidGeoArg(info.distCenterExpr, fargs->getMemberUnchecked(1)) &&
        geoFuncArgCheck(plan, fargs->getMemberUnchecked(0), legacy, info)) {
      info.distCenterExpr = fargs->getMemberUnchecked(1);
      return true;
    } else if (isValidGeoArg(info.distCenterExpr, fargs->getMemberUnchecked(0)) &&
               geoFuncArgCheck(plan, fargs->getMemberUnchecked(1), legacy, info)) {
      info.distCenterExpr = fargs->getMemberUnchecked(0);
      return true;
    }
  }
  return false;
}
|
|
|
|
// does the AstNode contain a supported geo filter function
// (GEO_CONTAINS / GEO_INTERSECTS) usable with the geo index?
static bool checkGeoFilterFunction(ExecutionPlan* plan, AstNode const* funcNode,
                                   GeoIndexInfo& info) {
  // note: this only modifies "info" if the function returns true
  // the expression must exist and it must be a function call
  if (funcNode->type != NODE_TYPE_FCALL || funcNode->numMembers() != 1 ||
      info.filterMode != geo::FilterType::NONE) {  // can't handle more than one
    return false;
  }

  auto func = static_cast<Function const*>(funcNode->getData());
  AstNode* fargs = funcNode->getMemberUnchecked(0);
  bool contains = func->name == "GEO_CONTAINS";
  bool intersect = func->name == "GEO_INTERSECTS";
  if ((!contains && !intersect) || fargs->numMembers() != 2) {
    return false;
  }

  // second argument must be the indexed attribute; first is the filter mask
  AstNode* arg = fargs->getMemberUnchecked(1);
  if (geoFuncArgCheck(plan, arg, /*legacy*/ true, info)) {
    TRI_ASSERT(contains || intersect);
    info.filterMode = contains ? geo::FilterType::CONTAINS : geo::FilterType::INTERSECTS;
    info.filterExpr = fargs->getMemberUnchecked(0);
    TRI_ASSERT(info.index);
    return true;
  }
  return false;
}
|
|
|
|
// checks if a node contanis a geo index function a valid operator
|
|
// to use within a filter condition
|
|
bool checkGeoFilterExpression(ExecutionPlan* plan, AstNode const* node, GeoIndexInfo& info) {
|
|
// checks @first `smaller` @second
|
|
// note: this only modifies "info" if the function returns true
|
|
auto eval = [&](AstNode const* first, AstNode const* second, bool lessequal) -> bool {
|
|
if (isValueOrReference(second) && // no attribute access
|
|
info.maxDistanceExpr == nullptr && // max distance is not yet set
|
|
checkDistanceFunc(plan, first, /*legacy*/ true, info)) {
|
|
TRI_ASSERT(info.index);
|
|
info.maxDistanceExpr = second;
|
|
info.maxInclusive = info.maxInclusive && lessequal;
|
|
info.nodesToRemove.insert(node);
|
|
return true;
|
|
} else if (isValueOrReference(first) && // no attribute access
|
|
info.minDistanceExpr == nullptr && // min distance is not yet set
|
|
checkDistanceFunc(plan, second, /*legacy*/ true, info)) {
|
|
info.minDistanceExpr = first;
|
|
info.minInclusive = info.minInclusive && lessequal;
|
|
info.nodesToRemove.insert(node);
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
|
|
switch (node->type) {
|
|
case NODE_TYPE_FCALL:
|
|
if (checkGeoFilterFunction(plan, node, info)) {
|
|
info.nodesToRemove.insert(node);
|
|
return true;
|
|
}
|
|
return false;
|
|
break;
|
|
// only DISTANCE is allowed with <=, <, >=, >
|
|
case NODE_TYPE_OPERATOR_BINARY_LE:
|
|
TRI_ASSERT(node->numMembers() == 2);
|
|
return eval(node->getMember(0), node->getMember(1), true);
|
|
break;
|
|
case NODE_TYPE_OPERATOR_BINARY_LT:
|
|
TRI_ASSERT(node->numMembers() == 2);
|
|
return eval(node->getMember(0), node->getMember(1), false);
|
|
break;
|
|
case NODE_TYPE_OPERATOR_BINARY_GE:
|
|
TRI_ASSERT(node->numMembers() == 2);
|
|
return eval(node->getMember(1), node->getMember(0), true);
|
|
case NODE_TYPE_OPERATOR_BINARY_GT:
|
|
TRI_ASSERT(node->numMembers() == 2);
|
|
return eval(node->getMember(1), node->getMember(0), false);
|
|
break;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// checks a single SORT node: if it sorts by a distance function that a geo
// index can provide, record that in "info" and mark the sort for removal
// (single server only)
static bool optimizeSortNode(ExecutionPlan* plan, SortNode* sort, GeoIndexInfo& info) {
  // note: info will only be modified if the function returns true
  TRI_ASSERT(sort->getType() == EN::SORT);
  // we're looking for "SORT DISTANCE(x,y,a,b)"
  SortElementVector const& elements = sort->elements();
  if (elements.size() != 1) {  // can't do it
    return false;
  }
  TRI_ASSERT(elements[0].var != nullptr);

  // find the expression that is bound to the variable
  // get the expression node that holds the calculation
  ExecutionNode* setter = plan->getVarSetBy(elements[0].var->id);
  if (setter == nullptr || setter->getType() != EN::CALCULATION) {
    return false;  // setter could be enumerate list node e.g.
  }
  CalculationNode* calc = ExecutionNode::castTo<CalculationNode*>(setter);
  Expression* expr = calc->expression();
  if (expr == nullptr || expr->node() == nullptr) {
    return false;  // the expression must exist and must have an astNode
  }

  // info will only be modified if the function returns true
  bool legacy = elements[0].ascending;  // DESC is only supported on S2 index
  if (!info.sorted && checkDistanceFunc(plan, expr->node(), legacy, info)) {
    info.sorted = true;  // do not parse another SORT
    info.ascending = elements[0].ascending;
    if (!ServerState::instance()->isCoordinator()) {
      // we must not remove a sort in the cluster... the results from each
      // shard will be sorted by using the index, however we still need to
      // establish a cross-shard sortedness by distance.
      info.exesToModify.emplace(sort, expr);
      info.nodesToRemove.emplace(expr->node());
    }
    return true;
  }
  return false;
}
|
|
|
|
// checks a single FILTER node: traverses its condition expression and
// records all geo-index-coverable parts (distance bounds, GEO_CONTAINS /
// GEO_INTERSECTS) in "info"
static void optimizeFilterNode(ExecutionPlan* plan, FilterNode* fn, GeoIndexInfo& info) {
  TRI_ASSERT(fn->getType() == EN::FILTER);

  // filter nodes always have one input variable
  auto variable = ExecutionNode::castTo<FilterNode const*>(fn)->inVariable();
  // now check who introduced our variable
  ExecutionNode* setter = plan->getVarSetBy(variable->id);
  if (setter == nullptr || setter->getType() != EN::CALCULATION) {
    return;
  }
  CalculationNode* calc = ExecutionNode::castTo<CalculationNode*>(setter);
  Expression* expr = calc->expression();
  if (expr == nullptr || expr->node() == nullptr) {
    return;  // the expression must exist and must have an AstNode
  }

  // descend only through AND conjunctions, comparisons and function calls;
  // anything else (e.g. OR) cannot be covered by a single index condition
  Ast::traverseReadOnly(expr->node(),
                        [&](AstNode const* node) {  // pre
                          if (node->isSimpleComparisonOperator() ||
                              node->type == arangodb::aql::NODE_TYPE_FCALL ||
                              node->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_AND ||
                              node->type == arangodb::aql::NODE_TYPE_OPERATOR_NARY_AND) {
                            return true;
                          }
                          return false;
                        },
                        [&](AstNode const* node) {  // post
                          if (!node->isSimpleComparisonOperator() &&
                              node->type != arangodb::aql::NODE_TYPE_FCALL) {
                            return;
                          }
                          if (checkGeoFilterExpression(plan, node, info)) {
                            info.exesToModify.emplace(fn, expr);
                          }
                        });
}
|
|
|
|
// modify plan
|
|
|
|
// builds a condition that can be used with the index interface and
// contains all parameters required by the MMFilesGeoIndex
static std::unique_ptr<Condition> buildGeoCondition(ExecutionPlan* plan,
                                                    GeoIndexInfo const& info) {
  Ast* ast = plan->getAst();
  // shared code to add symbolic `doc.geometry` or `[doc.lng, doc.lat]`
  auto addLocationArg = [ast, &info](AstNode* args) {
    if (info.locationVar) {
      args->addMember(info.locationVar);
    } else if (info.latitudeVar && info.longitudeVar) {
      AstNode* array = ast->createNodeArray(2);
      array->addMember(info.longitudeVar);  // GeoJSON ordering
      array->addMember(info.latitudeVar);
      args->addMember(array);
    } else {
      TRI_ASSERT(false);
      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "unsupported geo type");
    }
  };

  TRI_ASSERT(info.index);
  auto cond = std::make_unique<Condition>(ast);
  bool hasCenter = info.distCenterLatExpr || info.distCenterExpr;
  bool hasDistLimit = info.maxDistanceExpr || info.minDistanceExpr;
  TRI_ASSERT(!hasCenter || hasDistLimit || info.sorted);
  if (hasCenter && (hasDistLimit || info.sorted)) {
    // create GEO_DISTANCE(...) [<|<=|>=|>] Var
    AstNode* args = ast->createNodeArray(2);
    if (info.distCenterLatExpr && info.distCenterLngExpr) {  // legacy
      // legacy DISTANCE(...) center: wrap lat/lng into a [lng, lat] array
      TRI_ASSERT(!info.distCenterExpr);
      // info.sorted && info.ascending &&
      AstNode* array = ast->createNodeArray(2);
      array->addMember(info.distCenterLngExpr);  // GeoJSON ordering
      array->addMember(info.distCenterLatExpr);
      args->addMember(array);
    } else {
      TRI_ASSERT(info.distCenterExpr);
      TRI_ASSERT(!info.distCenterLatExpr && !info.distCenterLngExpr);
      args->addMember(info.distCenterExpr);  // center location
    }

    addLocationArg(args);
    AstNode* func =
        ast->createNodeFunctionCall(TRI_CHAR_LENGTH_PAIR("GEO_DISTANCE"), args);

    TRI_ASSERT(info.maxDistanceExpr || info.minDistanceExpr || info.sorted);
    if (info.minDistanceExpr != nullptr) {
      // GEO_DISTANCE(...) >= / > minDistance
      AstNodeType t = info.minInclusive ? NODE_TYPE_OPERATOR_BINARY_GE
                                        : NODE_TYPE_OPERATOR_BINARY_GT;
      cond->andCombine(ast->createNodeBinaryOperator(t, func, info.minDistanceExpr));
    }
    if (info.maxDistanceExpr != nullptr) {
      // GEO_DISTANCE(...) <= / < maxDistance
      AstNodeType t = info.maxInclusive ? NODE_TYPE_OPERATOR_BINARY_LE
                                        : NODE_TYPE_OPERATOR_BINARY_LT;
      cond->andCombine(ast->createNodeBinaryOperator(t, func, info.maxDistanceExpr));
    }
    if (info.minDistanceExpr == nullptr && info.maxDistanceExpr == nullptr && info.sorted) {
      // hack to pass on the sort-to-point info
      AstNodeType t = NODE_TYPE_OPERATOR_BINARY_LT;
      std::string const& u = StaticStrings::Unlimited;
      AstNode* cc = ast->createNodeValueString(u.c_str(), u.length());
      cond->andCombine(ast->createNodeBinaryOperator(t, func, cc));
    }
  }
  if (info.filterMode != geo::FilterType::NONE) {
    // create GEO_CONTAINS / GEO_INTERSECTS
    TRI_ASSERT(info.filterExpr);
    TRI_ASSERT(info.locationVar || (info.longitudeVar && info.latitudeVar));

    AstNode* args = ast->createNodeArray(2);
    args->addMember(info.filterExpr);
    addLocationArg(args);
    if (info.filterMode == geo::FilterType::CONTAINS) {
      cond->andCombine(ast->createNodeFunctionCall("GEO_CONTAINS", args));
    } else if (info.filterMode == geo::FilterType::INTERSECTS) {
      cond->andCombine(ast->createNodeFunctionCall("GEO_INTERSECTS", args));
    } else {
      TRI_ASSERT(false);
    }
  }

  cond->normalize(plan);
  return cond;
}
|
|
|
|
// applies the optimization for a candidate
|
|
static bool applyGeoOptimization(ExecutionPlan* plan, LimitNode* ln,
|
|
GeoIndexInfo const& info) {
|
|
TRI_ASSERT(info.collection != nullptr);
|
|
TRI_ASSERT(info.collectionNodeToReplace != nullptr);
|
|
TRI_ASSERT(info.index);
|
|
|
|
// verify that all vars used in the index condition are valid
|
|
auto const& valid = info.collectionNodeToReplace->getVarsValid();
|
|
auto checkVars = [&valid](AstNode const* expr) {
|
|
if (expr != nullptr) {
|
|
arangodb::HashSet<Variable const*> varsUsed;
|
|
Ast::getReferencedVariables(expr, varsUsed);
|
|
for (Variable const* v : varsUsed) {
|
|
if (valid.find(v) == valid.end()) {
|
|
return false; // invalid variable foud
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
};
|
|
if (!checkVars(info.distCenterExpr) || !checkVars(info.distCenterLatExpr) ||
|
|
!checkVars(info.distCenterLngExpr) || !checkVars(info.filterExpr)) {
|
|
return false;
|
|
}
|
|
|
|
size_t limit = 0;
|
|
if (ln != nullptr) {
|
|
limit = ln->offset() + ln->limit();
|
|
TRI_ASSERT(limit != SIZE_MAX);
|
|
}
|
|
|
|
IndexIteratorOptions opts;
|
|
opts.sorted = info.sorted;
|
|
opts.ascending = info.ascending;
|
|
opts.limit = limit;
|
|
opts.evaluateFCalls = false; // workaround to avoid evaluating "doc.geo"
|
|
std::unique_ptr<Condition> condition(buildGeoCondition(plan, info));
|
|
auto inode = new IndexNode(plan, plan->nextId(), info.collection, info.collectionNodeOutVar,
|
|
std::vector<transaction::Methods::IndexHandle>{
|
|
transaction::Methods::IndexHandle{info.index}},
|
|
std::move(condition), opts);
|
|
plan->registerNode(inode);
|
|
plan->replaceNode(info.collectionNodeToReplace, inode);
|
|
|
|
// remove expressions covered by our index
|
|
Ast* ast = plan->getAst();
|
|
for (std::pair<ExecutionNode*, Expression*> pair : info.exesToModify) {
|
|
AstNode* root = pair.second->nodeForModification();
|
|
auto pre = [&](AstNode const* node) -> bool {
|
|
return node == root || Ast::IsAndOperatorType(node->type);
|
|
};
|
|
auto visitor = [&](AstNode* node) -> AstNode* {
|
|
if (Ast::IsAndOperatorType(node->type)) {
|
|
std::vector<AstNode*> keep; // always shallow copy node
|
|
for (std::size_t i = 0; i < node->numMembers(); i++) {
|
|
AstNode* child = node->getMemberUnchecked(i);
|
|
if (info.nodesToRemove.find(child) == info.nodesToRemove.end()) {
|
|
keep.push_back(child);
|
|
}
|
|
}
|
|
|
|
if (keep.size() > 2) {
|
|
AstNode* n = ast->createNodeNaryOperator(NODE_TYPE_OPERATOR_NARY_AND);
|
|
for (size_t i = 0; i < keep.size(); i++) {
|
|
n->addMember(keep[i]);
|
|
}
|
|
return n;
|
|
} else if (keep.size() == 2) {
|
|
return ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND,
|
|
keep[0], keep[1]);
|
|
} else if (keep.size() == 1) {
|
|
return keep[0];
|
|
}
|
|
return node == root ? nullptr : ast->createNodeValueBool(true);
|
|
} else if (info.nodesToRemove.find(node) != info.nodesToRemove.end()) {
|
|
return node == root ? nullptr : ast->createNodeValueBool(true);
|
|
}
|
|
return node;
|
|
};
|
|
auto post = [](AstNode const*) {};
|
|
AstNode* newNode = Ast::traverseAndModify(root, pre, visitor, post);
|
|
if (newNode == nullptr) { // if root was removed, unlink FILTER or SORT
|
|
plan->unlinkNode(pair.first);
|
|
} else if (newNode != root) {
|
|
pair.second->replaceNode(newNode);
|
|
}
|
|
}
|
|
|
|
// signal that plan has been changed
|
|
return true;
|
|
}
|
|
|
|
/// @brief optimizer rule that tries to replace an EnumerateCollectionNode with
/// an IndexNode using a geo index, based on geo FILTER/SORT expressions and an
/// optional LIMIT found in the node's parent chain
void arangodb::aql::geoIndexRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                 OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type arena;
  SmallVector<ExecutionNode*> enumerateNodes{arena};
  bool modified = false;

  plan->findNodesOfType(enumerateNodes, EN::ENUMERATE_COLLECTION, true);
  for (ExecutionNode* candidate : enumerateNodes) {
    GeoIndexInfo info;
    info.collectionNodeToReplace = candidate;

    LimitNode* limitNode = nullptr;
    // a SORT/LIMIT may only be consumed while no other loop-producing node
    // has been encountered on the way up
    bool allowSortAndLimit = true;
    bool done = false;

    // walk the chain of parents upwards, collecting usable FILTER/SORT/LIMIT
    for (ExecutionNode* en = candidate->getFirstParent(); en != nullptr && !done;
         en = en->getFirstParent()) {
      switch (en->getType()) {
        case EN::FILTER:
          // picking up filter conditions is always allowed
          optimizeFilterNode(plan.get(), ExecutionNode::castTo<FilterNode*>(en), info);
          break;

        case EN::SORT:
          // only pick up a sort clause if we haven't seen another loop yet
          if (allowSortAndLimit &&
              !optimizeSortNode(plan.get(), ExecutionNode::castTo<SortNode*>(en), info)) {
            // 1. EnumerateCollectionNode x
            // 2. SortNode x.abc ASC
            // 3. LimitNode n,m  <-- cannot reuse LIMIT node here
            done = true;  // stop walking on a non-optimizable SORT
          }
          break;

        case EN::LIMIT:
          // only pick up a limit clause if we haven't seen another loop yet
          if (allowSortAndLimit) {
            limitNode = ExecutionNode::castTo<LimitNode*>(en);
            done = true;  // stop walking after the first LIMIT
          }
          break;

        case EN::RETURN:
        case EN::COLLECT:
          done = true;  // stop walking on RETURN or COLLECT
          break;

        case EN::INDEX:
        case EN::ENUMERATE_COLLECTION:
        case EN::ENUMERATE_LIST:
        case EN::ENUMERATE_IRESEARCH_VIEW:
        case EN::TRAVERSAL:
        case EN::K_SHORTEST_PATHS:
        case EN::SHORTEST_PATH:
          // another loop: invalidate limit and sort. filters can still be used
          limitNode = nullptr;
          info.sorted = false;
          // don't allow picking up either sort or limit from here on
          allowSortAndLimit = false;
          break;

        default:
          // other node types neither contribute nor terminate the walk
          break;
      }
    }

    // if info is valid we try to optimize ENUMERATE_COLLECTION
    if (info && info.collectionNodeToReplace == candidate) {
      if (applyGeoOptimization(plan.get(), limitNode, info)) {
        modified = true;
      }
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|
|
|
|
/// @brief optimizer rule that makes a SortNode aware of a subsequent LimitNode
/// so that it can use a constrained-heap sort (keep only offset + limit rows)
/// instead of sorting its complete input
void arangodb::aql::sortLimitRule(Optimizer* opt, std::unique_ptr<ExecutionPlan> plan,
                                  OptimizerRule const* rule) {
  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  bool mod = false;

  plan->findNodesOfType(nodes, EN::SORT, true);
  for (ExecutionNode* node : nodes) {
    ExecutionNode* current = node->getFirstParent();
    LimitNode* limit = nullptr;

    // walk upwards from the SORT, looking for a LIMIT that directly consumes
    // its output; any intervening node that can change the row stream
    // invalidates the optimization
    while (current) {
      if (current->getType() == EN::LIMIT) {
        limit = ExecutionNode::castTo<LimitNode*>(current);
        break;  // stop parsing after first LIMIT
      } else if (current->getType() == EN::FILTER || current->getType() == EN::RETURN ||
                 current->getType() == EN::ENUMERATE_COLLECTION ||
                 current->getType() == EN::ENUMERATE_LIST ||
                 current->getType() == EN::ENUMERATE_IRESEARCH_VIEW ||
                 current->getType() == EN::TRAVERSAL ||
                 current->getType() == EN::SHORTEST_PATH ||
                 current->getType() == EN::K_SHORTEST_PATHS ||
                 current->getType() == EN::INDEX || current->getType() == EN::COLLECT) {
        // TODO check other end conditions
        break;  // stop parsing
      }
      current = current->getFirstParent();  // inspect next node
    }

    // if we found a limit and we meet the heuristic, make the sort node
    // aware of the limit
    if (limit != nullptr && shouldApplyHeapOptimization(node, limit)) {
      // use the checked downcast, consistent with the rest of this file
      // (was a raw static_cast before)
      auto sn = ExecutionNode::castTo<SortNode*>(node);
      sn->setLimit(limit->limit() + limit->offset());
      mod = true;
    }
  }

  opt->addPlan(std::move(plan), rule, mod);
}
|
|
|
|
/// @brief optimizer rule that inspects how subquery results are consumed and
/// either pushes a LIMIT into the subquery (for x[i] / FIRST(x) accesses) or
/// rewrites the subquery's RETURN to a constant (for LENGTH(x) / COUNT(x)),
/// so the subquery produces no more data than its consumers actually need
void arangodb::aql::optimizeSubqueriesRule(Optimizer* opt,
                                           std::unique_ptr<ExecutionPlan> plan,
                                           OptimizerRule const* rule) {
  bool modified = false;

  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, EN::CALCULATION, true);

  // per SUBQUERY node we track a tuple of
  //   <0> the LIMIT value implied by the usages (negative disables the rule)
  //   <1> the set of calculation nodes that consume the result in a safe way
  //   <2> whether the result is used for counting only (LENGTH/COUNT)
  std::unordered_map<ExecutionNode*, std::tuple<int64_t, std::unordered_set<ExecutionNode const*>, bool>> subqueryAttributes;

  for (auto const& n : nodes) {
    auto cn = ExecutionNode::castTo<CalculationNode*>(n);
    auto expr = cn->expression();
    if (expr == nullptr) {
      continue;
    }

    AstNode const* root = expr->node();
    if (root == nullptr) {
      continue;
    }

    // classify every access to a subquery result inside this calculation:
    // - a plain reference disables the optimization (full result needed)
    // - x[i] with a constant numeric index allows LIMIT i + 1
    // - FIRST(x) allows LIMIT 1; LENGTH(x)/COUNT(x) allow the count rewrite
    auto visitor = [&subqueryAttributes, &plan, n](AstNode const* node) -> bool {
      std::pair<ExecutionNode*, int64_t> found{nullptr, 0};
      bool usedForCount = false;

      if (node->type == NODE_TYPE_REFERENCE) {
        Variable const* v = static_cast<Variable const*>(node->getData());
        auto setter = plan->getVarSetBy(v->id);
        if (setter != nullptr && setter->getType() == EN::SUBQUERY) {
          // we found a subquery result being used somehow in some
          // way that will make the optimization produce wrong results
          found.first = setter;
          found.second = -1;  // negative values will disable the optimization
        }
      } else if (node->type == NODE_TYPE_INDEXED_ACCESS) {
        auto sub = node->getMemberUnchecked(0);
        if (sub->type == NODE_TYPE_REFERENCE) {
          Variable const* v = static_cast<Variable const*>(sub->getData());
          auto setter = plan->getVarSetBy(v->id);
          auto index = node->getMemberUnchecked(1);
          if (index->type == NODE_TYPE_VALUE && index->isNumericValue() &&
              setter != nullptr && setter->getType() == EN::SUBQUERY) {
            found.first = setter;
            found.second = index->getIntValue() + 1;  // x[0] => LIMIT 1
            if (found.second <= 0) {
              // negative index access => turn optimization off
              found.second = -1;
            }
          }
        }
      } else if (node->type == NODE_TYPE_FCALL && node->numMembers() > 0) {
        auto func = static_cast<Function const*>(node->getData());
        auto args = node->getMember(0);
        if (func->name == "FIRST" || func->name == "LENGTH" ||
            func->name == "COUNT") {
          if (args->numMembers() > 0 && args->getMember(0)->type == NODE_TYPE_REFERENCE) {
            Variable const* v =
                static_cast<Variable const*>(args->getMember(0)->getData());
            auto setter = plan->getVarSetBy(v->id);
            if (setter != nullptr && setter->getType() == EN::SUBQUERY) {
              found.first = setter;
              if (func->name == "FIRST") {
                found.second = 1;  // FIRST(x) => LIMIT 1
              } else {
                // LENGTH/COUNT: no limit, but candidate for count rewrite
                found.second = -1;
                usedForCount = true;
              }
            }
          }
        }
      }

      if (found.first != nullptr) {
        // merge this usage into the attributes recorded for the subquery
        auto it = subqueryAttributes.find(found.first);
        if (it == subqueryAttributes.end()) {
          subqueryAttributes.emplace(found.first,
                                     std::make_tuple(found.second,
                                                     std::unordered_set<ExecutionNode const*>{n},
                                                     usedForCount));
        } else {
          auto& sq = (*it).second;
          if (usedForCount) {
            // COUNT + LIMIT together will turn off the optimization
            std::get<2>(sq) = (std::get<0>(sq) <= 0);
            std::get<0>(sq) = -1;
            std::get<1>(sq).clear();
          } else {
            if (found.second <= 0 || std::get<0>(sq) < 0) {
              // negative value will turn off the optimization
              std::get<0>(sq) = -1;
              std::get<1>(sq).clear();
            } else {
              // otherwise, use the maximum of the limits needed, and insert
              // current node into our "safe" list
              std::get<0>(sq) = std::max(std::get<0>(sq), found.second);
              std::get<1>(sq).emplace(n);
            }
            std::get<2>(sq) = false;
          }
        }
        // don't descend further
        return false;
      }

      // descend further
      return true;
    };

    Ast::traverseReadOnly(root, visitor, [](AstNode const*) {});
  }

  for (auto const& it : subqueryAttributes) {
    ExecutionNode* node = it.first;
    TRI_ASSERT(node->getType() == EN::SUBQUERY);
    auto sn = ExecutionNode::castTo<SubqueryNode const*>(node);

    if (sn->isModificationSubquery()) {
      // cannot push a LIMIT into data-modification subqueries
      continue;
    }

    auto const& sq = it.second;
    int64_t limitValue = std::get<0>(sq);
    bool usedForCount = std::get<2>(sq);
    if (limitValue <= 0 && !usedForCount) {
      // optimization turned off
      continue;
    }

    // scan from the subquery node to the bottom of the ExecutionPlan to check
    // if any of the following nodes also use the subquery result
    auto out = sn->outVariable();
    arangodb::HashSet<Variable const*> used;
    bool invalid = false;

    auto current = node->getFirstParent();
    while (current != nullptr) {
      auto const& referencedBy = std::get<1>(sq);
      if (referencedBy.find(current) == referencedBy.end()) {
        // node not found in "safe" list
        // now check if it uses the subquery's out variable
        used.clear();
        current->getVariablesUsedHere(used);
        if (used.find(out) != used.end()) {
          invalid = true;
          break;
        }
      }
      // continue iteration
      current = current->getFirstParent();
    }

    if (invalid) {
      continue;
    }

    auto root = sn->getSubquery();
    if (root != nullptr && root->getType() == EN::RETURN) {
      // now inject a limit
      auto f = root->getFirstDependency();
      TRI_ASSERT(f != nullptr);

      if (std::get<2>(sq)) {
        // used for count, e.g. COUNT(FOR doc IN collection RETURN ...)
        // this will be turned into
        // COUNT(FOR doc IN collection RETURN true)
        Ast* ast = plan->getAst();
        // generate a calculation node that only produces "true"
        // (createNodeValueBool is an instance method of Ast -- call it on the
        // ast object, consistent with the other call sites in this file)
        auto expr = std::make_unique<Expression>(plan.get(), ast,
                                                 ast->createNodeValueBool(true));
        Variable* outVariable = ast->variables()->createTemporaryVariable();
        auto calcNode = new CalculationNode(plan.get(), plan->nextId(),
                                            expr.get(), nullptr, outVariable);
        plan->registerNode(calcNode);
        expr.release();
        plan->insertAfter(f, calcNode);
        // change the result value of the existing Return node
        TRI_ASSERT(root->getType() == EN::RETURN);
        ExecutionNode::castTo<ReturnNode*>(root)->inVariable(outVariable);
        modified = true;
        continue;
      }

      if (f->getType() == EN::LIMIT) {
        // subquery already has a LIMIT node at its end
        // no need to do anything
        continue;
      }

      auto limitNode = new LimitNode(plan.get(), plan->nextId(), 0, limitValue);
      plan->registerNode(limitNode);
      plan->insertAfter(f, limitNode);
      modified = true;
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}
|