1
0
Fork 0
arangodb/arangod/Aql/OptimizerRule.h

363 lines
11 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2017 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_AQL_AQL_OPTIMIZER_RULE_H
#define ARANGOD_AQL_AQL_OPTIMIZER_RULE_H 1
#include "Basics/Common.h"
#include <velocypack/StringRef.h>
#include <memory>
#include <type_traits>
namespace arangodb {
namespace aql {
// Forward declarations. ExecutionPlan and Optimizer are only named in the
// RuleFunction signature below; OptimizerRule is declared early because
// RuleFunction refers to it before the struct itself is defined.
class ExecutionPlan;
class Optimizer;
struct OptimizerRule;
/// @brief type of an optimizer rule function, the function gets an
/// optimizer, an ExecutionPlan, and the current rule. it has
/// to append one or more plans to the resulting deque. This must
/// include the original plan if it ought to be kept. The rule has to
/// set the level of the appended plan to the largest level of rule
/// that ought to be considered as done to indicate which rule is to be
/// applied next.
typedef void (*RuleFunction)(Optimizer*, std::unique_ptr<ExecutionPlan>, OptimizerRule const&);
/// @brief type of an optimizer rule
///
/// Bundles the rule's name, the function implementing it, its level (position
/// in the rule ordering) and a bitmask of Flags. The type is move-only: copy
/// construction and copy assignment are deleted.
struct OptimizerRule {
  /// @brief rule properties. Every enumerator except Default occupies its own
  /// bit, so several of them can be combined into one bitmask via makeFlags()
  enum class Flags : int {
    Default = 0,
    Hidden = 1,
    ClusterOnly = 2,
    CanBeDisabled = 4,
    CanCreateAdditionalPlans = 8,
    DisabledByDefault = 16,
  };

  /// @brief helper for building flags, base case: no flags given.
  /// @return the numeric value of Flags::Default
  static constexpr std::underlying_type<Flags>::type makeFlags() {
    return static_cast<std::underlying_type<Flags>::type>(Flags::Default);
  }

  /// @brief helper for building flags: combines all given flags into a
  /// single bitmask.
  /// The flags are OR-ed together (rather than added), so naming the same
  /// flag more than once cannot spill over into a neighboring flag's bit.
  /// @param flag first flag to set
  /// @param args any number of further flags to set
  /// @return bitmask with the bits of all given flags set
  template <typename... Args>
  static constexpr std::underlying_type<Flags>::type makeFlags(Flags flag, Args... args) {
    return static_cast<std::underlying_type<Flags>::type>(flag) | makeFlags(args...);
  }

  /// @brief check a flag for the rule
  /// @return true if the bit of the given flag is set in this rule's flags
  bool hasFlag(Flags flag) const {
    return ((flags & static_cast<std::underlying_type<Flags>::type>(flag)) != 0);
  }

  /// @brief whether the CanBeDisabled flag is set
  bool canBeDisabled() const {
    return hasFlag(Flags::CanBeDisabled);
  }

  /// @brief whether the ClusterOnly flag is set
  bool isClusterOnly() const {
    return hasFlag(Flags::ClusterOnly);
  }

  /// @brief whether the Hidden flag is set
  bool isHidden() const {
    return hasFlag(Flags::Hidden);
  }

  /// @brief whether the CanCreateAdditionalPlans flag is set
  bool canCreateAdditionalPlans() const {
    return hasFlag(Flags::CanCreateAdditionalPlans);
  }

  /// @brief whether the DisabledByDefault flag is set
  bool isDisabledByDefault() const {
    return hasFlag(Flags::DisabledByDefault);
  }

  /// @brief optimizer rules
  enum RuleLevel : int {
    // List all the rules in the system here:
    // lower level values mean earlier rule execution
    // note that levels must be unique (only the first value is given
    // explicitly, all following values are numbered consecutively)
    initial = 100,

    // "Pass 1": moving nodes "up" (potentially outside loops):
    // ========================================================
    replaceNearWithinFulltext,

    inlineSubqueriesRule,

    // simplify some conditions in CalculationNodes
    simplifyConditionsRule,

    // move calculations up the dependency chain (to pull them out of
    // inner loops etc.)
    moveCalculationsUpRule,

    // move filters up the dependency chain (to make result sets as small
    // as possible as early as possible)
    moveFiltersUpRule,

    // remove calculations that are repeatedly used in a query
    removeRedundantCalculationsRule,

    // "Pass 2": try to remove redundant or unnecessary nodes
    // ======================================================
    // remove filters from the query that are not necessary at all
    // filters that are always true will be removed entirely
    // filters that are always false will be replaced with a NoResults node
    removeUnnecessaryFiltersRule,

    // remove calculations that are never necessary
    removeUnnecessaryCalculationsRule,

    // determine the "right" type of CollectNode and
    // add a sort node for each COLLECT (may be removed later)
    specializeCollectRule,

    // remove redundant sort blocks
    removeRedundantSortsRule,

    // push limits into subqueries and simplify them
    optimizeSubqueriesRule,

    // "Pass 3": interchange EnumerateCollection nodes in all possible ways
    // please never let new plans from higher levels go back to this or
    // lower levels!
    // ======================================================
    interchangeAdjacentEnumerationsRule,

    // "Pass 4": moving nodes "up" (potentially outside loops) (second try):
    // ======================================================
    // move calculations up the dependency chain (to pull them out of
    // inner loops etc.)
    moveCalculationsUpRule2,

    // move filters up the dependency chain (to make result sets as small
    // as possible as early as possible)
    moveFiltersUpRule2,

    // "Pass 5": try to remove redundant or unnecessary nodes (second try)
    // ======================================================
    // remove redundant sort blocks
    removeRedundantSortsRule2,

    // remove SORT RAND() if appropriate
    removeSortRandRule,

    // remove INTO for COLLECT if appropriate
    removeCollectVariablesRule,

    // propagate constant attributes in FILTERs
    propagateConstantAttributesRule,

    // remove unused out variables for data-modification queries
    removeDataModificationOutVariablesRule,

    // "Pass 6": use indexes if possible for FILTER and/or SORT nodes
    // ======================================================
    // replace simple OR conditions with IN
    replaceOrWithInRule,

    // remove redundant OR conditions
    removeRedundantOrRule,

    // remove FILTER and SORT if there are geoindexes
    applyGeoIndexRule,

    useIndexesRule,

    // try to remove filters covered by index ranges
    removeFiltersCoveredByIndexRule,

    removeUnnecessaryFiltersRule2,

    // try to find sort blocks which are superseded by indexes
    useIndexForSortRule,

    // sort values used in IN comparisons of remaining filters
    sortInValuesRule,

    // merge filters into graph traversals
    optimizeTraversalsRule,

    // remove redundant filters statements
    removeFiltersCoveredByTraversal,

    // move filters and sort conditions into views and remove them
    handleArangoSearchViewsRule,

    // remove calculations that are redundant
    // needs to run after filter removal
    removeUnnecessaryCalculationsRule2,

    // remove now obsolete path variables
    removeTraversalPathVariable,

    prepareTraversalsRule,

    // when we have single document operations, fill in special cluster
    // handling.
    substituteSingleDocumentOperations,

    // Pass 9: push down calculations beyond FILTERs and LIMITs
    moveCalculationsDownRule,

    // Pass 9: fuse filter conditions
    fuseFiltersRule,

    // Pass 9: patch update statements
    patchUpdateStatementsRule,

    // "Pass 10": final transformations for the cluster
    // ======================================================
    // optimize queries in the cluster so that the entire query
    // gets pushed to a single server
    // if applied, this rule will turn all other cluster rules off
    // for the current plan
#ifdef USE_ENTERPRISE
    clusterOneShardRule,
#endif

    // make operations on sharded collections use distribute
    distributeInClusterRule,

#ifdef USE_ENTERPRISE
    smartJoinsRule,
#endif

    // make operations on sharded collections use scatter / gather / remote
    scatterInClusterRule,

    // FIXME order-???
    // make operations on sharded IResearch views use scatter / gather / remote
    scatterIResearchViewInClusterRule,

    // move FilterNodes & Calculation nodes in between
    // scatter(remote) <-> gather(remote) so they're
    // distributed to the cluster nodes.
    distributeFilternCalcToClusterRule,

    // move SortNodes into the distribution.
    // adjust gathernode to also contain the sort criteria.
    distributeSortToClusterRule,

    // try to get rid of a RemoteNode->ScatterNode combination which has
    // only a SingletonNode and possibly some CalculationNodes as dependencies
    removeUnnecessaryRemoteScatterRule,

#ifdef USE_ENTERPRISE
    // remove any superfluous satellite collection joins...
    // put it after Scatter rule because we would do
    // the work twice otherwise
    removeSatelliteJoinsRule,
#endif

    // recognize that a RemoveNode can be moved to the shards
    undistributeRemoveAfterEnumCollRule,

    // push collect operations to the db servers
    collectInClusterRule,

    // make sort node aware of subsequent limit statements for internal optimizations
    applySortLimitRule,

    // try to restrict fragments to a single shard if possible
    restrictToSingleShardRule,

    // simplify an EnumerationCollectionNode that fetches an
    // entire document to a projection of this document
    reduceExtractionToProjectionRule,

    // moves filters on collection data into EnumerateCollection/Index to
    // avoid copying large amounts of unneeded documents
    moveFiltersIntoEnumerateRule,

    // parallelizes execution in coordinator-sided GatherNodes
    parallelizeGatherRule,

    // move document materialization after SORT and LIMIT
    // this must be run AFTER all cluster rules as this rule
    // needs to take into account query distribution across cluster nodes
    // for arango search view
    lateDocumentMaterializationArangoSearchRule,

    // move document materialization after SORT and LIMIT
    // this must be run AFTER all cluster rules as this rule
    // needs to take into account query distribution across cluster nodes
    // for index
    lateDocumentMaterializationRule,

    // splice subquery into the place of a subquery node
    // enclosed by a SubqueryStartNode and a SubqueryEndNode
    // Must run last.
    spliceSubqueriesRule
  };

  // compile-time checks for ordering constraints between rule levels
#ifdef USE_ENTERPRISE
  static_assert(clusterOneShardRule < distributeInClusterRule);
  static_assert(clusterOneShardRule < smartJoinsRule);
  static_assert(clusterOneShardRule < scatterInClusterRule);

  // smart joins must come before we move filters around, so the smart-join
  // detection code does not need to take the special filters into account
  static_assert(smartJoinsRule < moveFiltersIntoEnumerateRule);
#endif

  static_assert(scatterInClusterRule < parallelizeGatherRule);

  /// @brief name of the rule (a non-owning string reference)
  velocypack::StringRef name;
  /// @brief function implementing the rule
  RuleFunction func;
  /// @brief level of the rule; lower values mean earlier rule execution
  RuleLevel level;
  /// @brief bitmask of Flags values, as produced by makeFlags()
  std::underlying_type<Flags>::type flags;

  /// @brief a rule cannot exist without name, function, level and flags
  OptimizerRule() = delete;

  /// @brief create a rule from its four components
  /// @param name name of the rule
  /// @param ruleFunc function implementing the rule
  /// @param level level of the rule
  /// @param flags bitmask of Flags values
  OptimizerRule(velocypack::StringRef name, RuleFunction const& ruleFunc,
                RuleLevel level, std::underlying_type<Flags>::type flags)
      : name(name),
        func(ruleFunc),
        level(level),
        flags(flags) {}

  // rules may be moved, but not copied
  OptimizerRule(OptimizerRule&& other) = default;
  OptimizerRule& operator=(OptimizerRule&& other) = default;

  OptimizerRule(OptimizerRule const& other) = delete;
  OptimizerRule& operator=(OptimizerRule const& other) = delete;

  /// @brief compare the level of a rule against a plain integer level
  friend bool operator<(OptimizerRule const& lhs, int rhs) {
    return lhs.level < rhs;
  }

  /// @brief compare a plain integer level against the level of a rule
  friend bool operator<(int lhs, OptimizerRule const& rhs) {
    return lhs < rhs.level;
  }
};
} // namespace aql
} // namespace arangodb
#endif