mirror of https://gitee.com/bigwinds/arangodb
485 lines
15 KiB
C++
485 lines
15 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
|
|
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
|
///
|
|
/// @author Max Neunhoeffer
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef ARANGOD_AQL_OPTIMIZER_H
|
|
#define ARANGOD_AQL_OPTIMIZER_H 1
|
|
|
|
#include "Basics/Common.h"
|
|
#include "Aql/ExecutionPlan.h"
|
|
#include "Basics/MutexLocker.h"
|
|
|
|
#include <velocypack/Builder.h>
|
|
#include <velocypack/velocypack-aliases.h>
|
|
|
|
#include <deque>
|
|
|
|
namespace arangodb {
|
|
namespace aql {
|
|
|
|
class Optimizer {
|
|
public:
|
|
/// @brief optimizer statistics
|
|
struct Stats {
|
|
int64_t rulesExecuted = 0;
|
|
int64_t rulesSkipped = 0;
|
|
int64_t plansCreated = 1; // 1 for the initial plan
|
|
|
|
std::shared_ptr<VPackBuilder> toVelocyPack() const {
|
|
auto result = std::make_shared<VPackBuilder>();
|
|
{
|
|
VPackObjectBuilder b(result.get());
|
|
result->add("rulesExecuted", VPackValue(rulesExecuted));
|
|
result->add("rulesSkipped", VPackValue(rulesSkipped));
|
|
result->add("plansCreated", VPackValue(plansCreated));
|
|
}
|
|
return result;
|
|
}
|
|
};
|
|
|
|
/// @brief optimizer rules
|
|
enum RuleLevel : int {
|
|
// List all the rules in the system here:
|
|
// lower level values mean earlier rule execution
|
|
|
|
// note that levels must be unique
|
|
|
|
// "Pass 1": moving nodes "up" (potentially outside loops):
|
|
pass1 = 100,
|
|
|
|
// determine the "right" type of CollectNode and
|
|
// add a sort node for each COLLECT (may be removed later)
|
|
specializeCollectRule_pass1 = 105,
|
|
|
|
inlineSubqueriesRule_pass1 = 106,
|
|
|
|
// split and-combined filters into multiple smaller filters
|
|
splitFiltersRule_pass1 = 110,
|
|
|
|
// move calculations up the dependency chain (to pull them out of
|
|
// inner loops etc.)
|
|
moveCalculationsUpRule_pass1 = 120,
|
|
|
|
// move filters up the dependency chain (to make result sets as small
|
|
// as possible as early as possible)
|
|
moveFiltersUpRule_pass1 = 130,
|
|
|
|
// remove calculations that are repeatedly used in a query
|
|
removeRedundantCalculationsRule_pass1 = 140,
|
|
|
|
/// "Pass 2": try to remove redundant or unnecessary nodes
|
|
pass2 = 200,
|
|
// remove filters from the query that are not necessary at all
|
|
// filters that are always true will be removed entirely
|
|
// filters that are always false will be replaced with a NoResults node
|
|
removeUnnecessaryFiltersRule_pass2 = 210,
|
|
|
|
// remove calculations that are never necessary
|
|
removeUnnecessaryCalculationsRule_pass2 = 220,
|
|
|
|
// remove redundant sort blocks
|
|
removeRedundantSortsRule_pass2 = 230,
|
|
|
|
/// "Pass 3": interchange EnumerateCollection nodes in all possible ways
|
|
/// this is level 500, please never let new plans from higher
|
|
/// levels go back to this or lower levels!
|
|
pass3 = 500,
|
|
interchangeAdjacentEnumerationsRule_pass3 = 510,
|
|
|
|
// "Pass 4": moving nodes "up" (potentially outside loops) (second try):
|
|
pass4 = 600,
|
|
// move calculations up the dependency chain (to pull them out of
|
|
// inner loops etc.)
|
|
moveCalculationsUpRule_pass4 = 610,
|
|
|
|
// move filters up the dependency chain (to make result sets as small
|
|
// as possible as early as possible)
|
|
moveFiltersUpRule_pass4 = 620,
|
|
|
|
/// "Pass 5": try to remove redundant or unnecessary nodes (second try)
|
|
// remove filters from the query that are not necessary at all
|
|
// filters that are always true will be removed entirely
|
|
// filters that are always false will be replaced with a NoResults node
|
|
pass5 = 700,
|
|
|
|
// remove redundant sort blocks
|
|
removeRedundantSortsRule_pass5 = 710,
|
|
|
|
// remove SORT RAND() if appropriate
|
|
removeSortRandRule_pass5 = 720,
|
|
|
|
// remove INTO for COLLECT if appropriate
|
|
removeCollectVariablesRule_pass5 = 740,
|
|
|
|
// propagate constant attributes in FILTERs
|
|
propagateConstantAttributesRule_pass5 = 750,
|
|
|
|
// remove unused out variables for data-modification queries
|
|
removeDataModificationOutVariablesRule_pass5 = 760,
|
|
|
|
/// "Pass 6": use indexes if possible for FILTER and/or SORT nodes
|
|
pass6 = 800,
|
|
|
|
// replace simple OR conditions with IN
|
|
replaceOrWithInRule_pass6 = 810,
|
|
|
|
// remove redundant OR conditions
|
|
removeRedundantOrRule_pass6 = 820,
|
|
|
|
applyGeoIndexRule = 825,
|
|
|
|
useIndexesRule_pass6 = 830,
|
|
|
|
// try to remove filters covered by index ranges
|
|
removeFiltersCoveredByIndexRule_pass6 = 840,
|
|
|
|
removeUnnecessaryFiltersRule_pass6 = 850,
|
|
|
|
// try to find sort blocks which are superseeded by indexes
|
|
useIndexForSortRule_pass6 = 860,
|
|
|
|
// sort values used in IN comparisons of remaining filters
|
|
sortInValuesRule_pass6 = 865,
|
|
|
|
// remove calculations that are never necessary
|
|
removeUnnecessaryCalculationsRule_pass6 = 870,
|
|
|
|
// merge filters into graph traversals
|
|
optimizeTraversalsRule_pass6 = 880,
|
|
prepareTraversalsRule_pass6 = 881,
|
|
|
|
/// Pass 9: push down calculations beyond FILTERs and LIMITs
|
|
moveCalculationsDownRule_pass9 = 900,
|
|
|
|
/// Pass 9: patch update statements
|
|
patchUpdateStatementsRule_pass9 = 902,
|
|
|
|
/// "Pass 10": final transformations for the cluster
|
|
// make operations on sharded collections use distribute
|
|
distributeInClusterRule_pass10 = 1000,
|
|
|
|
// make operations on sharded collections use scatter / gather / remote
|
|
scatterInClusterRule_pass10 = 1010,
|
|
|
|
// move FilterNodes & Calculation nodes in between
|
|
// scatter(remote) <-> gather(remote) so they're
|
|
// distributed to the cluster nodes.
|
|
distributeFilternCalcToClusterRule_pass10 = 1020,
|
|
|
|
// move SortNodes into the distribution.
|
|
// adjust gathernode to also contain the sort criteria.
|
|
distributeSortToClusterRule_pass10 = 1030,
|
|
|
|
// try to get rid of a RemoteNode->ScatterNode combination which has
|
|
// only a SingletonNode and possibly some CalculationNodes as dependencies
|
|
removeUnnecessaryRemoteScatterRule_pass10 = 1040,
|
|
|
|
// remove any superflous satellite collection joins...
|
|
// put it after Scatter rule because we would do
|
|
// the work twice otherwise
|
|
removeSatelliteJoinsRule_pass10 = 1045,
|
|
|
|
// recognize that a RemoveNode can be moved to the shards
|
|
undistributeRemoveAfterEnumCollRule_pass10 = 1050
|
|
|
|
};
|
|
|
|
public:
|
|
struct Rule;
|
|
|
|
/// @brief type of an optimizer rule function, the function gets an
|
|
/// optimizer, an ExecutionPlan, and the current rule. it has
|
|
/// to append one or more plans to the resulting deque. This must
|
|
/// include the original plan if it ought to be kept. The rule has to
|
|
/// set the level of the appended plan to the largest level of rule
|
|
/// that ought to be considered as done to indicate which rule is to be
|
|
/// applied next.
|
|
typedef std::function<void(Optimizer*, ExecutionPlan*, Rule const*)>
|
|
RuleFunction;
|
|
|
|
/// @brief type of an optimizer rule
|
|
struct Rule {
|
|
std::string name;
|
|
RuleFunction func;
|
|
RuleLevel const level;
|
|
bool const canCreateAdditionalPlans;
|
|
bool const canBeDisabled;
|
|
bool const isHidden;
|
|
|
|
Rule() = delete;
|
|
|
|
Rule(std::string const& name, RuleFunction const& func, RuleLevel level,
|
|
bool canCreateAdditionalPlans, bool canBeDisabled, bool isHidden)
|
|
: name(name),
|
|
func(func),
|
|
level(level),
|
|
canCreateAdditionalPlans(canCreateAdditionalPlans),
|
|
canBeDisabled(canBeDisabled),
|
|
isHidden(isHidden) {}
|
|
};
|
|
|
|
/// @brief the following struct keeps a list (deque) of ExecutionPlan*
|
|
/// and has some automatic convenience functions.
|
|
struct PlanList {
|
|
std::deque<ExecutionPlan*> list;
|
|
std::deque<int> levelDone;
|
|
|
|
PlanList() {}
|
|
|
|
/// @brief constructor with a plan
|
|
PlanList(ExecutionPlan* p, int level) { push_back(p, level); }
|
|
|
|
/// @brief destructor, deleting contents
|
|
~PlanList() {
|
|
for (auto& p : list) {
|
|
delete p;
|
|
}
|
|
}
|
|
|
|
/// @brief check if a plan is contained in the list
|
|
bool isContained(ExecutionPlan* plan) const {
|
|
for (auto const& p : list) {
|
|
if (p == plan) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// @brief get number of plans contained
|
|
size_t size() const { return list.size(); }
|
|
|
|
/// @brief check if empty
|
|
bool empty() const { return list.empty(); }
|
|
|
|
/// @brief pop the first one
|
|
ExecutionPlan* pop_front(int& levelDoneOut) {
|
|
auto p = list.front();
|
|
levelDoneOut = levelDone.front();
|
|
list.pop_front();
|
|
levelDone.pop_front();
|
|
return p;
|
|
}
|
|
|
|
/// @brief push_back
|
|
void push_back(ExecutionPlan* p, int level) {
|
|
list.push_back(p);
|
|
try {
|
|
levelDone.push_back(level);
|
|
} catch (...) {
|
|
list.pop_back();
|
|
throw;
|
|
}
|
|
}
|
|
|
|
/// @brief steals all the plans in b and clears b at the same time
|
|
void steal(PlanList& b) {
|
|
list.swap(b.list);
|
|
levelDone.swap(b.levelDone);
|
|
for (auto& p : b.list) {
|
|
delete p;
|
|
}
|
|
b.list.clear();
|
|
b.levelDone.clear();
|
|
}
|
|
|
|
/// @brief appends all the plans to the target and clears *this at the same
|
|
/// time
|
|
void appendTo(PlanList& target) {
|
|
while (list.size() > 0) {
|
|
auto p = list.front();
|
|
int level = levelDone.front();
|
|
list.pop_front();
|
|
levelDone.pop_front();
|
|
try {
|
|
target.push_back(p, level);
|
|
} catch (...) {
|
|
delete p;
|
|
throw;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// @brief clear, deletes all plans contained
|
|
void clear() {
|
|
for (auto& p : list) {
|
|
delete p;
|
|
}
|
|
list.clear();
|
|
levelDone.clear();
|
|
}
|
|
};
|
|
|
|
public:
|
|
/// @brief constructor, this will initialize the rules database
|
|
/// the .cpp file includes Aql/OptimizerRules.h
|
|
/// and add all methods there to the rules database
|
|
explicit Optimizer(size_t);
|
|
|
|
~Optimizer() {}
|
|
|
|
public:
|
|
/// @brief do the optimization, this does the optimization, the resulting
|
|
/// plans are all estimated, sorted by that estimate and can then be got
|
|
/// by getPlans, until the next initialize is called. Note that the optimizer
|
|
/// object takes ownership of the execution plan and will delete it
|
|
/// automatically on destruction. It will also have ownership of all the
|
|
/// newly created plans it recalls and will automatically delete them.
|
|
/// If you need to extract the plans from the optimizer use stealBest or
|
|
/// stealPlans.
|
|
int createPlans(ExecutionPlan* p, std::vector<std::string> const&, bool);
|
|
|
|
/// @brief add a plan to the optimizer
|
|
/// returns false if there are already enough plans, true otherwise
|
|
bool addPlan(ExecutionPlan*, Rule const*, bool, int newLevel = 0);
|
|
|
|
/// @brief getBest, ownership of the plan remains with the optimizer
|
|
ExecutionPlan* getBest() {
|
|
if (_plans.empty()) {
|
|
return nullptr;
|
|
}
|
|
return _plans.list.front();
|
|
}
|
|
|
|
/// @brief getPlans, ownership of the plans remains with the optimizer
|
|
std::deque<ExecutionPlan*>& getPlans() { return _plans.list; }
|
|
|
|
/// @brief stealBest, ownership of the plan is handed over to the caller,
|
|
/// all other plans are deleted
|
|
ExecutionPlan* stealBest() {
|
|
if (_plans.empty()) {
|
|
return nullptr;
|
|
}
|
|
auto res = _plans.list.front();
|
|
for (size_t i = 1; i < _plans.size(); i++) {
|
|
delete _plans.list[i];
|
|
}
|
|
_plans.list.clear();
|
|
_plans.levelDone.clear();
|
|
|
|
return res;
|
|
}
|
|
|
|
/// @brief numberOfPlans, returns the current number of plans in the system
|
|
/// this should be called from rules, it will consider those that the
|
|
/// current rules has already added
|
|
size_t numberOfPlans() { return _plans.size() + _newPlans.size() + 1; }
|
|
|
|
/// @brief stealPlans, ownership of the plans is handed over to the caller,
|
|
/// the optimizer will forget about them!
|
|
std::deque<ExecutionPlan*> stealPlans() {
|
|
std::deque<ExecutionPlan*> res;
|
|
res.swap(_plans.list);
|
|
_plans.levelDone.clear();
|
|
return res;
|
|
}
|
|
|
|
/// @brief translate a list of rule ids into rule name
|
|
static std::vector<std::string> translateRules(std::vector<int> const&);
|
|
|
|
/// @brief translate a single rule
|
|
static char const* translateRule(int);
|
|
|
|
/// @brief returns the previous rule (sorted by rule levels)
|
|
static RuleLevel previousRule(RuleLevel level) {
|
|
auto it = _rules.find(level);
|
|
|
|
if (it == _rules.begin()) {
|
|
// already at start
|
|
return level;
|
|
}
|
|
|
|
--it;
|
|
return (*it).second.level;
|
|
}
|
|
|
|
private:
|
|
/// @brief estimatePlans
|
|
void estimatePlans();
|
|
|
|
/// @brief sortPlans
|
|
void sortPlans();
|
|
|
|
/// @brief look up the ids of all disabled rules
|
|
std::unordered_set<int> getDisabledRuleIds(
|
|
std::vector<std::string> const&) const;
|
|
|
|
/// @brief register a rule
|
|
static void registerRule(std::string const& name, RuleFunction func,
|
|
RuleLevel level, bool canCreateAdditionalPlans,
|
|
bool canBeDisabled, bool isHidden = false) {
|
|
if (_ruleLookup.find(name) != _ruleLookup.end()) {
|
|
// duplicate rule names are not allowed
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
|
|
"duplicate optimizer rule name");
|
|
}
|
|
|
|
_ruleLookup.emplace(name, level);
|
|
|
|
if (_rules.find(level) != _rules.end()) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
|
|
"duplicate optimizer rule level");
|
|
}
|
|
|
|
_rules.emplace(level, Rule(name, func, level, canCreateAdditionalPlans, canBeDisabled, isHidden));
|
|
}
|
|
|
|
/// @brief register a hidden rule
|
|
static void registerHiddenRule(std::string const& name, RuleFunction const& func,
|
|
RuleLevel level, bool canCreateAdditionalPlans, bool canBeDisabled) {
|
|
registerRule(name, func, level, canCreateAdditionalPlans, canBeDisabled, true);
|
|
}
|
|
|
|
/// @brief set up the optimizer rules once and forever
|
|
static void setupRules();
|
|
|
|
public:
|
|
/// @brief optimizer statistics
|
|
Stats _stats;
|
|
|
|
private:
|
|
/// @brief the rules database
|
|
static std::map<int, Rule> _rules;
|
|
|
|
/// @brief map to look up rule id by name
|
|
static std::unordered_map<std::string, int> _ruleLookup;
|
|
|
|
/// @brief mutex to protect rule setup
|
|
static arangodb::Mutex SetupLock;
|
|
|
|
/// @brief the current set of plans to be optimized
|
|
PlanList _plans;
|
|
|
|
/// @brief current list of plans (while applying optimizer rules)
|
|
PlanList _newPlans;
|
|
|
|
/// @brief maximal number of plans to produce
|
|
size_t const _maxNumberOfPlans;
|
|
|
|
/// @brief default value for maximal number of plans to produce
|
|
static size_t const DefaultMaxNumberOfPlans = 192;
|
|
};
|
|
|
|
} // namespace aql
|
|
} // namespace arangodb
|
|
#endif
|