1
0
Fork 0
arangodb/arangod/Aql/Optimizer.h

485 lines
15 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Max Neunhoeffer
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_AQL_OPTIMIZER_H
#define ARANGOD_AQL_OPTIMIZER_H 1
#include "Basics/Common.h"
#include "Aql/ExecutionPlan.h"
#include "Basics/MutexLocker.h"
#include <velocypack/Builder.h>
#include <velocypack/velocypack-aliases.h>
#include <deque>
namespace arangodb {
namespace aql {
class Optimizer {
public:
/// @brief counters collected while the optimizer runs
struct Stats {
  int64_t rulesExecuted = 0;
  int64_t rulesSkipped = 0;
  // starts at 1 to account for the initial plan
  int64_t plansCreated = 1;

  /// @brief serializes the three counters into a newly allocated builder,
  /// as one object with the keys "rulesExecuted", "rulesSkipped" and
  /// "plansCreated"
  std::shared_ptr<VPackBuilder> toVelocyPack() const {
    auto builder = std::make_shared<VPackBuilder>();
    {
      // all attributes are added while this guard is alive
      VPackObjectBuilder objectGuard(builder.get());
      builder->add("rulesExecuted", VPackValue(rulesExecuted));
      builder->add("rulesSkipped", VPackValue(rulesSkipped));
      builder->add("plansCreated", VPackValue(plansCreated));
    }
    return builder;
  }
};
/// @brief optimizer rule levels
/// Every optimizer rule is identified by one of these values; the plain
/// "passN" enumerators mark the start of a pass.
enum RuleLevel : int {
// List all the rules in the system here:
// lower level values mean earlier rule execution
// note that levels must be unique (registerRule() throws on a duplicate
// level)
// "Pass 1": moving nodes "up" (potentially outside loops):
pass1 = 100,
// determine the "right" type of CollectNode and
// add a sort node for each COLLECT (may be removed later)
specializeCollectRule_pass1 = 105,
// NOTE(review): not documented in this header; presumably inlines
// subqueries into the outer query - confirm against the rule implementation
inlineSubqueriesRule_pass1 = 106,
// split and-combined filters into multiple smaller filters
splitFiltersRule_pass1 = 110,
// move calculations up the dependency chain (to pull them out of
// inner loops etc.)
moveCalculationsUpRule_pass1 = 120,
// move filters up the dependency chain (to make result sets as small
// as possible as early as possible)
moveFiltersUpRule_pass1 = 130,
// remove calculations that are repeatedly used in a query
removeRedundantCalculationsRule_pass1 = 140,
/// "Pass 2": try to remove redundant or unnecessary nodes
pass2 = 200,
// remove filters from the query that are not necessary at all
// filters that are always true will be removed entirely
// filters that are always false will be replaced with a NoResults node
removeUnnecessaryFiltersRule_pass2 = 210,
// remove calculations that are never necessary
removeUnnecessaryCalculationsRule_pass2 = 220,
// remove redundant sort blocks
removeRedundantSortsRule_pass2 = 230,
/// "Pass 3": interchange EnumerateCollection nodes in all possible ways
/// this is level 500, please never let new plans from higher
/// levels go back to this or lower levels!
pass3 = 500,
interchangeAdjacentEnumerationsRule_pass3 = 510,
// "Pass 4": moving nodes "up" (potentially outside loops) (second try):
pass4 = 600,
// move calculations up the dependency chain (to pull them out of
// inner loops etc.)
moveCalculationsUpRule_pass4 = 610,
// move filters up the dependency chain (to make result sets as small
// as possible as early as possible)
moveFiltersUpRule_pass4 = 620,
/// "Pass 5": try to remove redundant or unnecessary nodes (second try)
// NOTE(review): the next three lines describe a filter-removal rule, but no
// such rule is listed in pass 5 below - the text may be stale, confirm
// remove filters from the query that are not necessary at all
// filters that are always true will be removed entirely
// filters that are always false will be replaced with a NoResults node
pass5 = 700,
// remove redundant sort blocks
removeRedundantSortsRule_pass5 = 710,
// remove SORT RAND() if appropriate
removeSortRandRule_pass5 = 720,
// remove INTO for COLLECT if appropriate
removeCollectVariablesRule_pass5 = 740,
// propagate constant attributes in FILTERs
propagateConstantAttributesRule_pass5 = 750,
// remove unused out variables for data-modification queries
removeDataModificationOutVariablesRule_pass5 = 760,
/// "Pass 6": use indexes if possible for FILTER and/or SORT nodes
pass6 = 800,
// replace simple OR conditions with IN
replaceOrWithInRule_pass6 = 810,
// remove redundant OR conditions
removeRedundantOrRule_pass6 = 820,
// NOTE(review): this enumerator has no "_pass6" suffix although its level
// lies inside the pass 6 range; presumably applies geo indexes - confirm
applyGeoIndexRule = 825,
// NOTE(review): not documented here; presumably the rule that makes
// FILTER nodes use indexes - confirm against the rule implementation
useIndexesRule_pass6 = 830,
// try to remove filters covered by index ranges
removeFiltersCoveredByIndexRule_pass6 = 840,
// NOTE(review): presumably the same clean-up as
// removeUnnecessaryFiltersRule_pass2, run again after index usage - confirm
removeUnnecessaryFiltersRule_pass6 = 850,
// try to find sort blocks which are superseded by indexes
useIndexForSortRule_pass6 = 860,
// sort values used in IN comparisons of remaining filters
sortInValuesRule_pass6 = 865,
// remove calculations that are never necessary
removeUnnecessaryCalculationsRule_pass6 = 870,
// merge filters into graph traversals
optimizeTraversalsRule_pass6 = 880,
// NOTE(review): not documented here; runs directly after the traversal
// optimization rule - purpose to be confirmed
prepareTraversalsRule_pass6 = 881,
/// Pass 9: push down calculations beyond FILTERs and LIMITs
moveCalculationsDownRule_pass9 = 900,
/// Pass 9: patch update statements
patchUpdateStatementsRule_pass9 = 902,
/// "Pass 10": final transformations for the cluster
// make operations on sharded collections use distribute
distributeInClusterRule_pass10 = 1000,
// make operations on sharded collections use scatter / gather / remote
scatterInClusterRule_pass10 = 1010,
// move FilterNodes & Calculation nodes in between
// scatter(remote) <-> gather(remote) so they're
// distributed to the cluster nodes.
distributeFilternCalcToClusterRule_pass10 = 1020,
// move SortNodes into the distribution.
// adjust the GatherNode to also contain the sort criteria.
distributeSortToClusterRule_pass10 = 1030,
// try to get rid of a RemoteNode->ScatterNode combination which has
// only a SingletonNode and possibly some CalculationNodes as dependencies
removeUnnecessaryRemoteScatterRule_pass10 = 1040,
// remove any superfluous satellite collection joins...
// put it after Scatter rule because we would do
// the work twice otherwise
removeSatelliteJoinsRule_pass10 = 1045,
// recognize that a RemoveNode can be moved to the shards
undistributeRemoveAfterEnumCollRule_pass10 = 1050
};
public:
// forward declaration, the rule function signature refers to Rule
struct Rule;

/// @brief signature of an optimizer rule function
/// A rule function receives the optimizer, an ExecutionPlan and the rule
/// that is currently being applied. It has to append one or more plans to
/// the resulting deque, including the original plan if that one ought to be
/// kept. For every appended plan the rule must set the level to the largest
/// rule level that ought to be considered as done, which indicates the rule
/// to be applied next.
using RuleFunction =
    std::function<void(Optimizer*, ExecutionPlan*, Rule const*)>;
/// @brief descriptor of a single optimizer rule: its name, the function
/// implementing it, its level and three boolean properties
struct Rule {
  std::string name;
  RuleFunction func;
  RuleLevel const level;
  bool const canCreateAdditionalPlans;
  bool const canBeDisabled;
  bool const isHidden;

  // a rule without name, function and level makes no sense
  Rule() = delete;

  /// @brief creates a rule descriptor, copying all arguments into the
  /// members of the same name
  Rule(std::string const& name, RuleFunction const& func, RuleLevel level,
       bool canCreateAdditionalPlans, bool canBeDisabled, bool isHidden)
      : name{name},
        func{func},
        level{level},
        canCreateAdditionalPlans{canCreateAdditionalPlans},
        canBeDisabled{canBeDisabled},
        isHidden{isHidden} {}
};
/// @brief the following struct keeps a list (deque) of ExecutionPlan*
/// together with a parallel deque holding the "level done" of each plan,
/// and has some automatic convenience functions.
/// The struct owns the plans it contains: they are deleted by the
/// destructor, by clear() and (for the previous contents) by steal().
/// Because of this ownership the struct cannot be copied.
struct PlanList {
  std::deque<ExecutionPlan*> list;
  // levelDone[i] belongs to list[i]; both deques always have the same size
  std::deque<int> levelDone;

  PlanList() {}

  /// @brief constructor with a plan
  PlanList(ExecutionPlan* p, int level) { push_back(p, level); }

  // A copy would contain the same raw pointers as the original, and both
  // destructors would then delete the same plans. Forbid copying.
  PlanList(PlanList const&) = delete;
  PlanList& operator=(PlanList const&) = delete;

  /// @brief destructor, deleting contents
  ~PlanList() {
    for (auto& p : list) {
      delete p;
    }
  }

  /// @brief check if a plan is contained in the list (pointer comparison)
  bool isContained(ExecutionPlan* plan) const {
    for (auto const& p : list) {
      if (p == plan) {
        return true;
      }
    }
    return false;
  }

  /// @brief get number of plans contained
  size_t size() const { return list.size(); }

  /// @brief check if empty
  bool empty() const { return list.empty(); }

  /// @brief pop the first one
  /// The plan is removed from the list and returned, its level is stored
  /// in levelDoneOut. The list must not be empty when this is called.
  ExecutionPlan* pop_front(int& levelDoneOut) {
    auto p = list.front();
    levelDoneOut = levelDone.front();
    list.pop_front();
    levelDone.pop_front();
    return p;
  }

  /// @brief push_back
  /// If an exception is thrown the list is left unchanged and the plan has
  /// not been added to it.
  void push_back(ExecutionPlan* p, int level) {
    list.push_back(p);
    try {
      levelDone.push_back(level);
    } catch (...) {
      // keep both deques in sync
      list.pop_back();
      throw;
    }
  }

  /// @brief steals all the plans in b and clears b at the same time
  /// The plans previously held by *this are deleted.
  void steal(PlanList& b) {
    list.swap(b.list);
    levelDone.swap(b.levelDone);
    // after the swap b holds our previous contents, dispose of them
    for (auto& p : b.list) {
      delete p;
    }
    b.list.clear();
    b.levelDone.clear();
  }

  /// @brief appends all the plans to the target and clears *this at the same
  /// time
  void appendTo(PlanList& target) {
    while (list.size() > 0) {
      auto p = list.front();
      int level = levelDone.front();
      list.pop_front();
      levelDone.pop_front();
      try {
        target.push_back(p, level);
      } catch (...) {
        // the plan is in neither list at this point, so delete it here
        delete p;
        throw;
      }
    }
  }

  /// @brief clear, deletes all plans contained
  void clear() {
    for (auto& p : list) {
      delete p;
    }
    list.clear();
    levelDone.clear();
  }
};
public:
/// @brief constructor, this will initialize the rules database
/// the .cpp file includes Aql/OptimizerRules.h
/// and adds all methods there to the rules database
/// NOTE(review): the size_t argument is unnamed here; presumably it is the
/// maximal number of plans to produce (cf. _maxNumberOfPlans) - confirm in
/// the .cpp file
explicit Optimizer(size_t);
/// @brief destructor, has an empty body
~Optimizer() {}
public:
/// @brief do the optimization. The resulting plans are all estimated,
/// sorted by that estimate and can then be fetched with getPlans, until
/// the next initialize is called. Note that the optimizer
/// object takes ownership of the execution plan and will delete it
/// automatically on destruction. It will also have ownership of all the
/// newly created plans and will automatically delete them.
/// If you need to extract the plans from the optimizer use stealBest or
/// stealPlans.
/// NOTE(review): the meaning of the unnamed string vector and bool
/// parameters and of the int return value is not visible in this header;
/// the vector presumably carries rule names (cf. getDisabledRuleIds) -
/// confirm in the .cpp file
int createPlans(ExecutionPlan* p, std::vector<std::string> const&, bool);
/// @brief add a plan to the optimizer
/// returns false if there are already enough plans, true otherwise
/// newLevel defaults to 0
/// NOTE(review): the meaning of the unnamed bool parameter is not visible
/// in this header - confirm in the .cpp file
bool addPlan(ExecutionPlan*, Rule const*, bool, int newLevel = 0);
/// @brief getBest, returns the first plan of the current plan list, or
/// nullptr if there is no plan. Ownership of the plan remains with the
/// optimizer
ExecutionPlan* getBest() {
  return _plans.empty() ? nullptr : _plans.list.front();
}
/// @brief getPlans, gives access to the optimizer's own deque of plans
/// (a reference, not a copy); ownership of the plans remains with the
/// optimizer
std::deque<ExecutionPlan*>& getPlans() {
  return _plans.list;
}
/// @brief stealBest, hands the first plan over to the caller (who becomes
/// its owner) and deletes all other plans. Returns nullptr if there is no
/// plan at all. The optimizer's plan list is empty afterwards.
ExecutionPlan* stealBest() {
  if (_plans.empty()) {
    return nullptr;
  }
  auto& plans = _plans.list;
  ExecutionPlan* best = plans.front();
  // skip the first entry, it is handed out; delete the remaining ones
  auto it = plans.begin();
  for (++it; it != plans.end(); ++it) {
    delete *it;
  }
  plans.clear();
  _plans.levelDone.clear();
  return best;
}
/// @brief numberOfPlans, returns the current number of plans in the system.
/// This should be called from rules, it also counts the plans that the
/// current rule has already added.
/// NOTE(review): the extra 1 presumably stands for the plan that is being
/// processed at the moment and is in neither list - confirm in the .cpp file
size_t numberOfPlans() {
  return 1 + _newPlans.size() + _plans.size();
}
/// @brief stealPlans, hands all plans over to the caller, who becomes their
/// owner. The optimizer forgets about them, its plan list is empty
/// afterwards.
std::deque<ExecutionPlan*> stealPlans() {
  std::deque<ExecutionPlan*> handedOver;
  // exchange with the empty deque, this leaves _plans.list empty
  _plans.list.swap(handedOver);
  _plans.levelDone.clear();
  return handedOver;
}
/// @brief translate a list of rule ids into rule names
/// NOTE(review): the ids are presumably RuleLevel values - confirm in the
/// .cpp file
static std::vector<std::string> translateRules(std::vector<int> const&);
/// @brief translate a single rule id into its name
/// NOTE(review): the result for an unknown id is not visible in this
/// header - confirm in the .cpp file
static char const* translateRule(int);
/// @brief returns the level of the previous rule (sorted by rule levels),
/// that is the highest registered rule level that is lower than the given
/// one. If there is no registered rule with a lower level, the given level
/// itself is returned.
/// The given level does not need to belong to a registered rule.
static RuleLevel previousRule(RuleLevel level) {
  // first registered rule with a level >= the given one. For a registered
  // level this is the rule itself. An exact find() would return end() for
  // an unregistered level and make us report the very last rule instead of
  // the preceding one.
  auto it = _rules.lower_bound(level);
  if (it == _rules.begin()) {
    // already at start
    return level;
  }
  --it;
  return (*it).second.level;
}
private:
/// @brief estimatePlans
/// NOTE(review): presumably computes the cost estimates that sortPlans
/// orders by - confirm in the .cpp file
void estimatePlans();
/// @brief sortPlans
/// NOTE(review): presumably orders _plans by their estimate, see the
/// description of createPlans - confirm in the .cpp file
void sortPlans();
/// @brief look up the ids of all disabled rules
/// NOTE(review): the vector presumably contains rule names as given by the
/// user - confirm in the .cpp file
std::unordered_set<int> getDisabledRuleIds(
std::vector<std::string> const&) const;
/// @brief register a rule
/// Adds the rule to the rules database (keyed by level) and to the
/// name lookup table. Throws an ArangoDB exception with TRI_ERROR_INTERNAL
/// if a rule with the same name or the same level is registered already.
/// Neither table is modified if registration fails.
static void registerRule(std::string const& name, RuleFunction func,
                         RuleLevel level, bool canCreateAdditionalPlans,
                         bool canBeDisabled, bool isHidden = false) {
  // validate everything first, so that a rejected rule leaves no traces
  if (_ruleLookup.find(name) != _ruleLookup.end()) {
    // duplicate rule names are not allowed
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
                                   "duplicate optimizer rule name");
  }
  if (_rules.find(level) != _rules.end()) {
    // duplicate rule levels are not allowed either
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
                                   "duplicate optimizer rule level");
  }

  _ruleLookup.emplace(name, level);
  try {
    _rules.emplace(level, Rule(name, func, level, canCreateAdditionalPlans,
                               canBeDisabled, isHidden));
  } catch (...) {
    // keep both tables consistent if the second insertion fails
    _ruleLookup.erase(name);
    throw;
  }
}
/// @brief register a hidden rule
/// Forwards to registerRule with the isHidden flag set.
static void registerHiddenRule(std::string const& name, RuleFunction const& func,
                               RuleLevel level, bool canCreateAdditionalPlans,
                               bool canBeDisabled) {
  bool const hidden = true;
  registerRule(name, func, level, canCreateAdditionalPlans, canBeDisabled,
               hidden);
}
/// @brief set up the optimizer rules once and forever
static void setupRules();
public:
/// @brief optimizer statistics (publicly accessible)
Stats _stats;
private:
/// @brief the rules database, keyed by rule level (see registerRule);
/// being a std::map it is ordered by level
static std::map<int, Rule> _rules;
/// @brief map to look up rule id (the rule's level, see registerRule) by
/// name
static std::unordered_map<std::string, int> _ruleLookup;
/// @brief mutex to protect rule setup
static arangodb::Mutex SetupLock;
/// @brief the current set of plans to be optimized
PlanList _plans;
/// @brief current list of plans (while applying optimizer rules)
PlanList _newPlans;
/// @brief maximal number of plans to produce
size_t const _maxNumberOfPlans;
/// @brief default value for maximal number of plans to produce
static size_t const DefaultMaxNumberOfPlans = 192;
};
} // namespace aql
} // namespace arangodb
#endif