1
0
Fork 0

Completely redo the optimizer.

This commit is contained in:
Max Neunhoeffer 2014-08-27 16:52:38 +02:00
parent 27d8409425
commit 449d0cd961
4 changed files with 220 additions and 112 deletions

View File

@ -42,7 +42,7 @@ Optimizer::Optimizer () {
// List all the rules in the system here:
// try to find sort blocks which are superseded by indexes
registerRule (useIndexForSort, 888);
registerRule (useIndexForSort, 2000);
// try to find a filter after an enumerate collection and find an index . . .
@ -51,19 +51,19 @@ Optimizer::Optimizer () {
// remove filters from the query that are not necessary at all
// filters that are always true will be removed entirely
// filters that are always false will be replaced with a NoResults node
registerRule(removeUnnecessaryFiltersRule, 10000);
registerRule(removeUnnecessaryFiltersRule, 100);
// move calculations up the dependency chain (to pull them out of inner loops etc.)
registerRule(moveCalculationsUpRule, 1001);
registerRule(moveCalculationsUpRule, 1000);
// move filters up the dependency chain (to make result sets as small as possible
// as early as possible)
registerRule(moveFiltersUpRule, 1000);
registerRule(moveFiltersUpRule, 1010);
// remove calculations that are never necessary
registerRule(removeUnnecessaryCalculationsRule, 999);
registerRule(removeUnnecessaryCalculationsRule, 1020);
// Now sort them by pass:
// Now sort them by level:
std::stable_sort(_rules.begin(), _rules.end());
}
@ -72,46 +72,51 @@ Optimizer::Optimizer () {
////////////////////////////////////////////////////////////////////////////////
int Optimizer::createPlans (ExecutionPlan* plan) {
// This vector holds the plans we have created in the previous pass:
PlanList oldPlans(plan);
bool keep; // used as a return value for rules
int res;
int leastDoneLevel = 0;
int maxRuleLevel = _rules.back().level;
// _plans contains the final result
for (auto p : _plans) {
delete p;
}
// _plans contains the previous optimisation result
_plans.clear();
_plans.push_back(plan, 0);
for (int pass = 1; pass <= numberOfPasses; pass++) {
int pass = 1;
while (leastDoneLevel < maxRuleLevel) {
std::cout << "Entering pass " << pass << " of query optimization..."
<< std::endl;
// This vector holds the plans we have created in this pass:
PlanList newPlans;
// Find variable usage for all old plans now:
for (auto p : oldPlans.list) {
for (auto p : _plans.list) {
if (! p->varUsageComputed()) {
p->findVarUsage();
}
}
// For all rules:
for (auto r : _rules) {
PlanList nextOldPlans;
// For all old plans:
while (oldPlans.size() > 0) {
auto p = oldPlans.pop_front();
std::cout << "Have " << _plans.size() << " plans." << std::endl;
int count = 0;
// For all current plans:
while (_plans.size() > 0) {
int level;
auto p = _plans.pop_front(level);
if (level == maxRuleLevel) {
newPlans.push_back(p, level); // nothing to do, just keep it
}
else { // some rule needs applying
Rule r(dummyRule, level);
auto it = std::upper_bound(_rules.begin(), _rules.end(), r);
TRI_ASSERT(it != _rules.end());
std::cout << "Trying rule " << &(it->func) << " with level "
<< it->level << " to plan " << count++
<< std::endl;
try {
// keep should have a default value so rules that forget to set it
// have a deterministic behavior
keep = true;
res = r.func(this, p, newPlans, keep);
if (keep) {
nextOldPlans.push_back(p);
}
res = it->func(this, p, it->level, newPlans);
}
catch (...) {
delete p;
@ -121,28 +126,30 @@ int Optimizer::createPlans (ExecutionPlan* plan) {
return res;
}
}
oldPlans.steal(nextOldPlans);
}
// Now move the surviving old plans to the result:
oldPlans.appendTo(_plans);
// Now move all the new plans to old:
oldPlans.steal(newPlans);
// A shortcut if nothing new was produced:
if (oldPlans.size() == 0) {
break;
_plans.steal(newPlans);
leastDoneLevel = maxRuleLevel;
for (auto l : _plans.levelDone) {
if (l < leastDoneLevel) {
leastDoneLevel = l;
}
}
std::cout << "Least done level is " << leastDoneLevel << std::endl;
// Stop if the result gets out of hand:
if (_plans.size() + oldPlans.size() >= maxNumberOfPlans) {
if (_plans.size() >= maxNumberOfPlans) {
break;
}
}
// Append the surviving plans to the result:
oldPlans.appendTo(_plans);
estimatePlans();
sortPlans();
std::cout << "Optimisation ends with " << _plans.size() << " plans."
<< std::endl;
std::cout << "Costs:" << std::endl;
for (auto p : _plans.list) {
std::cout << p->getCost() << std::endl;
}
return TRI_ERROR_NO_ERROR;
}
@ -152,7 +159,7 @@ int Optimizer::createPlans (ExecutionPlan* plan) {
////////////////////////////////////////////////////////////////////////////////
void Optimizer::estimatePlans () {
for (auto p : _plans) {
for (auto p : _plans.list) {
p->getCost();
// this value is cached in the plan, so formally this step is
// unnecessary, but for the sake of cleanliness...
@ -164,7 +171,7 @@ void Optimizer::estimatePlans () {
////////////////////////////////////////////////////////////////////////////////
void Optimizer::sortPlans () {
std::sort(_plans.begin(), _plans.end(), [](ExecutionPlan* const& a, ExecutionPlan* const& b) -> bool {
std::sort(_plans.list.begin(), _plans.list.end(), [](ExecutionPlan* const& a, ExecutionPlan* const& b) -> bool {
return a->getCost() < b->getCost();
});
}

View File

@ -49,6 +49,7 @@ namespace triagens {
struct PlanList {
std::deque<ExecutionPlan*> list;
std::deque<int> levelDone;
////////////////////////////////////////////////////////////////////////////////
/// @brief constructor
@ -60,8 +61,8 @@ namespace triagens {
/// @brief constructor with a plan
////////////////////////////////////////////////////////////////////////////////
PlanList (ExecutionPlan* p) {
list.push_back(p);
PlanList (ExecutionPlan* p, int level) {
// Start out with exactly one entry: p is stored in `list` and level in
// `levelDone`; push_back appends to both deques together.
push_back(p, level);
}
////////////////////////////////////////////////////////////////////////////////
@ -82,13 +83,23 @@ namespace triagens {
return list.size();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief check if empty
////////////////////////////////////////////////////////////////////////////////
bool empty () const {
// Only `list` is consulted. push_back and pop_front in this struct add to
// and remove from `list` and `levelDone` together, so one check suffices
// as long as all modifications go through them.
return list.empty();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief pop the first one
////////////////////////////////////////////////////////////////////////////////
ExecutionPlan* pop_front () {
ExecutionPlan* pop_front (int& levelDoneOut) {
  // Removes the first plan and returns it; the level stored alongside it is
  // written to levelDoneOut. The list must not be empty when this is called,
  // front() on an empty deque is undefined.
  ExecutionPlan* first = list.front();
  levelDoneOut = levelDone.front();

  // drop the entry from both parallel deques
  levelDone.pop_front();
  list.pop_front();

  return first;
}
@ -96,8 +107,15 @@ namespace triagens {
/// @brief push_back
////////////////////////////////////////////////////////////////////////////////
void push_back (ExecutionPlan* p) {
void push_back (ExecutionPlan* p, int level) {
// Appends p to `list` and level to `levelDone` so that both deques keep the
// same length and the entries at equal positions belong together.
list.push_back(p);
try {
levelDone.push_back(level);
}
catch (...) {
// The level could not be stored: take p out of `list` again so the two
// deques stay in step, then pass the exception on. p itself is not
// deleted here.
list.pop_back();
throw;
}
}
////////////////////////////////////////////////////////////////////////////////
@ -106,22 +124,26 @@ namespace triagens {
void steal (PlanList& b) {
  // Exchange the contents of *this and b: afterwards *this holds the plans
  // and levels that were in b.
  levelDone.swap(b.levelDone);
  list.swap(b.list);

  // b now holds whatever *this contained before the call; those plans are
  // deleted and b is left empty.
  b.clear();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief appends all the plans to the target and clears *this at the same time
////////////////////////////////////////////////////////////////////////////////
void appendTo (std::vector<ExecutionPlan*>& target) {
void appendTo (PlanList& target) {
while (list.size() > 0) {
auto p = list.front();
int level = levelDone.front();
list.pop_front();
levelDone.pop_front();
try {
target.push_back(p);
target.push_back(p, level);
}
catch (...) {
delete p;
@ -130,20 +152,34 @@ namespace triagens {
}
}
////////////////////////////////////////////////////////////////////////////////
/// @brief clear, deletes every plan held in the list (front to back) and
/// leaves both the plan deque and the level deque empty
////////////////////////////////////////////////////////////////////////////////

void clear () {
  while (! list.empty()) {
    delete list.front();
    list.pop_front();
  }
  levelDone.clear();
}
};
////////////////////////////////////////////////////////////////////////////////
/// @brief type of an optimizer rule function, the function gets an optimiser,
/// an ExecutionPlan and has to append one or more plans to the resulting
/// deque. This must not include the original plan. The rule has to set keep
/// to indicate whether or not the original plan is kept in the resulting
/// list. Note that the optimization is done in multiple passes
/// @brief type of an optimizer rule function, the function gets an
/// optimiser, an ExecutionPlan, the current level of this rule and
/// has to append one or more plans to the resulting deque. This must
/// include the original plan if it ought to be kept. The rule has to
/// set the level of the appended plan to the largest level of rule
/// that ought to be considered as done to indicate which rule is to be
/// applied next.
////////////////////////////////////////////////////////////////////////////////
typedef std::function<int(Optimizer* opt,
ExecutionPlan* plan,
PlanList& out,
bool& keep)>
ExecutionPlan* plan,
int level,
PlanList& out)>
RuleFunction;
////////////////////////////////////////////////////////////////////////////////
@ -152,10 +188,10 @@ namespace triagens {
struct Rule {
RuleFunction func;
int rank;
int level;
Rule (RuleFunction f, int r)
: func(f), rank(r) {
Rule (RuleFunction f, int l)
: func(f), level(l) {
}
////////////////////////////////////////////////////////////////////////////////
@ -163,17 +199,11 @@ namespace triagens {
////////////////////////////////////////////////////////////////////////////////
bool operator< (Rule const& b) const {
return rank > b.rank;
return level < b.level;
}
};
////////////////////////////////////////////////////////////////////////////////
/// @brief number of passes in optimization
////////////////////////////////////////////////////////////////////////////////
static int const numberOfPasses = 3;
////////////////////////////////////////////////////////////////////////////////
/// @brief maximal number of plans to produce:
////////////////////////////////////////////////////////////////////////////////
@ -192,10 +222,6 @@ namespace triagens {
////////////////////////////////////////////////////////////////////////////////
~Optimizer () {
for (auto p : _plans) {
delete p;
}
_plans.clear();
}
////////////////////////////////////////////////////////////////////////////////
@ -219,15 +245,15 @@ namespace triagens {
if (_plans.empty()) {
return nullptr;
}
return _plans[0];
return _plans.list.front();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief getPlans, ownership of the plans remains with the optimizer
////////////////////////////////////////////////////////////////////////////////
std::vector<ExecutionPlan*>& getPlans () {
return _plans;
std::deque<ExecutionPlan*>& getPlans () {
// Returns a reference to the plan deque inside _plans, not a copy. The
// parallel _plans.levelDone deque is not exposed here.
// NOTE(review): adding or removing elements through this reference would
// leave levelDone with a different length -- confirm callers only read.
return _plans.list;
}
////////////////////////////////////////////////////////////////////////////////
@ -239,11 +265,12 @@ namespace triagens {
if (_plans.empty()) {
return nullptr;
}
auto res = _plans[0];
auto res = _plans.list.front();
for (size_t i = 1; i < _plans.size(); i++) {
delete _plans[i];
delete _plans.list[i];
}
_plans.clear();
_plans.list.clear();
_plans.levelDone.clear();
std::cout << res->toJson(TRI_UNKNOWN_MEM_ZONE, false).toString() << "\n";
return res;
@ -254,9 +281,10 @@ namespace triagens {
/// the optimizer will forget about them!
////////////////////////////////////////////////////////////////////////////////
std::vector<ExecutionPlan*> stealPlans () {
std::vector<ExecutionPlan*> res;
res.swap(_plans);
std::deque<ExecutionPlan*> stealPlans () {
std::deque<ExecutionPlan*> res;
res.swap(_plans.list);
_plans.levelDone.clear();
return res;
}
@ -270,8 +298,8 @@ namespace triagens {
/// @brief registerRule
////////////////////////////////////////////////////////////////////////////////
void registerRule (RuleFunction f, int pass) {
_rules.emplace_back(f, pass);
void registerRule (RuleFunction f, int level) {
// Constructs a Rule from (f, level) at the end of _rules. No ordering is
// established here; the Optimizer constructor sorts _rules by level once
// all rules have been registered.
_rules.emplace_back(f, level);
}
////////////////////////////////////////////////////////////////////////////////
@ -302,7 +330,7 @@ namespace triagens {
/// @brief the current set of plans to be optimised
////////////////////////////////////////////////////////////////////////////////
std::vector<ExecutionPlan*> _plans;
PlanList _plans;
};

View File

@ -37,6 +37,17 @@ using Json = triagens::basics::Json;
// --SECTION-- rules for the optimizer
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief dummyRule, a rule function that ignores all of its arguments and
/// reports success. It does not append anything to the output list. In
/// Optimizer::createPlans it only serves as the function of a temporary Rule
/// object that carries a level for the std::upper_bound lookup.
////////////////////////////////////////////////////////////////////////////////

int triagens::aql::dummyRule (Optimizer*,
                              ExecutionPlan*,
                              int,
                              Optimizer::PlanList&) {
  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief remove all unnecessary filters
/// this rule modifies the plan in place:
@ -46,9 +57,8 @@ using Json = triagens::basics::Json;
int triagens::aql::removeUnnecessaryFiltersRule (Optimizer* opt,
ExecutionPlan* plan,
Optimizer::PlanList& out,
bool& keep) {
keep = true; // plan will always be kept
int level,
Optimizer::PlanList& out) {
std::unordered_set<ExecutionNode*> toUnlink;
std::vector<ExecutionNode*> nodes = plan->findNodesOfType(triagens::aql::ExecutionNode::FILTER, true);
@ -99,6 +109,8 @@ int triagens::aql::removeUnnecessaryFiltersRule (Optimizer* opt,
plan->findVarUsage();
}
out.push_back(plan, level);
return TRI_ERROR_NO_ERROR;
}
@ -111,9 +123,8 @@ int triagens::aql::removeUnnecessaryFiltersRule (Optimizer* opt,
int triagens::aql::moveCalculationsUpRule (Optimizer* opt,
ExecutionPlan* plan,
Optimizer::PlanList& out,
bool& keep) {
keep = true; // plan will always be kept
int level,
Optimizer::PlanList& out) {
std::vector<ExecutionNode*> nodes = plan->findNodesOfType(triagens::aql::ExecutionNode::CALCULATION, true);
bool modified = false;
@ -180,6 +191,8 @@ int triagens::aql::moveCalculationsUpRule (Optimizer* opt,
plan->findVarUsage();
}
out.push_back(plan, level);
return TRI_ERROR_NO_ERROR;
}
@ -192,10 +205,9 @@ int triagens::aql::moveCalculationsUpRule (Optimizer* opt,
////////////////////////////////////////////////////////////////////////////////
int triagens::aql::moveFiltersUpRule (Optimizer* opt,
ExecutionPlan* plan,
Optimizer::PlanList& out,
bool& keep) {
keep = true; // plan will always be kept
ExecutionPlan* plan,
int level,
Optimizer::PlanList& out) {
std::vector<ExecutionNode*> nodes = plan->findNodesOfType(triagens::aql::ExecutionNode::FILTER, true);
bool modified = false;
@ -259,6 +271,8 @@ int triagens::aql::moveFiltersUpRule (Optimizer* opt,
plan->findVarUsage();
}
out.push_back(plan, level);
return TRI_ERROR_NO_ERROR;
}
@ -268,10 +282,9 @@ int triagens::aql::moveFiltersUpRule (Optimizer* opt,
////////////////////////////////////////////////////////////////////////////////
int triagens::aql::removeUnnecessaryCalculationsRule (Optimizer* opt,
ExecutionPlan* plan,
Optimizer::PlanList& out,
bool& keep) {
keep = true;
ExecutionPlan* plan,
int level,
Optimizer::PlanList& out) {
std::vector<ExecutionNode*> nodes
= plan->findNodesOfType(triagens::aql::ExecutionNode::CALCULATION, true);
std::unordered_set<ExecutionNode*> toUnlink;
@ -300,6 +313,8 @@ int triagens::aql::removeUnnecessaryCalculationsRule (Optimizer* opt,
plan->findVarUsage();
}
out.push_back(plan, level);
return TRI_ERROR_NO_ERROR;
}
@ -370,7 +385,7 @@ class FilterToEnumCollFinder : public WalkerWorker<ExecutionNode> {
auto noRes = new NoResultsNode(newPlan->nextId());
newPlan->registerNode(noRes);
newPlan->insertDependency(x, noRes);
_out->push_back(newPlan);
_out->push_back(newPlan, 0);
}
}
else {
@ -398,7 +413,7 @@ class FilterToEnumCollFinder : public WalkerWorker<ExecutionNode> {
throw;
}
newPlan->replaceNode(newPlan->getNodeById(node->id()), newNode);
_out->push_back(newPlan);
_out->push_back(newPlan, 0);
}
}
}
@ -520,9 +535,8 @@ class FilterToEnumCollFinder : public WalkerWorker<ExecutionNode> {
int triagens::aql::useIndexRange (Optimizer* opt,
ExecutionPlan* plan,
Optimizer::PlanList& out,
bool& keep) {
keep = true;
int level,
Optimizer::PlanList& out) {
std::vector<ExecutionNode*> nodes
= plan->findNodesOfType(triagens::aql::ExecutionNode::FILTER, true);
@ -534,6 +548,8 @@ int triagens::aql::useIndexRange (Optimizer* opt,
nn->walk(&finder);
}
out.push_back(plan, level);
return TRI_ERROR_NO_ERROR;
}
@ -648,7 +664,7 @@ class sortToIndexNode : public WalkerWorker<ExecutionNode> {
if (idx.fullmatch) { // if the index superseedes the sort, remove it.
RemoveSortNode(newPlan);
}
_out.push_back(newPlan);
_out.push_back(newPlan, 0);
}
}
@ -677,10 +693,9 @@ class sortToIndexNode : public WalkerWorker<ExecutionNode> {
int triagens::aql::useIndexForSort (Optimizer* opt,
ExecutionPlan* plan,
Optimizer::PlanList& out,
bool& keep) {
keep = true;
ExecutionPlan* plan,
int level,
Optimizer::PlanList& out) {
std::vector<ExecutionNode*> nodes
= plan->findNodesOfType(triagens::aql::ExecutionNode::SORT, true);
for (auto n : nodes) {
@ -695,11 +710,54 @@ int triagens::aql::useIndexForSort (Optimizer* opt,
oneNode->walk(&finder);
}
out.push_back(plan, level);
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief interchange adjacent EnumerateCollectionNodes in all possible ways
///
/// In its present form the rule only collects, for every
/// EnumerateCollectionNode, the run of EnumerateCollectionNodes reachable by
/// repeatedly following the first dependency. No permuted plans are produced
/// yet; the incoming plan is appended to out unchanged with the given level.
////////////////////////////////////////////////////////////////////////////////

int triagens::aql::interchangeAdjacentEnumerations (Optimizer* opt,
                                                    ExecutionPlan* plan,
                                                    int level,
                                                    Optimizer::PlanList& out) {
  auto const enumType = triagens::aql::ExecutionNode::ENUMERATE_COLLECTION;
  std::vector<ExecutionNode*> nodes = plan->findNodesOfType(enumType, true);

  // We use that the order of the nodes is such that a node B that is among the
  // recursive dependencies of a node A is later in the vector.
  for (ExecutionNode* start : nodes) {
    std::vector<ExecutionNode*> chain;
    chain.push_back(start);

    // Walk along the first dependency for as long as it is another
    // EnumerateCollectionNode, recording every node passed on the way:
    ExecutionNode* current = start;
    for (;;) {
      auto deps = current->getDependencies();
      if (deps.size() == 0 || deps[0]->getType() != enumType) {
        break;
      }
      current = deps[0];
      chain.push_back(current);
    }

    if (chain.size() > 1) {
      // TODO: compute all permutations of chain (not implemented yet)
    }
  }

  out.push_back(plan, level);

  return TRI_ERROR_NO_ERROR;
}
// Local Variables:
// mode: outline-minor
// outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)"

View File

@ -40,6 +40,12 @@ namespace triagens {
// --SECTION-- rules for the optimizer
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief dummyrule
////////////////////////////////////////////////////////////////////////////////
int dummyRule (Optimizer*, ExecutionPlan*, int level, Optimizer::PlanList&);
////////////////////////////////////////////////////////////////////////////////
/// @brief remove all unnecessary filters
/// this rule modifies the plan in place:
@ -47,7 +53,7 @@ namespace triagens {
/// - filters that are always false will be replaced by a NoResults node
////////////////////////////////////////////////////////////////////////////////
int removeUnnecessaryFiltersRule (Optimizer*, ExecutionPlan*, Optimizer::PlanList&, bool&);
int removeUnnecessaryFiltersRule (Optimizer*, ExecutionPlan*, int level, Optimizer::PlanList&);
////////////////////////////////////////////////////////////////////////////////
/// @brief move calculations up in the plan
@ -56,7 +62,7 @@ namespace triagens {
/// avoid redundant calculations in inner loops
////////////////////////////////////////////////////////////////////////////////
int moveCalculationsUpRule (Optimizer*, ExecutionPlan*, Optimizer::PlanList&, bool&);
int moveCalculationsUpRule (Optimizer*, ExecutionPlan*, int level, Optimizer::PlanList&);
////////////////////////////////////////////////////////////////////////////////
/// @brief move filters up in the plan
@ -66,25 +72,34 @@ namespace triagens {
/// filters are not pushed beyond limits
////////////////////////////////////////////////////////////////////////////////
int moveFiltersUpRule (Optimizer*, ExecutionPlan*, Optimizer::PlanList&, bool&);
int moveFiltersUpRule (Optimizer*, ExecutionPlan*, int level, Optimizer::PlanList&);
////////////////////////////////////////////////////////////////////////////////
/// @brief remove a CalculationNode that is never needed
////////////////////////////////////////////////////////////////////////////////
int removeUnnecessaryCalculationsRule (Optimizer*, ExecutionPlan*, Optimizer::PlanList&, bool&);
int removeUnnecessaryCalculationsRule (Optimizer*, ExecutionPlan*, int level, Optimizer::PlanList&);
////////////////////////////////////////////////////////////////////////////////
/// @brief prefer IndexRange nodes over EnumerateCollection nodes
////////////////////////////////////////////////////////////////////////////////
int useIndexRange (Optimizer*, ExecutionPlan*, Optimizer::PlanList&, bool&);
int useIndexRange (Optimizer*, ExecutionPlan*, int level, Optimizer::PlanList&);
////////////////////////////////////////////////////////////////////////////////
/// @brief try to use the index for sorting
////////////////////////////////////////////////////////////////////////////////
int useIndexForSort (Optimizer*, ExecutionPlan*, Optimizer::PlanList&, bool&);
int useIndexForSort (Optimizer*, ExecutionPlan*, int level, Optimizer::PlanList&);
////////////////////////////////////////////////////////////////////////////////
/// @brief interchange adjacent EnumerateCollectionNodes in all possible ways
////////////////////////////////////////////////////////////////////////////////
int interchangeAdjacentEnumerations ( Optimizer* opt,
ExecutionPlan* plan,
int level,
Optimizer::PlanList& out);
} // namespace aql
} // namespace triagens