1
0
Fork 0

speed up AQL query plan generation (#6243)

- add startup option `--query.optimizer-max-plans value`
- honor "maxNumberOfPlans" in all cases
- lower default value for "maxNumberOfPlans" from 192 to 128
This commit is contained in:
Jan 2018-08-24 13:11:06 +02:00 committed by GitHub
parent 91d4c7edb1
commit 3795f8c773
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 91 additions and 33 deletions

View File

@ -1,6 +1,28 @@
v3.3.15 (XXXX-XX-XX)
--------------------
* added startup option `--query.optimizer-max-plans value`
This option allows limiting the number of query execution plans created by the
AQL optimizer for any incoming queries. The default value is `128`.
Adjusting this value controls the maximum number of different query execution
plans that the AQL query optimizer will generate for any given AQL query.
Normally the AQL query optimizer will generate a single execution plan per AQL query,
but there are some cases in which it creates multiple competing plans. More plans
can lead to better optimized queries; however, plan creation has its costs. The
more plans are created and shipped through the optimization pipeline, the more time
will be spent in the optimizer.
Lowering this option's value will make the optimizer stop creating additional plans
when it has already created enough plans.
Note that this setting controls the default maximum number of plans to create. The
value can still be adjusted on a per-query basis by setting the *maxNumberOfPlans*
attribute when running a query.
This change also lowers the default maximum number of query plans from 192 to 128.
* bug fix: facilitate faster shutdown of coordinators and db servers
* upgraded arangodb starter version to 0.13.2

View File

@ -456,6 +456,26 @@ default timeout value (600 seconds) and that time out. The option has no effect
in single-server mode.
### Limiting the number of query execution plans created by the AQL optimizer
`--query.optimizer-max-plans value`
Setting *value* controls the maximum number of different query execution plans
that the AQL query optimizer will generate for any given AQL query. Normally
the AQL query optimizer will generate a single execution plan per AQL query, but
there are some cases in which it creates multiple competing plans. More plans
can lead to better optimized queries; however, plan creation has its costs. The
more plans are created and shipped through the optimization pipeline, the more
time will be spent in the optimizer.
Lowering *value* will make the optimizer stop creating additional plans when it
has already created enough plans.
Note that this setting controls the default maximum number of plans to create. The
value can still be adjusted on a per-query basis by setting the *maxNumberOfPlans*
attribute when running a query.
The default value is *128*.
### Throw collection not loaded error
`--database.throw-collection-not-loaded-error flag`

View File

@ -1504,9 +1504,7 @@ struct SubqueryVarUsageFinder final : public WalkerWorker<ExecutionNode> {
bool before(ExecutionNode* en) override final {
// Add variables used here to _usedLater:
for (auto const& v : en->getVariablesUsedHere()) {
_usedLater.emplace(v);
}
en->getVariablesUsedHere(_usedLater);
return false;
}

View File

@ -31,12 +31,12 @@ using namespace arangodb::aql;
// @brief constructor, this will initialize the rules database
Optimizer::Optimizer(size_t maxNumberOfPlans)
: _maxNumberOfPlans(maxNumberOfPlans > 0 ? maxNumberOfPlans
: defaultMaxNumberOfPlans),
: _maxNumberOfPlans(maxNumberOfPlans),
_runOnlyRequiredRules(false) {}
size_t Optimizer::hasEnoughPlans(size_t extraPlans) const {
return (_newPlans.size() + extraPlans >= _maxNumberOfPlans);
// @brief whether rules should refrain from creating additional plans.
// returns true if the optimizer has already been switched to "required rules
// only" mode, or if the plans currently held (_newPlans and _plans) plus the
// extraPlans the caller intends to add would reach _maxNumberOfPlans
bool Optimizer::runOnlyRequiredRules(size_t extraPlans) const {
  if (_runOnlyRequiredRules) {
    // flag already set, no need to count plans
    return true;
  }

  auto const plannedTotal = _newPlans.size() + _plans.size() + extraPlans;
  return plannedTotal >= _maxNumberOfPlans;
}
void Optimizer::disableRule(int rule) {
@ -54,7 +54,6 @@ void Optimizer::addPlan(std::unique_ptr<ExecutionPlan> plan, OptimizerRule const
// else use user-specified new level
}
if (wasModified) {
if (!rule->isHidden) {
// register which rules modified / created the plan
@ -70,6 +69,11 @@ void Optimizer::addPlan(std::unique_ptr<ExecutionPlan> plan, OptimizerRule const
// hand over ownership
_newPlans.push_back(plan.get(), newLevel);
plan.release();
// stop adding new plans in case we already have enough
if (_newPlans.size() + _plans.size() >= _maxNumberOfPlans) {
_runOnlyRequiredRules = true;
}
}
// @brief the actual optimization
@ -185,14 +189,6 @@ int Optimizer::createPlans(ExecutionPlan* plan,
leastDoneLevel = l;
}
}
// Stop if the result gets out of hand:
if (!_runOnlyRequiredRules && _plans.size() >= _maxNumberOfPlans) {
// must still iterate over all REQUIRED remaining transformation rules
// because there are some rules which are required to make the query
// work in cluster mode etc
_runOnlyRequiredRules = true;
}
}
_stats.plansCreated = _plans.size();

View File

@ -152,7 +152,7 @@ class Optimizer {
/// @brief constructor, this will initialize the rules database
/// the .cpp file includes Aql/OptimizerRules.h
/// and add all methods there to the rules database
explicit Optimizer(size_t);
explicit Optimizer(size_t maxNumberOfPlans);
~Optimizer() {}
@ -167,8 +167,6 @@ class Optimizer {
/// stealPlans.
int createPlans(ExecutionPlan* p, std::vector<std::string> const&, bool);
size_t hasEnoughPlans(size_t extraPlans) const;
/// @brief add a plan to the optimizer
void addPlan(std::unique_ptr<ExecutionPlan>, OptimizerRule const*, bool, int newLevel = 0);
@ -201,7 +199,7 @@ class Optimizer {
return res;
}
bool runOnlyRequiredRules() const { return _runOnlyRequiredRules; }
bool runOnlyRequiredRules(size_t extraPlans) const;
/// @brief numberOfPlans, returns the current number of plans in the system
/// this should be called from rules, it will consider those that the
@ -242,9 +240,6 @@ class Optimizer {
/// @brief run only the required optimizer rules
bool _runOnlyRequiredRules;
/// @brief default value for maximal number of plans to produce
static constexpr size_t defaultMaxNumberOfPlans = 192;
};
} // namespace aql

View File

@ -54,10 +54,6 @@ struct OptimizerRule {
// "Pass 1": moving nodes "up" (potentially outside loops):
// ========================================================
// determine the "right" type of CollectNode and
// add a sort node for each COLLECT (may be removed later)
specializeCollectRule_pass1,
inlineSubqueriesRule_pass1,
// split and-combined filters into multiple smaller filters
@ -85,6 +81,10 @@ struct OptimizerRule {
// remove calculations that are never necessary
removeUnnecessaryCalculationsRule_pass2,
// determine the "right" type of CollectNode and
// add a sort node for each COLLECT (may be removed later)
specializeCollectRule_pass1,
// remove redundant sort blocks
removeRedundantSortsRule_pass2,

View File

@ -715,7 +715,9 @@ void arangodb::aql::removeCollectVariablesRule(
if (outVariable != nullptr &&
varsUsedLater.find(outVariable) == varsUsedLater.end()) {
// outVariable not used later
if (!collectNode->count()) {
collectNode->clearOutVariable();
}
modified = true;
}
@ -1140,7 +1142,7 @@ void arangodb::aql::specializeCollectRule(Optimizer* opt,
(!collectNode->hasOutVariable() || collectNode->count()) &&
collectNode->getOptions().canUseMethod(CollectOptions::CollectMethod::HASH));
if (canUseHashAggregation && !opt->runOnlyRequiredRules()) {
if (canUseHashAggregation && !opt->runOnlyRequiredRules(1)) {
if (collectNode->getOptions().shouldUseMethod(CollectOptions::CollectMethod::HASH)) {
// user has explicitly asked for hash method
// specialize the existing CollectNode so it will become a HashedCollectBlock
@ -2506,7 +2508,7 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule(
do {
// check if we already have enough plans (plus the one plan that we will
// add at the end of this function)
if (opt->hasEnoughPlans(1)) {
if (opt->runOnlyRequiredRules(1)) {
// have enough plans. stop permutations
break;
}

View File

@ -67,6 +67,9 @@ QueryOptions::QueryOptions() :
// "cache" only defaults to true if query cache is turned on
auto queryCacheMode = QueryCache::instance()->mode();
cache = (queryCacheMode == CACHE_ALWAYS_ON);
maxNumberOfPlans = q->maxQueryPlans();
TRI_ASSERT(maxNumberOfPlans > 0);
}
void QueryOptions::fromVelocyPack(VPackSlice const& slice) {
@ -87,6 +90,9 @@ void QueryOptions::fromVelocyPack(VPackSlice const& slice) {
value = slice.get("maxNumberOfPlans");
if (value.isNumber()) {
maxNumberOfPlans = value.getNumber<size_t>();
if (maxNumberOfPlans == 0) {
maxNumberOfPlans = 1;
}
}
value = slice.get("maxWarningCount");
if (value.isNumber()) {

View File

@ -25,6 +25,7 @@
#include "Aql/Query.h"
#include "Aql/QueryCache.h"
#include "Aql/QueryRegistry.h"
#include "Logger/Logger.h"
#include "ProgramOptions/ProgramOptions.h"
#include "ProgramOptions/Section.h"
@ -41,6 +42,7 @@ QueryRegistryFeature::QueryRegistryFeature(ApplicationServer* server)
_trackBindVars(true),
_failOnWarning(false),
_queryMemoryLimit(0),
_maxQueryPlans(128),
_slowQueryThreshold(10.0),
_queryCacheMode("off"),
_queryCacheEntries(128),
@ -83,10 +85,24 @@ void QueryRegistryFeature::collectOptions(
"maximum number of results in query result cache per database",
new UInt64Parameter(&_queryCacheEntries));
options->addOption("--query.optimizer-max-plans", "maximum number of query plans to create for a query",
new UInt64Parameter(&_maxQueryPlans));
options->addHiddenOption("--query.registry-ttl", "Default time-to-live of query snippets (in seconds)",
new DoubleParameter(&_queryRegistryTTL));
}
/// @brief validate the value configured for `--query.optimizer-max-plans`
/// a value of 0 is rejected: a fatal message is logged and FATAL_ERROR_EXIT()
/// is invoked. values above 1024 are silently reduced to 1024
void QueryRegistryFeature::validateOptions(
    std::shared_ptr<ProgramOptions> options) {
  // upper bound for the configurable number of plans. creating more plans
  // than this for a single query really does not make sense
  uint64_t const maxAllowedPlans = 1024;

  if (_maxQueryPlans == 0) {
    LOG_TOPIC(FATAL, Logger::FIXME) << "invalid value for `--query.optimizer-max-plans`. expecting at least 1";
    FATAL_ERROR_EXIT();
  }

  if (_maxQueryPlans > maxAllowedPlans) {
    _maxQueryPlans = maxAllowedPlans;
  }
}
void QueryRegistryFeature::prepare() {
// configure the query cache
std::pair<std::string, size_t> cacheProperties{_queryCacheMode,

View File

@ -39,6 +39,7 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu
public:
void collectOptions(std::shared_ptr<options::ProgramOptions>) override final;
void validateOptions(std::shared_ptr<options::ProgramOptions>) override final;
void prepare() override final;
void start() override final;
void unprepare() override final;
@ -48,12 +49,14 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu
double slowQueryThreshold() const { return _slowQueryThreshold; }
bool failOnWarning() const { return _failOnWarning; }
uint64_t queryMemoryLimit() const { return _queryMemoryLimit; }
uint64_t maxQueryPlans() const { return _maxQueryPlans; }
private:
bool _trackSlowQueries;
bool _trackBindVars;
bool _failOnWarning;
uint64_t _queryMemoryLimit;
uint64_t _maxQueryPlans;
double _slowQueryThreshold;
std::string _queryCacheMode;
uint64_t _queryCacheEntries;

View File

@ -474,7 +474,7 @@ function optimizerCollectMethodsTestSuite () {
g.push("q" + i);
}
q += "RETURN INTERSECTION(" + g.join(", ") + ")";
assertTrue(AQL_EXPLAIN(q, null).stats.plansCreated >= 256);
assertTrue(AQL_EXPLAIN(q, null).stats.plansCreated >= 128);
var result = AQL_EXECUTE(q).json;
assertEqual([3], result[0]);
},

View File

@ -229,7 +229,7 @@ function optimizerRuleTestSuite () {
"FOR o IN " + collectionName + " RETURN 1";
var explain = AQL_EXPLAIN(query);
assertEqual(192, explain.stats.plansCreated); // default limit enforced by optimizer
assertEqual(128, explain.stats.plansCreated); // default limit enforced by optimizer
},
////////////////////////////////////////////////////////////////////////////////