mirror of https://gitee.com/bigwinds/arangodb
speed up AQL query plan generation (#6243)
- add startup option `--query.optimizer-max-plans value` - honor "maxNumberOfPlans" in all cases - lower default value for "maxNumberOfPlans" from 192 to 128
This commit is contained in:
parent
91d4c7edb1
commit
3795f8c773
22
CHANGELOG
22
CHANGELOG
|
@ -1,6 +1,28 @@
|
|||
v3.3.15 (XXXX-XX-XX)
|
||||
--------------------
|
||||
|
||||
* added startup option `--query.optimizer-max-plans value`
|
||||
|
||||
This option allows limiting the number of query execution plans created by the
|
||||
AQL optimizer for any incoming queries. The default value is `128`.
|
||||
|
||||
Adjusting this value controls how many different query execution
|
||||
plans the AQL query optimizer will generate at most for any given AQL query.
|
||||
Normally the AQL query optimizer will generate a single execution plan per AQL query,
|
||||
but there are some cases in which it creates multiple competing plans. More plans
|
||||
can lead to better optimized queries; however, plan creation has its costs. The
|
||||
more plans are created and shipped through the optimization pipeline, the more time
|
||||
will be spent in the optimizer.
|
||||
|
||||
Lowering this option's value will make the optimizer stop creating additional plans
|
||||
when it has already created enough plans.
|
||||
|
||||
Note that this setting controls the default maximum number of plans to create. The
|
||||
value can still be adjusted on a per-query basis by setting the *maxNumberOfPlans*
|
||||
attribute when running a query.
|
||||
|
||||
This change also lowers the default maximum number of query plans from 192 to 128.
|
||||
|
||||
* bug fix: facilitate faster shutdown of coordinators and db servers
|
||||
|
||||
* upgraded arangodb starter version to 0.13.2
|
||||
|
|
|
@ -456,6 +456,26 @@ default timeout value (600 seconds) and that time out. The option has no effect
|
|||
in single-server mode.
|
||||
|
||||
|
||||
### Limiting the number of query execution plans created by the AQL optimizer
|
||||
|
||||
`--query.optimizer-max-plans value`
|
||||
|
||||
Setting *value* controls how many different query execution plans
|
||||
the AQL query optimizer will generate at most for any given AQL query. Normally
|
||||
the AQL query optimizer will generate a single execution plan per AQL query, but
|
||||
there are some cases in which it creates multiple competing plans. More plans
|
||||
can lead to better optimized queries; however, plan creation has its costs. The
|
||||
more plans are created and shipped through the optimization pipeline, the more
|
||||
time will be spent in the optimizer.
|
||||
Lowering *value* will make the optimizer stop creating additional plans when it
|
||||
has already created enough plans.
|
||||
Note that this setting controls the default maximum number of plans to create. The
|
||||
value can still be adjusted on a per-query basis by setting the *maxNumberOfPlans*
|
||||
attribute when running a query.
|
||||
|
||||
The default value is *128*.
|
||||
|
||||
|
||||
### Throw collection not loaded error
|
||||
|
||||
`--database.throw-collection-not-loaded-error flag`
|
||||
|
|
|
@ -1504,9 +1504,7 @@ struct SubqueryVarUsageFinder final : public WalkerWorker<ExecutionNode> {
|
|||
|
||||
bool before(ExecutionNode* en) override final {
|
||||
// Add variables used here to _usedLater:
|
||||
for (auto const& v : en->getVariablesUsedHere()) {
|
||||
_usedLater.emplace(v);
|
||||
}
|
||||
en->getVariablesUsedHere(_usedLater);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -31,12 +31,12 @@ using namespace arangodb::aql;
|
|||
|
||||
// @brief constructor, this will initialize the rules database
|
||||
Optimizer::Optimizer(size_t maxNumberOfPlans)
|
||||
: _maxNumberOfPlans(maxNumberOfPlans > 0 ? maxNumberOfPlans
|
||||
: defaultMaxNumberOfPlans),
|
||||
: _maxNumberOfPlans(maxNumberOfPlans),
|
||||
_runOnlyRequiredRules(false) {}
|
||||
|
||||
size_t Optimizer::hasEnoughPlans(size_t extraPlans) const {
|
||||
return (_newPlans.size() + extraPlans >= _maxNumberOfPlans);
|
||||
bool Optimizer::runOnlyRequiredRules(size_t extraPlans) const {
|
||||
return (_runOnlyRequiredRules ||
|
||||
(_newPlans.size() + _plans.size() + extraPlans >= _maxNumberOfPlans));
|
||||
}
|
||||
|
||||
void Optimizer::disableRule(int rule) {
|
||||
|
@ -54,7 +54,6 @@ void Optimizer::addPlan(std::unique_ptr<ExecutionPlan> plan, OptimizerRule const
|
|||
// else use user-specified new level
|
||||
}
|
||||
|
||||
|
||||
if (wasModified) {
|
||||
if (!rule->isHidden) {
|
||||
// register which rules modified / created the plan
|
||||
|
@ -70,6 +69,11 @@ void Optimizer::addPlan(std::unique_ptr<ExecutionPlan> plan, OptimizerRule const
|
|||
// hand over ownership
|
||||
_newPlans.push_back(plan.get(), newLevel);
|
||||
plan.release();
|
||||
|
||||
// stop adding new plans in case we already have enough
|
||||
if (_newPlans.size() + _plans.size() >= _maxNumberOfPlans) {
|
||||
_runOnlyRequiredRules = true;
|
||||
}
|
||||
}
|
||||
|
||||
// @brief the actual optimization
|
||||
|
@ -185,14 +189,6 @@ int Optimizer::createPlans(ExecutionPlan* plan,
|
|||
leastDoneLevel = l;
|
||||
}
|
||||
}
|
||||
|
||||
// Stop if the result gets out of hand:
|
||||
if (!_runOnlyRequiredRules && _plans.size() >= _maxNumberOfPlans) {
|
||||
// must still iterate over all REQUIRED remaining transformation rules
|
||||
// because there are some rules which are required to make the query
|
||||
// work in cluster mode etc
|
||||
_runOnlyRequiredRules = true;
|
||||
}
|
||||
}
|
||||
|
||||
_stats.plansCreated = _plans.size();
|
||||
|
|
|
@ -152,7 +152,7 @@ class Optimizer {
|
|||
/// @brief constructor, this will initialize the rules database
|
||||
/// the .cpp file includes Aql/OptimizerRules.h
|
||||
/// and add all methods there to the rules database
|
||||
explicit Optimizer(size_t);
|
||||
explicit Optimizer(size_t maxNumberOfPlans);
|
||||
|
||||
~Optimizer() {}
|
||||
|
||||
|
@ -167,8 +167,6 @@ class Optimizer {
|
|||
/// stealPlans.
|
||||
int createPlans(ExecutionPlan* p, std::vector<std::string> const&, bool);
|
||||
|
||||
size_t hasEnoughPlans(size_t extraPlans) const;
|
||||
|
||||
/// @brief add a plan to the optimizer
|
||||
void addPlan(std::unique_ptr<ExecutionPlan>, OptimizerRule const*, bool, int newLevel = 0);
|
||||
|
||||
|
@ -201,7 +199,7 @@ class Optimizer {
|
|||
return res;
|
||||
}
|
||||
|
||||
bool runOnlyRequiredRules() const { return _runOnlyRequiredRules; }
|
||||
bool runOnlyRequiredRules(size_t extraPlans) const;
|
||||
|
||||
/// @brief numberOfPlans, returns the current number of plans in the system
|
||||
/// this should be called from rules, it will consider those that the
|
||||
|
@ -242,9 +240,6 @@ class Optimizer {
|
|||
|
||||
/// @brief run only the required optimizer rules
|
||||
bool _runOnlyRequiredRules;
|
||||
|
||||
/// @brief default value for maximal number of plans to produce
|
||||
static constexpr size_t defaultMaxNumberOfPlans = 192;
|
||||
};
|
||||
|
||||
} // namespace aql
|
||||
|
|
|
@ -54,10 +54,6 @@ struct OptimizerRule {
|
|||
// "Pass 1": moving nodes "up" (potentially outside loops):
|
||||
// ========================================================
|
||||
|
||||
// determine the "right" type of CollectNode and
|
||||
// add a sort node for each COLLECT (may be removed later)
|
||||
specializeCollectRule_pass1,
|
||||
|
||||
inlineSubqueriesRule_pass1,
|
||||
|
||||
// split and-combined filters into multiple smaller filters
|
||||
|
@ -85,6 +81,10 @@ struct OptimizerRule {
|
|||
// remove calculations that are never necessary
|
||||
removeUnnecessaryCalculationsRule_pass2,
|
||||
|
||||
// determine the "right" type of CollectNode and
|
||||
// add a sort node for each COLLECT (may be removed later)
|
||||
specializeCollectRule_pass1,
|
||||
|
||||
// remove redundant sort blocks
|
||||
removeRedundantSortsRule_pass2,
|
||||
|
||||
|
|
|
@ -715,7 +715,9 @@ void arangodb::aql::removeCollectVariablesRule(
|
|||
if (outVariable != nullptr &&
|
||||
varsUsedLater.find(outVariable) == varsUsedLater.end()) {
|
||||
// outVariable not used later
|
||||
if (!collectNode->count()) {
|
||||
collectNode->clearOutVariable();
|
||||
}
|
||||
modified = true;
|
||||
}
|
||||
|
||||
|
@ -1140,7 +1142,7 @@ void arangodb::aql::specializeCollectRule(Optimizer* opt,
|
|||
(!collectNode->hasOutVariable() || collectNode->count()) &&
|
||||
collectNode->getOptions().canUseMethod(CollectOptions::CollectMethod::HASH));
|
||||
|
||||
if (canUseHashAggregation && !opt->runOnlyRequiredRules()) {
|
||||
if (canUseHashAggregation && !opt->runOnlyRequiredRules(1)) {
|
||||
if (collectNode->getOptions().shouldUseMethod(CollectOptions::CollectMethod::HASH)) {
|
||||
// user has explicitly asked for hash method
|
||||
// specialize the existing CollectNode so it will become a HashedCollectBlock
|
||||
|
@ -2506,7 +2508,7 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule(
|
|||
do {
|
||||
// check if we already have enough plans (plus the one plan that we will
|
||||
// add at the end of this function)
|
||||
if (opt->hasEnoughPlans(1)) {
|
||||
if (opt->runOnlyRequiredRules(1)) {
|
||||
// have enough plans. stop permutations
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -67,6 +67,9 @@ QueryOptions::QueryOptions() :
|
|||
// "cache" only defaults to true if query cache is turned on
|
||||
auto queryCacheMode = QueryCache::instance()->mode();
|
||||
cache = (queryCacheMode == CACHE_ALWAYS_ON);
|
||||
|
||||
maxNumberOfPlans = q->maxQueryPlans();
|
||||
TRI_ASSERT(maxNumberOfPlans > 0);
|
||||
}
|
||||
|
||||
void QueryOptions::fromVelocyPack(VPackSlice const& slice) {
|
||||
|
@ -87,6 +90,9 @@ void QueryOptions::fromVelocyPack(VPackSlice const& slice) {
|
|||
value = slice.get("maxNumberOfPlans");
|
||||
if (value.isNumber()) {
|
||||
maxNumberOfPlans = value.getNumber<size_t>();
|
||||
if (maxNumberOfPlans == 0) {
|
||||
maxNumberOfPlans = 1;
|
||||
}
|
||||
}
|
||||
value = slice.get("maxWarningCount");
|
||||
if (value.isNumber()) {
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "Aql/Query.h"
|
||||
#include "Aql/QueryCache.h"
|
||||
#include "Aql/QueryRegistry.h"
|
||||
#include "Logger/Logger.h"
|
||||
#include "ProgramOptions/ProgramOptions.h"
|
||||
#include "ProgramOptions/Section.h"
|
||||
|
||||
|
@ -41,6 +42,7 @@ QueryRegistryFeature::QueryRegistryFeature(ApplicationServer* server)
|
|||
_trackBindVars(true),
|
||||
_failOnWarning(false),
|
||||
_queryMemoryLimit(0),
|
||||
_maxQueryPlans(128),
|
||||
_slowQueryThreshold(10.0),
|
||||
_queryCacheMode("off"),
|
||||
_queryCacheEntries(128),
|
||||
|
@ -83,10 +85,24 @@ void QueryRegistryFeature::collectOptions(
|
|||
"maximum number of results in query result cache per database",
|
||||
new UInt64Parameter(&_queryCacheEntries));
|
||||
|
||||
options->addOption("--query.optimizer-max-plans", "maximum number of query plans to create for a query",
|
||||
new UInt64Parameter(&_maxQueryPlans));
|
||||
|
||||
options->addHiddenOption("--query.registry-ttl", "Default time-to-live of query snippets (in seconds)",
|
||||
new DoubleParameter(&_queryRegistryTTL));
|
||||
}
|
||||
|
||||
/// @brief validates the value configured for `--query.optimizer-max-plans`
/// A value of 0 is rejected with a FATAL log message followed by
/// FATAL_ERROR_EXIT(); any other value is clamped to at most 1024.
/// The `options` parameter is not used in the body shown here.
void QueryRegistryFeature::validateOptions(
|
||||
std::shared_ptr<ProgramOptions> options) {
|
||||
// a limit of 0 plans is rejected: at least 1 plan is expected
if (_maxQueryPlans == 0) {
|
||||
LOG_TOPIC(FATAL, Logger::FIXME) << "invalid value for `--query.optimizer-max-plans`. expecting at least 1";
|
||||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
|
||||
// cap the value somehow. creating this many plans really does not make sense
|
||||
// the decltype cast gives the literal 1024 the same type as _maxQueryPlans,
// so both std::min arguments have the same type
_maxQueryPlans = std::min(_maxQueryPlans, decltype(_maxQueryPlans)(1024));
|
||||
}
|
||||
|
||||
void QueryRegistryFeature::prepare() {
|
||||
// configure the query cache
|
||||
std::pair<std::string, size_t> cacheProperties{_queryCacheMode,
|
||||
|
|
|
@ -39,6 +39,7 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu
|
|||
|
||||
public:
|
||||
void collectOptions(std::shared_ptr<options::ProgramOptions>) override final;
|
||||
void validateOptions(std::shared_ptr<options::ProgramOptions>) override final;
|
||||
void prepare() override final;
|
||||
void start() override final;
|
||||
void unprepare() override final;
|
||||
|
@ -48,12 +49,14 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu
|
|||
double slowQueryThreshold() const { return _slowQueryThreshold; }
|
||||
bool failOnWarning() const { return _failOnWarning; }
|
||||
uint64_t queryMemoryLimit() const { return _queryMemoryLimit; }
|
||||
uint64_t maxQueryPlans() const { return _maxQueryPlans; }
|
||||
|
||||
private:
|
||||
bool _trackSlowQueries;
|
||||
bool _trackBindVars;
|
||||
bool _failOnWarning;
|
||||
uint64_t _queryMemoryLimit;
|
||||
uint64_t _maxQueryPlans;
|
||||
double _slowQueryThreshold;
|
||||
std::string _queryCacheMode;
|
||||
uint64_t _queryCacheEntries;
|
||||
|
|
|
@ -474,7 +474,7 @@ function optimizerCollectMethodsTestSuite () {
|
|||
g.push("q" + i);
|
||||
}
|
||||
q += "RETURN INTERSECTION(" + g.join(", ") + ")";
|
||||
assertTrue(AQL_EXPLAIN(q, null).stats.plansCreated >= 256);
|
||||
assertTrue(AQL_EXPLAIN(q, null).stats.plansCreated >= 128);
|
||||
var result = AQL_EXECUTE(q).json;
|
||||
assertEqual([3], result[0]);
|
||||
},
|
||||
|
|
|
@ -229,7 +229,7 @@ function optimizerRuleTestSuite () {
|
|||
"FOR o IN " + collectionName + " RETURN 1";
|
||||
|
||||
var explain = AQL_EXPLAIN(query);
|
||||
assertEqual(192, explain.stats.plansCreated); // default limit enforced by optimizer
|
||||
assertEqual(128, explain.stats.plansCreated); // default limit enforced by optimizer
|
||||
},
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
Loading…
Reference in New Issue