diff --git a/CHANGELOG b/CHANGELOG index 6cb31d1fe2..d7d398f1b2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,28 @@ v3.3.15 (XXXX-XX-XX) -------------------- +* added startup option `--query.optimizer-max-plans value` + + This option allows limiting the number of query execution plans created by the + AQL optimizer for any incoming query. The default value is `128`. + + Adjusting this value controls how many different query execution + plans the AQL query optimizer will generate at most for any given AQL query. + Normally the AQL query optimizer will generate a single execution plan per AQL query, + but there are some cases in which it creates multiple competing plans. More plans + can lead to better optimized queries. However, plan creation has its costs. The + more plans are created and shipped through the optimization pipeline, the more time + will be spent in the optimizer. + + Lowering this option's value will make the optimizer stop creating additional plans + once it has reached the configured maximum number of plans. + + Note that this setting controls the default maximum number of plans to create. The + value can still be adjusted on a per-query basis by setting the *maxNumberOfPlans* + attribute when running a query. + + This change also lowers the default maximum number of query plans from 192 to 128. + * bug fix: facilitate faster shutdown of coordinators and db servers * upgraded arangodb starter version to 0.13.2 diff --git a/Documentation/Books/Manual/Administration/Configuration/GeneralArangod.md b/Documentation/Books/Manual/Administration/Configuration/GeneralArangod.md index 547ef8260d..3d668f96ae 100644 --- a/Documentation/Books/Manual/Administration/Configuration/GeneralArangod.md +++ b/Documentation/Books/Manual/Administration/Configuration/GeneralArangod.md @@ -456,6 +456,26 @@ default timeout value (600 seconds) and that time out. The option has no effect in single-server mode. 
+### Limiting the number of query execution plans created by the AQL optimizer + +`--query.optimizer-max-plans value` + +Setting *value* controls how many different query execution plans +the AQL query optimizer will generate at most for any given AQL query. Normally +the AQL query optimizer will generate a single execution plan per AQL query, but +there are some cases in which it creates multiple competing plans. More plans +can lead to better optimized queries. However, plan creation has its costs. The +more plans are created and shipped through the optimization pipeline, the more +time will be spent in the optimizer. +Lowering *value* will make the optimizer stop creating additional plans once it +has reached the configured maximum number of plans. +Note that this setting controls the default maximum number of plans to create. The +value can still be adjusted on a per-query basis by setting the *maxNumberOfPlans* +attribute when running a query. + +The default value is *128*. The value must be at least *1*; values above *1024* are capped at *1024*. + + ### Throw collection not loaded error `--database.throw-collection-not-loaded-error flag` diff --git a/arangod/Aql/ExecutionNode.cpp b/arangod/Aql/ExecutionNode.cpp index 8d4754e40f..5bf33b8423 100644 --- a/arangod/Aql/ExecutionNode.cpp +++ b/arangod/Aql/ExecutionNode.cpp @@ -1504,9 +1504,7 @@ struct SubqueryVarUsageFinder final : public WalkerWorker { bool before(ExecutionNode* en) override final { // Add variables used here to _usedLater: - for (auto const& v : en->getVariablesUsedHere()) { - _usedLater.emplace(v); - } + en->getVariablesUsedHere(_usedLater); return false; } diff --git a/arangod/Aql/Optimizer.cpp b/arangod/Aql/Optimizer.cpp index 577671efb4..c4bdc64d39 100644 --- a/arangod/Aql/Optimizer.cpp +++ b/arangod/Aql/Optimizer.cpp @@ -31,12 +31,12 @@ using namespace arangodb::aql; // @brief constructor, this will initialize the rules database Optimizer::Optimizer(size_t maxNumberOfPlans) - : _maxNumberOfPlans(maxNumberOfPlans > 0 ? 
maxNumberOfPlans - : defaultMaxNumberOfPlans), + : _maxNumberOfPlans(maxNumberOfPlans), _runOnlyRequiredRules(false) {} -size_t Optimizer::hasEnoughPlans(size_t extraPlans) const { - return (_newPlans.size() + extraPlans >= _maxNumberOfPlans); +bool Optimizer::runOnlyRequiredRules(size_t extraPlans) const { + return (_runOnlyRequiredRules || + (_newPlans.size() + _plans.size() + extraPlans >= _maxNumberOfPlans)); } void Optimizer::disableRule(int rule) { @@ -54,7 +54,6 @@ void Optimizer::addPlan(std::unique_ptr plan, OptimizerRule const // else use user-specified new level } - if (wasModified) { if (!rule->isHidden) { // register which rules modified / created the plan @@ -70,6 +69,11 @@ void Optimizer::addPlan(std::unique_ptr plan, OptimizerRule const // hand over ownership _newPlans.push_back(plan.get(), newLevel); plan.release(); + + // stop adding new plans in case we already have enough + if (_newPlans.size() + _plans.size() >= _maxNumberOfPlans) { + _runOnlyRequiredRules = true; + } } // @brief the actual optimization @@ -185,14 +189,6 @@ int Optimizer::createPlans(ExecutionPlan* plan, leastDoneLevel = l; } } - - // Stop if the result gets out of hand: - if (!_runOnlyRequiredRules && _plans.size() >= _maxNumberOfPlans) { - // must still iterate over all REQUIRED remaining transformation rules - // because there are some rules which are required to make the query - // work in cluster mode etc - _runOnlyRequiredRules = true; - } } _stats.plansCreated = _plans.size(); diff --git a/arangod/Aql/Optimizer.h b/arangod/Aql/Optimizer.h index 8cdad60df9..9aa12be75a 100644 --- a/arangod/Aql/Optimizer.h +++ b/arangod/Aql/Optimizer.h @@ -152,7 +152,7 @@ class Optimizer { /// @brief constructor, this will initialize the rules database /// the .cpp file includes Aql/OptimizerRules.h /// and add all methods there to the rules database - explicit Optimizer(size_t); + explicit Optimizer(size_t maxNumberOfPlans); ~Optimizer() {} @@ -167,8 +167,6 @@ class Optimizer { /// 
stealPlans. int createPlans(ExecutionPlan* p, std::vector const&, bool); - size_t hasEnoughPlans(size_t extraPlans) const; - /// @brief add a plan to the optimizer void addPlan(std::unique_ptr, OptimizerRule const*, bool, int newLevel = 0); @@ -200,8 +198,8 @@ class Optimizer { return res; } - - bool runOnlyRequiredRules() const { return _runOnlyRequiredRules; } + + bool runOnlyRequiredRules(size_t extraPlans) const; /// @brief numberOfPlans, returns the current number of plans in the system /// this should be called from rules, it will consider those that the @@ -242,9 +240,6 @@ class Optimizer { /// @brief run only the required optimizer rules bool _runOnlyRequiredRules; - - /// @brief default value for maximal number of plans to produce - static constexpr size_t defaultMaxNumberOfPlans = 192; }; } // namespace aql diff --git a/arangod/Aql/OptimizerRule.h b/arangod/Aql/OptimizerRule.h index 86afb2807f..5676e6597e 100644 --- a/arangod/Aql/OptimizerRule.h +++ b/arangod/Aql/OptimizerRule.h @@ -54,10 +54,6 @@ struct OptimizerRule { // "Pass 1": moving nodes "up" (potentially outside loops): // ======================================================== - // determine the "right" type of CollectNode and - // add a sort node for each COLLECT (may be removed later) - specializeCollectRule_pass1, - inlineSubqueriesRule_pass1, // split and-combined filters into multiple smaller filters @@ -84,6 +80,10 @@ struct OptimizerRule { // remove calculations that are never necessary removeUnnecessaryCalculationsRule_pass2, + + // determine the "right" type of CollectNode and + // add a sort node for each COLLECT (may be removed later) + specializeCollectRule_pass1, // remove redundant sort blocks removeRedundantSortsRule_pass2, diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 16f8bb155e..657447fb6f 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -715,7 +715,9 @@ void arangodb::aql::removeCollectVariablesRule( if 
(outVariable != nullptr && varsUsedLater.find(outVariable) == varsUsedLater.end()) { // outVariable not used later - collectNode->clearOutVariable(); + if (!collectNode->count()) { + collectNode->clearOutVariable(); + } modified = true; } @@ -1140,7 +1142,7 @@ void arangodb::aql::specializeCollectRule(Optimizer* opt, (!collectNode->hasOutVariable() || collectNode->count()) && collectNode->getOptions().canUseMethod(CollectOptions::CollectMethod::HASH)); - if (canUseHashAggregation && !opt->runOnlyRequiredRules()) { + if (canUseHashAggregation && !opt->runOnlyRequiredRules(1)) { if (collectNode->getOptions().shouldUseMethod(CollectOptions::CollectMethod::HASH)) { // user has explicitly asked for hash method // specialize existing the CollectNode so it will become a HashedCollectBlock @@ -2506,7 +2508,7 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule( do { // check if we already have enough plans (plus the one plan that we will // add at the end of this function) - if (opt->hasEnoughPlans(1)) { + if (opt->runOnlyRequiredRules(1)) { // have enough plans. 
stop permutations break; } diff --git a/arangod/Aql/QueryOptions.cpp b/arangod/Aql/QueryOptions.cpp index a0667623d8..fc9a6b3525 100644 --- a/arangod/Aql/QueryOptions.cpp +++ b/arangod/Aql/QueryOptions.cpp @@ -67,6 +67,9 @@ QueryOptions::QueryOptions() : // "cache" only defaults to true if query cache is turned on auto queryCacheMode = QueryCache::instance()->mode(); cache = (queryCacheMode == CACHE_ALWAYS_ON); + + maxNumberOfPlans = q->maxQueryPlans(); + TRI_ASSERT(maxNumberOfPlans > 0); } void QueryOptions::fromVelocyPack(VPackSlice const& slice) { @@ -87,6 +90,9 @@ void QueryOptions::fromVelocyPack(VPackSlice const& slice) { value = slice.get("maxNumberOfPlans"); if (value.isNumber()) { maxNumberOfPlans = value.getNumber(); + if (maxNumberOfPlans == 0) { + maxNumberOfPlans = 1; + } } value = slice.get("maxWarningCount"); if (value.isNumber()) { diff --git a/arangod/RestServer/QueryRegistryFeature.cpp b/arangod/RestServer/QueryRegistryFeature.cpp index 6f7e282be5..c5fa9e4333 100644 --- a/arangod/RestServer/QueryRegistryFeature.cpp +++ b/arangod/RestServer/QueryRegistryFeature.cpp @@ -25,6 +25,7 @@ #include "Aql/Query.h" #include "Aql/QueryCache.h" #include "Aql/QueryRegistry.h" +#include "Logger/Logger.h" #include "ProgramOptions/ProgramOptions.h" #include "ProgramOptions/Section.h" @@ -41,6 +42,7 @@ QueryRegistryFeature::QueryRegistryFeature(ApplicationServer* server) _trackBindVars(true), _failOnWarning(false), _queryMemoryLimit(0), + _maxQueryPlans(128), _slowQueryThreshold(10.0), _queryCacheMode("off"), _queryCacheEntries(128), @@ -82,11 +84,25 @@ void QueryRegistryFeature::collectOptions( options->addOption("--query.cache-entries", "maximum number of results in query result cache per database", new UInt64Parameter(&_queryCacheEntries)); + + options->addOption("--query.optimizer-max-plans", "maximum number of query plans to create for a query", + new UInt64Parameter(&_maxQueryPlans)); options->addHiddenOption("--query.registry-ttl", "Default time-to-live of 
query snippets (in seconds)", new DoubleParameter(&_queryRegistryTTL)); } +void QueryRegistryFeature::validateOptions( + std::shared_ptr options) { + if (_maxQueryPlans == 0) { + LOG_TOPIC(FATAL, Logger::FIXME) << "invalid value for `--query.optimizer-max-plans`. expecting at least 1"; + FATAL_ERROR_EXIT(); + } + + // cap the value somehow. creating this many plans really does not make sense + _maxQueryPlans = std::min(_maxQueryPlans, decltype(_maxQueryPlans)(1024)); +} + void QueryRegistryFeature::prepare() { // configure the query cache std::pair cacheProperties{_queryCacheMode, diff --git a/arangod/RestServer/QueryRegistryFeature.h b/arangod/RestServer/QueryRegistryFeature.h index 41540f1eba..b88c631a1b 100644 --- a/arangod/RestServer/QueryRegistryFeature.h +++ b/arangod/RestServer/QueryRegistryFeature.h @@ -39,6 +39,7 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu public: void collectOptions(std::shared_ptr) override final; + void validateOptions(std::shared_ptr) override final; void prepare() override final; void start() override final; void unprepare() override final; @@ -48,12 +49,14 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu double slowQueryThreshold() const { return _slowQueryThreshold; } bool failOnWarning() const { return _failOnWarning; } uint64_t queryMemoryLimit() const { return _queryMemoryLimit; } + uint64_t maxQueryPlans() const { return _maxQueryPlans; } private: bool _trackSlowQueries; bool _trackBindVars; bool _failOnWarning; uint64_t _queryMemoryLimit; + uint64_t _maxQueryPlans; double _slowQueryThreshold; std::string _queryCacheMode; uint64_t _queryCacheEntries; diff --git a/js/server/tests/aql/aql-optimizer-collect-methods.js b/js/server/tests/aql/aql-optimizer-collect-methods.js index 411f05990f..7edc0c1b1d 100644 --- a/js/server/tests/aql/aql-optimizer-collect-methods.js +++ b/js/server/tests/aql/aql-optimizer-collect-methods.js @@ -474,7 +474,7 @@ function 
optimizerCollectMethodsTestSuite () { g.push("q" + i); } q += "RETURN INTERSECTION(" + g.join(", ") + ")"; - assertTrue(AQL_EXPLAIN(q, null).stats.plansCreated >= 256); + assertTrue(AQL_EXPLAIN(q, null).stats.plansCreated >= 128); var result = AQL_EXECUTE(q).json; assertEqual([3], result[0]); }, diff --git a/js/server/tests/aql/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js b/js/server/tests/aql/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js index bebe95166a..4b398843c6 100644 --- a/js/server/tests/aql/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js +++ b/js/server/tests/aql/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js @@ -229,7 +229,7 @@ function optimizerRuleTestSuite () { "FOR o IN " + collectionName + " RETURN 1"; var explain = AQL_EXPLAIN(query); - assertEqual(192, explain.stats.plansCreated); // default limit enforced by optimizer + assertEqual(128, explain.stats.plansCreated); // default limit enforced by optimizer }, ////////////////////////////////////////////////////////////////////////////////