speed up AQL query plan generation (#6243)

- add startup option `--query.optimizer-max-plans value` - honor "maxNumberOfPlans" in all cases - lower default value for "maxNumberOfPlans" from 192 to 128
2018-08-24 13:11:06 +02:00 · 2018-08-24 13:11:06 +02:00 · 3795f8c773
parent 91d4c7edb1
commit 3795f8c773
12 changed files with 91 additions and 33 deletions
--- a/22
+++ b/22
@ -1,6 +1,28 @@
 v3.3.15 (XXXX-XX-XX)
 --------------------

+* added startup option `--query.optimizer-max-plans value`
+
+  This option allows limiting the number of query execution plans created by the 
+  AQL optimizer for any incoming queries. The default value is `128`.
+
+  This value controls the maximum number of different query execution plans that
+  the AQL query optimizer will generate for any given AQL query.
+  Normally the AQL query optimizer will generate a single execution plan per AQL query,
+  but there are some cases in which it creates multiple competing plans. More plans
+  can lead to better optimized queries; however, plan creation has its costs. The
+  more plans are created and shipped through the optimization pipeline, the more time
+  will be spent in the optimizer.
+
+  Lowering this option's value will make the optimizer stop creating additional plans
+  once it has reached the configured maximum number of plans.
+
+  Note that this setting controls the default maximum number of plans to create. The
+  value can still be adjusted on a per-query basis by setting the *maxNumberOfPlans*
+  attribute when running a query.
+
+  This change also lowers the default maximum number of query plans from 192 to 128.
+
 * bug fix: facilitate faster shutdown of coordinators and db servers

 * upgraded arangodb starter version to 0.13.2
--- a/Documentation/Books/Manual/Administration/Configuration/GeneralArangod.md
+++ b/Documentation/Books/Manual/Administration/Configuration/GeneralArangod.md
@ -456,6 +456,26 @@ default timeout value (600 seconds) and that time out. The option has no effect
 in single-server mode.


+### Limiting the number of query execution plans created by the AQL optimizer
+
+`--query.optimizer-max-plans value`
+
+Setting *value* controls the maximum number of different query execution plans
+that the AQL query optimizer will generate for any given AQL query. Normally
+the AQL query optimizer will generate a single execution plan per AQL query, but
+there are some cases in which it creates multiple competing plans. More plans
+can lead to better optimized queries; however, plan creation has its costs. The
+more plans are created and shipped through the optimization pipeline, the more
+time will be spent in the optimizer.
+Lowering *value* will make the optimizer stop creating additional plans once it
+has reached the configured maximum number of plans.
+Note that this setting controls the default maximum number of plans to create. The
+value can still be adjusted on a per-query basis by setting the *maxNumberOfPlans*
+attribute when running a query.
+
+The default value is *128*.
+
+
 ### Throw collection not loaded error

 `--database.throw-collection-not-loaded-error flag`
--- a/arangod/Aql/ExecutionNode.cpp
+++ b/arangod/Aql/ExecutionNode.cpp
@ -1504,9 +1504,7 @@ struct SubqueryVarUsageFinder final : public WalkerWorker<ExecutionNode> {

  bool before(ExecutionNode* en) override final {
    // Add variables used here to _usedLater:
-    for (auto const& v : en->getVariablesUsedHere()) {
-      _usedLater.emplace(v);
-    }
+    en->getVariablesUsedHere(_usedLater);
    return false;
  }

--- a/arangod/Aql/Optimizer.cpp
+++ b/arangod/Aql/Optimizer.cpp
@ -31,12 +31,12 @@ using namespace arangodb::aql;

 // @brief constructor, this will initialize the rules database
 Optimizer::Optimizer(size_t maxNumberOfPlans)
-    : _maxNumberOfPlans(maxNumberOfPlans > 0 ? maxNumberOfPlans
-                                             : defaultMaxNumberOfPlans),
+    : _maxNumberOfPlans(maxNumberOfPlans),
      _runOnlyRequiredRules(false) {}
  
-size_t Optimizer::hasEnoughPlans(size_t extraPlans) const {
-  return (_newPlans.size() + extraPlans >= _maxNumberOfPlans);
+bool Optimizer::runOnlyRequiredRules(size_t extraPlans) const {
+  return (_runOnlyRequiredRules ||
+          (_newPlans.size() + _plans.size() + extraPlans >= _maxNumberOfPlans));
 }
  
 void Optimizer::disableRule(int rule) {
@ -54,7 +54,6 @@ void Optimizer::addPlan(std::unique_ptr<ExecutionPlan> plan, OptimizerRule const
    // else use user-specified new level
  }

-
  if (wasModified) {
    if (!rule->isHidden) {
      // register which rules modified / created the plan
@ -70,6 +69,11 @@ void Optimizer::addPlan(std::unique_ptr<ExecutionPlan> plan, OptimizerRule const
  // hand over ownership
  _newPlans.push_back(plan.get(), newLevel);
  plan.release();
+  
+  // stop adding new plans in case we already have enough
+  if (_newPlans.size() + _plans.size() >= _maxNumberOfPlans) {
+    _runOnlyRequiredRules = true;
+  }
 }

 // @brief the actual optimization
@ -185,14 +189,6 @@ int Optimizer::createPlans(ExecutionPlan* plan,
        leastDoneLevel = l;
      }
    }
-
-    // Stop if the result gets out of hand:
-    if (!_runOnlyRequiredRules && _plans.size() >= _maxNumberOfPlans) {
-      // must still iterate over all REQUIRED remaining transformation rules
-      // because there are some rules which are required to make the query
-      // work in cluster mode etc
-      _runOnlyRequiredRules = true;
-    }
  }

  _stats.plansCreated = _plans.size();
--- a/arangod/Aql/Optimizer.h
+++ b/arangod/Aql/Optimizer.h
@ -152,7 +152,7 @@ class Optimizer {
  /// @brief constructor, this will initialize the rules database
  /// the .cpp file includes Aql/OptimizerRules.h
  /// and add all methods there to the rules database
-  explicit Optimizer(size_t);
+  explicit Optimizer(size_t maxNumberOfPlans);

  ~Optimizer() {}

@ -167,8 +167,6 @@ class Optimizer {
  /// stealPlans.
  int createPlans(ExecutionPlan* p, std::vector<std::string> const&, bool);

-  size_t hasEnoughPlans(size_t extraPlans) const;
-
  /// @brief add a plan to the optimizer
  void addPlan(std::unique_ptr<ExecutionPlan>, OptimizerRule const*, bool, int newLevel = 0);

@ -201,7 +199,7 @@ class Optimizer {
    return res;
  }
  
-  bool runOnlyRequiredRules() const { return _runOnlyRequiredRules; }
+  bool runOnlyRequiredRules(size_t extraPlans) const;

  /// @brief numberOfPlans, returns the current number of plans in the system
  /// this should be called from rules, it will consider those that the
@ -242,9 +240,6 @@ class Optimizer {
  
  /// @brief run only the required optimizer rules
  bool _runOnlyRequiredRules;
-
-  /// @brief default value for maximal number of plans to produce
-  static constexpr size_t defaultMaxNumberOfPlans = 192;
 };

 }  // namespace aql
--- a/arangod/Aql/OptimizerRule.h
+++ b/arangod/Aql/OptimizerRule.h
@ -54,10 +54,6 @@ struct OptimizerRule {
    // "Pass 1": moving nodes "up" (potentially outside loops):
    // ========================================================

-    // determine the "right" type of CollectNode and
-    // add a sort node for each COLLECT (may be removed later)
-    specializeCollectRule_pass1,
-
    inlineSubqueriesRule_pass1,

    // split and-combined filters into multiple smaller filters
@ -85,6 +81,10 @@ struct OptimizerRule {
    // remove calculations that are never necessary
    removeUnnecessaryCalculationsRule_pass2,
    
+    // determine the "right" type of CollectNode and
+    // add a sort node for each COLLECT (may be removed later)
+    specializeCollectRule_pass1,
+
    // remove redundant sort blocks
    removeRedundantSortsRule_pass2,

--- a/arangod/Aql/OptimizerRules.cpp
+++ b/arangod/Aql/OptimizerRules.cpp
@ -715,7 +715,9 @@ void arangodb::aql::removeCollectVariablesRule(
    if (outVariable != nullptr &&
        varsUsedLater.find(outVariable) == varsUsedLater.end()) {
      // outVariable not used later
+      if (!collectNode->count()) {
        collectNode->clearOutVariable();
+      }
      modified = true;
    }

@ -1140,7 +1142,7 @@ void arangodb::aql::specializeCollectRule(Optimizer* opt,
         (!collectNode->hasOutVariable() || collectNode->count()) &&
         collectNode->getOptions().canUseMethod(CollectOptions::CollectMethod::HASH));

-    if (canUseHashAggregation && !opt->runOnlyRequiredRules()) {
+    if (canUseHashAggregation && !opt->runOnlyRequiredRules(1)) {
      if (collectNode->getOptions().shouldUseMethod(CollectOptions::CollectMethod::HASH)) {
        // user has explicitly asked for hash method
        // specialize existing the CollectNode so it will become a HashedCollectBlock
@ -2506,7 +2508,7 @@ void arangodb::aql::interchangeAdjacentEnumerationsRule(
    do {
      // check if we already have enough plans (plus the one plan that we will
      // add at the end of this function)
-      if (opt->hasEnoughPlans(1)) {
+      if (opt->runOnlyRequiredRules(1)) {
        // have enough plans. stop permutations
        break;
      }
--- a/arangod/Aql/QueryOptions.cpp
+++ b/arangod/Aql/QueryOptions.cpp
@ -67,6 +67,9 @@ QueryOptions::QueryOptions() :
  // "cache" only defaults to true if query cache is turned on
  auto queryCacheMode = QueryCache::instance()->mode();
  cache = (queryCacheMode == CACHE_ALWAYS_ON);
+  
+  maxNumberOfPlans = q->maxQueryPlans();
+  TRI_ASSERT(maxNumberOfPlans > 0);
 }
  
 void QueryOptions::fromVelocyPack(VPackSlice const& slice) {
@ -87,6 +90,9 @@ void QueryOptions::fromVelocyPack(VPackSlice const& slice) {
  value = slice.get("maxNumberOfPlans"); 
  if (value.isNumber()) {
    maxNumberOfPlans = value.getNumber<size_t>();
+    if (maxNumberOfPlans == 0) {
+      maxNumberOfPlans = 1;
+    }
  }
  value = slice.get("maxWarningCount"); 
  if (value.isNumber()) {
--- a/arangod/RestServer/QueryRegistryFeature.cpp
+++ b/arangod/RestServer/QueryRegistryFeature.cpp
@ -25,6 +25,7 @@
 #include "Aql/Query.h"
 #include "Aql/QueryCache.h"
 #include "Aql/QueryRegistry.h"
+#include "Logger/Logger.h"
 #include "ProgramOptions/ProgramOptions.h"
 #include "ProgramOptions/Section.h"

@ -41,6 +42,7 @@ QueryRegistryFeature::QueryRegistryFeature(ApplicationServer* server)
      _trackBindVars(true),
      _failOnWarning(false),
      _queryMemoryLimit(0),
+      _maxQueryPlans(128),
      _slowQueryThreshold(10.0),
      _queryCacheMode("off"),
      _queryCacheEntries(128),
@ -83,10 +85,24 @@ void QueryRegistryFeature::collectOptions(
                     "maximum number of results in query result cache per database",
                     new UInt64Parameter(&_queryCacheEntries));

+  options->addOption("--query.optimizer-max-plans", "maximum number of query plans to create for a query",
+                     new UInt64Parameter(&_maxQueryPlans));
+  
  options->addHiddenOption("--query.registry-ttl", "Default time-to-live of query snippets (in seconds)",
                           new DoubleParameter(&_queryRegistryTTL));
 }

+void QueryRegistryFeature::validateOptions(
+    std::shared_ptr<ProgramOptions> options) {
+  if (_maxQueryPlans == 0) {
+    LOG_TOPIC(FATAL, Logger::FIXME) << "invalid value for `--query.optimizer-max-plans`. expecting at least 1";
+    FATAL_ERROR_EXIT();
+  }
+
+  // cap the value somehow. creating this many plans really does not make sense
+  _maxQueryPlans = std::min(_maxQueryPlans, decltype(_maxQueryPlans)(1024));
+}
+
 void QueryRegistryFeature::prepare() {
  // configure the query cache
  std::pair<std::string, size_t> cacheProperties{_queryCacheMode,
--- a/arangod/RestServer/QueryRegistryFeature.h
+++ b/arangod/RestServer/QueryRegistryFeature.h
@ -39,6 +39,7 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu

 public:
  void collectOptions(std::shared_ptr<options::ProgramOptions>) override final;
+  void validateOptions(std::shared_ptr<options::ProgramOptions>) override final;
  void prepare() override final;
  void start() override final;
  void unprepare() override final;
@ -48,12 +49,14 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu
  double slowQueryThreshold() const { return _slowQueryThreshold; }
  bool failOnWarning() const { return _failOnWarning; }
  uint64_t queryMemoryLimit() const { return _queryMemoryLimit; }
+  uint64_t maxQueryPlans() const { return _maxQueryPlans; }

 private:
  bool _trackSlowQueries;
  bool _trackBindVars;
  bool _failOnWarning;
  uint64_t _queryMemoryLimit;
+  uint64_t _maxQueryPlans;
  double _slowQueryThreshold;
  std::string _queryCacheMode;
  uint64_t _queryCacheEntries;
--- a/js/server/tests/aql/aql-optimizer-collect-methods.js
+++ b/js/server/tests/aql/aql-optimizer-collect-methods.js
@ -474,7 +474,7 @@ function optimizerCollectMethodsTestSuite () {
        g.push("q" + i);
      }
      q += "RETURN INTERSECTION(" + g.join(", ") + ")";
-      assertTrue(AQL_EXPLAIN(q, null).stats.plansCreated >= 256);
+      assertTrue(AQL_EXPLAIN(q, null).stats.plansCreated >= 128);
      var result = AQL_EXECUTE(q).json;
      assertEqual([3], result[0]);
    },
--- a/js/server/tests/aql/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js
+++ b/js/server/tests/aql/aql-optimizer-rule-interchange-adjacent-enumerations-noncluster.js
@ -229,7 +229,7 @@ function optimizerRuleTestSuite () {
                  "FOR o IN " + collectionName + " RETURN 1";

      var explain = AQL_EXPLAIN(query);
-      assertEqual(192, explain.stats.plansCreated); // default limit enforced by optimizer
+      assertEqual(128, explain.stats.plansCreated); // default limit enforced by optimizer
    },

 ////////////////////////////////////////////////////////////////////////////////