diff --git a/CHANGELOG b/CHANGELOG index e7f364382c..7abacc47cd 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ v2.7.0 (XXXX-XX-XX) ------------------- +* added AQL optimizer rule `patch-update-statements` + * Linux startup scripts and systemd configuration for arangod now try to adjust the NOFILE (number of open files) limits for the process. The limit value is set to 131072 (128k) when ArangoDB is started via start/stop diff --git a/Documentation/Books/Users/Aql/Optimizer.mdpp b/Documentation/Books/Users/Aql/Optimizer.mdpp index 027eedc546..24c96baa01 100644 --- a/Documentation/Books/Users/Aql/Optimizer.mdpp +++ b/Documentation/Books/Users/Aql/Optimizer.mdpp @@ -360,6 +360,10 @@ The following optimizer rules may appear in the `rules` attribute of a plan: The intention of this rule is to move calculations down in the processing pipeline as far as possible (below *FILTER*, *LIMIT* and *SUBQUERY* nodes) so they are executed as late as possible and not before their results are required. +* `patch-update-statements`: will appear if an *UpdateNode* was patched to not buffer + its input completely, but to process it in smaller batches. The rule will fire for an + *UPDATE* query that is fed by a full collection scan, and that does not use any other + indexes or subqueries. 
The following optimizer rules may appear in the `rules` attribute of cluster plans: diff --git a/arangod/Aql/Optimizer.cpp b/arangod/Aql/Optimizer.cpp index bc85dfbb04..d94d14d4af 100644 --- a/arangod/Aql/Optimizer.cpp +++ b/arangod/Aql/Optimizer.cpp @@ -594,7 +594,7 @@ void Optimizer::setupRules () { moveCalculationsDownRule, moveCalculationsDownRule_pass9, true); - + // fuse calculations #if 0 registerRule("fuse-calculations", @@ -602,6 +602,12 @@ void Optimizer::setupRules () { fuseCalculationsRule_pass9, true); #endif + + // patch update statements + registerRule("patch-update-statements", + patchUpdateStatementsRule, + patchUpdateStatementsRule_pass9, + true); if (triagens::arango::ServerState::instance()->isCoordinator()) { // distribute operations in cluster diff --git a/arangod/Aql/Optimizer.h b/arangod/Aql/Optimizer.h index fc2e2a5d82..be4d167bc4 100644 --- a/arangod/Aql/Optimizer.h +++ b/arangod/Aql/Optimizer.h @@ -204,6 +204,12 @@ namespace triagens { fuseCalculationsRule_pass9 = 901, +////////////////////////////////////////////////////////////////////////////// +/// Pass 9: patch update statements +////////////////////////////////////////////////////////////////////////////// + + patchUpdateStatementsRule_pass9 = 902, + ////////////////////////////////////////////////////////////////////////////// /// "Pass 10": final transformations for the cluster ////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 079a4073f0..3fffd57e0b 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -4766,6 +4766,73 @@ int triagens::aql::removeDataModificationOutVariablesRule (Optimizer* opt, return TRI_ERROR_NO_ERROR; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief patch UPDATE statement on single collection that iterates over the +/// entire collection to operate in batches 
+///
+/// For every top-level UPDATE node that still has `readCompleteInput` set, the
+/// rule follows the chain of first dependencies. The node is patched (its
+/// `readCompleteInput` option is set to false) only if that chain contains at
+/// least one ENUMERATE_COLLECTION node, every ENUMERATE_COLLECTION node in it
+/// reads the collection the UPDATE writes to, and the chain contains no
+/// ENUMERATE_LIST, INDEX_RANGE or SUBQUERY node.
+///
+/// @param opt   optimizer that receives the plan via addPlan()
+/// @param plan  execution plan to inspect; its UPDATE node options are
+///              changed in place
+/// @param rule  rule descriptor that is handed through to addPlan()
+/// @return always TRI_ERROR_NO_ERROR
+////////////////////////////////////////////////////////////////////////////////
+
+int triagens::aql::patchUpdateStatementsRule (Optimizer* opt,
+                                              ExecutionPlan* plan,
+                                              Optimizer::Rule const* rule) {
+  // becomes true as soon as at least one UPDATE node was actually patched;
+  // this is what gets reported to the optimizer
+  bool modified = false;
+
+  // no need to dive into subqueries here, as UPDATE needs to be on the top level
+  std::vector<ExecutionNode*>&& nodes = plan->findNodesOfType(EN::UPDATE, false);
+
+  for (auto const& n : nodes) {
+    // we should only get through here a single time
+    // NOTE(review): cast target restored as ModificationNode* -- confirm
+    auto node = static_cast<ModificationNode*>(n);
+    TRI_ASSERT(node != nullptr);
+
+    auto& options = node->getOptions();
+    if (! options.readCompleteInput) {
+      // already ok
+      continue;
+    }
+
+    auto const collection = node->collection();
+
+    // suitability is tracked per UPDATE node, so that the verdict for one
+    // node can neither leak into the next node nor reset the overall
+    // "modified" result of the rule
+    bool canPatch = false;
+
+    for (auto dep = n->getFirstDependency();
+         dep != nullptr;
+         dep = dep->getFirstDependency()) {
+      auto const type = dep->getType();
+
+      if (type == EN::ENUMERATE_LIST ||
+          type == EN::INDEX_RANGE ||
+          type == EN::SUBQUERY) {
+        // not suitable
+        canPatch = false;
+        break;
+      }
+
+      if (type == EN::ENUMERATE_COLLECTION) {
+        // NOTE(review): cast target restored as EnumerateCollectionNode const*
+        // -- confirm
+        auto collectionNode = static_cast<EnumerateCollectionNode const*>(dep);
+
+        if (collectionNode->collection() != collection) {
+          // different collection, not suitable
+          canPatch = false;
+          break;
+        }
+
+        // same collection: suitable unless something further up the
+        // dependency chain disqualifies the node
+        canPatch = true;
+      }
+    }
+
+    if (canPatch) {
+      options.readCompleteInput = false;
+      modified = true;
+    }
+  }
+
+  // always re-add the original plan, be it modified or not
+  // only a flag in the plan will be modified
+  opt->addPlan(plan, rule, modified);
+
+  return TRI_ERROR_NO_ERROR;
+}
+
 // Local Variables:
 // mode: outline-minor
 // outline-regexp: "^\\(/// @brief\\|/// {@inheritDoc}\\|/// @addtogroup\\|// --SECTION--\\|/// @\\}\\)"
diff --git a/arangod/Aql/OptimizerRules.h b/arangod/Aql/OptimizerRules.h
index 524002f2e2..e43aa38430 100644
--- a/arangod/Aql/OptimizerRules.h
+++ b/arangod/Aql/OptimizerRules.h
@@ -244,6 +244,13 @@ namespace triagens {
 ////////////////////////////////////////////////////////////////////////////////
 
     int removeDataModificationOutVariablesRule (Optimizer*, ExecutionPlan*,
Optimizer::Rule const*); + +//////////////////////////////////////////////////////////////////////////////// +/// @brief patch UPDATE statement on single collection that iterates over the +/// entire collection to operate in batches +//////////////////////////////////////////////////////////////////////////////// + + int patchUpdateStatementsRule (Optimizer*, ExecutionPlan*, Optimizer::Rule const*); } // namespace aql } // namespace triagens