arangodb/arangod/Aql/OptimizerRule.h

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2017 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////

#ifndef ARANGOD_AQL_AQL_OPTIMIZER_RULE_H
#define ARANGOD_AQL_AQL_OPTIMIZER_RULE_H 1

#include "Basics/Common.h"

#include <functional>
#include <memory>
#include <string>
#include <utility>

namespace arangodb {
namespace aql {
// forward declarations, sufficient for the callback signature below
class ExecutionPlan;
class Optimizer;
struct OptimizerRule;

/// @brief signature shared by all optimizer rule functions.
/// A rule function receives the optimizer, the ExecutionPlan to work on
/// (passed as a std::unique_ptr) and the rule that is currently being
/// applied. It has to append one or more plans to the resulting deque; the
/// original plan must be among them if it ought to be kept. For every plan
/// it appends, the rule has to set the plan's level to the largest rule level
/// that ought to be considered as done, which indicates the rule to be
/// applied next.
using RuleFunction =
    std::function<void(Optimizer*, std::unique_ptr<ExecutionPlan>, OptimizerRule const*)>;

/// @brief description of a single optimizer rule: its name, the function
/// implementing it, its level (position in the rule ordering) and some flags
struct OptimizerRule {
  /// @brief levels of all optimizer rules
  enum RuleLevel : int {
    // List all the rules in the system here:
    // lower level values mean earlier rule execution

    // note that levels must be unique. only `initial` carries an explicit
    // value, all following levels are numbered consecutively in declaration
    // order. inserting, removing or reordering enumerators thus changes the
    // numeric values of all subsequent levels
    initial = 100,

    // "Pass 1": moving nodes "up" (potentially outside loops):
    // ========================================================
    replaceNearWithinFulltext,

    inlineSubqueriesRule,

    // split and-combined filters into multiple smaller filters
    splitFiltersRule,

    // simplify some conditions in CalculationNodes
    simplifyConditionsRule,

    // move calculations up the dependency chain (to pull them out of
    // inner loops etc.)
    moveCalculationsUpRule,

    // move filters up the dependency chain (to make result sets as small
    // as possible as early as possible)
    moveFiltersUpRule,

    // remove calculations that are repeatedly used in a query
    removeRedundantCalculationsRule,

    // "Pass 2": try to remove redundant or unnecessary nodes
    // ======================================================

    // remove filters from the query that are not necessary at all
    // filters that are always true will be removed entirely
    // filters that are always false will be replaced with a NoResults node
    removeUnnecessaryFiltersRule,

    // remove calculations that are never necessary
    removeUnnecessaryCalculationsRule,

    // determine the "right" type of CollectNode and
    // add a sort node for each COLLECT (may be removed later)
    specializeCollectRule,

    // remove redundant sort blocks
    removeRedundantSortsRule,

    // push limits into subqueries and simplify them
    optimizeSubqueriesRule,

    // "Pass 3": interchange EnumerateCollection nodes in all possible ways
    //           please never let new plans from higher levels go back to
    //           this or lower levels!
    //           (the numeric value of this level follows from its position
    //           in this enum, it is not assigned explicitly)
    // ======================================================

    interchangeAdjacentEnumerationsRule,

    // "Pass 4": moving nodes "up" (potentially outside loops) (second try):
    // ======================================================

    // move calculations up the dependency chain (to pull them out of
    // inner loops etc.)
    moveCalculationsUpRule2,

    // move filters up the dependency chain (to make result sets as small
    // as possible as early as possible)
    moveFiltersUpRule2,

    // "Pass 5": try to remove redundant or unnecessary nodes (second try)
    // ======================================================

    // remove redundant sort blocks
    removeRedundantSortsRule2,

    // remove SORT RAND() if appropriate
    removeSortRandRule,

    // remove INTO for COLLECT if appropriate
    removeCollectVariablesRule,

    // propagate constant attributes in FILTERs
    propagateConstantAttributesRule,

    // remove unused out variables for data-modification queries
    removeDataModificationOutVariablesRule,

    // "Pass 6": use indexes if possible for FILTER and/or SORT nodes
    // ======================================================

    // replace simple OR conditions with IN
    replaceOrWithInRule,

    // remove redundant OR conditions
    removeRedundantOrRule,

    // remove FILTER and SORT if there are geoindexes
    applyGeoIndexRule,

    // replace FULLTEXT with index
    applyFulltextIndexRule,

    useIndexesRule,

    // try to remove filters covered by index ranges
    removeFiltersCoveredByIndexRule,

    // judging by its name, a second run of removeUnnecessaryFiltersRule
    // (to be confirmed against the rule registration)
    removeUnnecessaryFiltersRule2,

    // try to find sort blocks which are superseded by indexes
    useIndexForSortRule,

    // sort values used in IN comparisons of remaining filters
    sortInValuesRule,

    // merge filters into graph traversals
    optimizeTraversalsRule,
    // remove redundant filters statements
    removeFiltersCoveredByTraversal,

    // move filters and sort conditions into views and remove them
    handleArangoSearchViewsRule,

    // remove calculations that are redundant
    // needs to run after filter removal
    removeUnnecessaryCalculationsRule2,

    // remove now obsolete path variables
    removeTraversalPathVariable,
    prepareTraversalsRule,

    // when we have single document operations, fill in special cluster
    // handling.
    substituteSingleDocumentOperations,

    // make sort node aware of subsequent limit statements for internal optimizations
    applySortLimitRule,

    // "Pass 9": push down calculations beyond FILTERs and LIMITs
    moveCalculationsDownRule,

    // "Pass 9": fuse filter conditions
    fuseFiltersRule,

    // "Pass 9": patch update statements
    patchUpdateStatementsRule,

    // "Pass 10": final transformations for the cluster
    // ======================================================

    // optimize queries in the cluster so that the entire query
    // gets pushed to a single server
    optimizeClusterSingleShardRule,

    // make operations on sharded collections use distribute
    distributeInClusterRule,

#ifdef USE_ENTERPRISE
    smartJoinsRule,
#endif

    // make operations on sharded collections use scatter / gather / remote
    scatterInClusterRule,

    // FIXME order-???
    // make operations on sharded IResearch views use scatter / gather / remote
    scatterIResearchViewInClusterRule,

    // move FilterNodes & Calculation nodes in between
    // scatter(remote) <-> gather(remote) so they're
    // distributed to the cluster nodes.
    distributeFilternCalcToClusterRule,

    // move SortNodes into the distribution.
    // adjust gathernode to also contain the sort criteria.
    distributeSortToClusterRule,

    // try to get rid of a RemoteNode->ScatterNode combination which has
    // only a SingletonNode and possibly some CalculationNodes as dependencies
    removeUnnecessaryRemoteScatterRule,

#ifdef USE_ENTERPRISE
    // remove any superfluous satellite collection joins...
    // put it after Scatter rule because we would do
    // the work twice otherwise
    removeSatelliteJoinsRule,
#endif

    // recognize that a RemoveNode can be moved to the shards
    undistributeRemoveAfterEnumCollRule,

    // push collect operations to the db servers
    collectInClusterRule,

    // try to restrict fragments to a single shard if possible
    restrictToSingleShardRule,

    // simplify an EnumerationCollectionNode that fetches an
    // entire document to a projection of this document
    reduceExtractionToProjectionRule,
  };

  /// @brief name of the rule
  std::string name;
  /// @brief function that is invoked to apply the rule
  RuleFunction func;
  /// @brief level of the rule, lower levels are executed earlier
  RuleLevel const level;
  /// @brief presumably: whether applying the rule may yield more plans than
  /// it was given (to be confirmed against the optimizer)
  bool const canCreateAdditionalPlans;
  /// @brief presumably: whether the rule may be switched off (to be
  /// confirmed against the optimizer)
  bool const canBeDisabled;
  /// @brief presumably: whether the rule is omitted from rule listings (to
  /// be confirmed against the optimizer)
  bool const isHidden;

  /// @brief a rule without name, function and level makes no sense
  OptimizerRule() = delete;

  /// @brief create a rule from its parts.
  /// `name` and `func` are sink arguments: they are taken by value and moved
  /// into the members, so callers handing in temporaries (string literals,
  /// lambdas) do not pay for an additional copy. Callers passing lvalues
  /// still work unchanged.
  OptimizerRule(std::string name, RuleFunction func, RuleLevel level,
                bool canCreateAdditionalPlans, bool canBeDisabled, bool isHidden)
      : name(std::move(name)),
        func(std::move(func)),
        level(level),
        canCreateAdditionalPlans(canCreateAdditionalPlans),
        canBeDisabled(canBeDisabled),
        isHidden(isHidden) {}
};

}  // namespace aql
}  // namespace arangodb

#endif