mirror of https://gitee.com/bigwinds/arangodb
make index selection more deterministic (#9736)
* make index selection more deterministic * updated CHANGELOG * serialize indexes used by traversal with their estimates * serialize selectivity estimates for shortest path nodes too * fix assertion that doesn't hold true in unit tests * fix test
This commit is contained in:
parent
0f03655ce3
commit
3dcc293224
|
@ -1,6 +1,9 @@
|
||||||
v3.5.1 (XXXX-XX-XX)
|
v3.5.1 (XXXX-XX-XX)
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
|
* Made index selection much more deterministic in case there are
|
||||||
|
multiple competing indexes.
|
||||||
|
|
||||||
* Fixed issue #9654: honor value of `--rocksdb.max-write-buffer-number` if it
|
* Fixed issue #9654: honor value of `--rocksdb.max-write-buffer-number` if it
|
||||||
is set to at least 9 (which is the recommended value). Ignore it if it is
|
is set to at least 9 (which is the recommended value). Ignore it if it is
|
||||||
set to a lower value than 9, and warn the end user about it.
|
set to a lower value than 9, and warn the end user about it.
|
||||||
|
|
|
@ -150,7 +150,7 @@ void ShortestPathOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
|
||||||
builder.add("base", VPackValue(VPackValueType::Array));
|
builder.add("base", VPackValue(VPackValueType::Array));
|
||||||
for (auto const& it : _baseLookupInfos) {
|
for (auto const& it : _baseLookupInfos) {
|
||||||
for (auto const& it2 : it.idxHandles) {
|
for (auto const& it2 : it.idxHandles) {
|
||||||
it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
|
it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
builder.close();
|
builder.close();
|
||||||
|
|
|
@ -304,7 +304,7 @@ void TraverserOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
|
||||||
builder.add("base", VPackValue(VPackValueType::Array));
|
builder.add("base", VPackValue(VPackValueType::Array));
|
||||||
for (auto const& it : _baseLookupInfos) {
|
for (auto const& it : _baseLookupInfos) {
|
||||||
for (auto const& it2 : it.idxHandles) {
|
for (auto const& it2 : it.idxHandles) {
|
||||||
it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
|
it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
builder.close();
|
builder.close();
|
||||||
|
@ -316,7 +316,7 @@ void TraverserOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
|
||||||
builder.add(VPackValue(VPackValueType::Array));
|
builder.add(VPackValue(VPackValueType::Array));
|
||||||
for (auto const& it2 : it.second) {
|
for (auto const& it2 : it.second) {
|
||||||
for (auto const& it3 : it2.idxHandles) {
|
for (auto const& it3 : it2.idxHandles) {
|
||||||
it3.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
|
it3.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
builder.close();
|
builder.close();
|
||||||
|
|
|
@ -26,8 +26,12 @@
|
||||||
#include "Aql/AstNode.h"
|
#include "Aql/AstNode.h"
|
||||||
#include "Aql/Variable.h"
|
#include "Aql/Variable.h"
|
||||||
#include "Indexes/Index.h"
|
#include "Indexes/Index.h"
|
||||||
|
#include "StorageEngine/EngineSelectorFeature.h"
|
||||||
|
#include "StorageEngine/StorageEngine.h"
|
||||||
#include "VocBase/vocbase.h"
|
#include "VocBase/vocbase.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#include <velocypack/StringRef.h>
|
#include <velocypack/StringRef.h>
|
||||||
|
|
||||||
using namespace arangodb;
|
using namespace arangodb;
|
||||||
|
@ -79,7 +83,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchOne(arangodb::Index cons
|
||||||
|
|
||||||
if (which != nullptr) {
|
if (which != nullptr) {
|
||||||
// we can use the index for the condition
|
// we can use the index for the condition
|
||||||
costs = calculateIndexCosts(index, which, itemsInIndex * values, 1);
|
costs = calculateIndexCosts(index, which, itemsInIndex, values, 1);
|
||||||
} else {
|
} else {
|
||||||
// we cannot use the index for the condition
|
// we cannot use the index for the condition
|
||||||
++postFilterConditions;
|
++postFilterConditions;
|
||||||
|
@ -155,7 +159,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
|
||||||
values = 1;
|
values = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex * values);
|
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
|
||||||
|
|
||||||
if (_found.size() == _attributes.size()) {
|
if (_found.size() == _attributes.size()) {
|
||||||
// can only use this index if all index attributes are covered by the
|
// can only use this index if all index attributes are covered by the
|
||||||
|
@ -168,7 +172,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
|
||||||
which = nullptr;
|
which = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
costs = calculateIndexCosts(index, which, itemsInIndex * values, _found.size());
|
costs = calculateIndexCosts(index, which, itemsInIndex, values, _found.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// honor the costs of post-index filter conditions
|
// honor the costs of post-index filter conditions
|
||||||
|
@ -315,34 +319,34 @@ arangodb::aql::AstNode* SimpleAttributeEqualityMatcher::specializeAll(
|
||||||
/// cost values have no special meaning, except that multiple cost values are
|
/// cost values have no special meaning, except that multiple cost values are
|
||||||
/// comparable, and lower values mean lower costs
|
/// comparable, and lower values mean lower costs
|
||||||
Index::FilterCosts SimpleAttributeEqualityMatcher::calculateIndexCosts(
|
Index::FilterCosts SimpleAttributeEqualityMatcher::calculateIndexCosts(
|
||||||
arangodb::Index const* index, arangodb::aql::AstNode const* attribute,
|
arangodb::Index const* idx, arangodb::aql::AstNode const* attribute,
|
||||||
size_t itemsInIndex, size_t coveredAttributes) const {
|
size_t itemsInIndex, size_t values, size_t coveredAttributes) const {
|
||||||
// note: attribute will be set to the index attribute for single-attribute
|
// note: attribute will be set to the index attribute for single-attribute
|
||||||
// indexes such as the primary and edge indexes, and is a nullptr for the
|
// indexes such as the primary and edge indexes, and is a nullptr for the
|
||||||
// other indexes
|
// other indexes
|
||||||
Index::FilterCosts costs;
|
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
|
||||||
costs.supportsCondition = true;
|
costs.supportsCondition = true;
|
||||||
costs.coveredAttributes = coveredAttributes;
|
costs.coveredAttributes = coveredAttributes;
|
||||||
|
|
||||||
if (index->unique() || index->implicitlyUnique()) {
|
if (itemsInIndex > 0) {
|
||||||
// index is unique, and the condition covers all attributes
|
costs.estimatedItems = static_cast<size_t>(itemsInIndex * values);
|
||||||
// now use a low value for the costs
|
|
||||||
costs.estimatedItems = 1;
|
// the index mocks do not have a selectivity estimate...
|
||||||
costs.estimatedCosts = 0.95 - 0.05 * (index->fields().size() - 1);
|
if (idx->hasSelectivityEstimate()) {
|
||||||
} else if (index->hasSelectivityEstimate()) {
|
// use index selectivity estimate
|
||||||
// use index selectivity estimate
|
arangodb::velocypack::StringRef att;
|
||||||
arangodb::velocypack::StringRef att;
|
if (attribute != nullptr && attribute->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
|
||||||
if (attribute != nullptr && attribute->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
|
att = arangodb::velocypack::StringRef(attribute->getStringValue(), attribute->getStringLength());
|
||||||
att = arangodb::velocypack::StringRef(attribute->getStringValue(), attribute->getStringLength());
|
}
|
||||||
}
|
double estimate = idx->selectivityEstimate(att);
|
||||||
double estimate = index->selectivityEstimate(att);
|
if (estimate > 0.0) {
|
||||||
if (estimate <= 0.0) {
|
costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
|
||||||
// prevent division by zero
|
}
|
||||||
costs.estimatedItems = itemsInIndex;
|
} else {
|
||||||
// the more attributes are contained in the index, the more specific the
|
// no selectivity estimate present. this should only happen for mock indexes.
|
||||||
// lookup will be
|
// anyway, use a hard-coded formula for determining the number of results
|
||||||
double equalityReductionFactor = 20.0;
|
double equalityReductionFactor = 20.0;
|
||||||
for (size_t i = 0; i < index->fields().size(); ++i) {
|
for (size_t i = 0; i < coveredAttributes; ++i) {
|
||||||
costs.estimatedItems /= static_cast<size_t>(equalityReductionFactor);
|
costs.estimatedItems /= static_cast<size_t>(equalityReductionFactor);
|
||||||
// decrease the effect of the equality reduction factor
|
// decrease the effect of the equality reduction factor
|
||||||
equalityReductionFactor *= 0.25;
|
equalityReductionFactor *= 0.25;
|
||||||
|
@ -351,17 +355,34 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::calculateIndexCosts(
|
||||||
equalityReductionFactor = 2.0;
|
equalityReductionFactor = 2.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
costs.estimatedItems = (std::max)(costs.estimatedItems, static_cast<size_t>(1));
|
// costs.estimatedItems is always set here, make it at least 1
|
||||||
// the more attributes are covered by an index, the more accurate it
|
costs.estimatedItems = std::max(size_t(1), costs.estimatedItems);
|
||||||
// is considered to be
|
|
||||||
costs.estimatedCosts = static_cast<double>(costs.estimatedItems) - index->fields().size() * 0.01;
|
// seek cost is O(log(n)) for RocksDB, and O(1) for mmfiles
|
||||||
} else {
|
// TODO: move this into storage engine!
|
||||||
// no such index should exist
|
if (EngineSelectorFeature::ENGINE->typeName() == "mmfiles") {
|
||||||
TRI_ASSERT(false);
|
costs.estimatedCosts = std::max(double(1.0), double(values));
|
||||||
|
} else {
|
||||||
|
costs.estimatedCosts = std::max(double(1.0),
|
||||||
|
std::log2(double(itemsInIndex)) * values);
|
||||||
|
if (idx->unique()) {
|
||||||
|
costs.estimatedCosts = std::max(double(1.0), double(itemsInIndex) * values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// add per-document processing cost
|
||||||
|
costs.estimatedCosts += costs.estimatedItems * 0.05;
|
||||||
|
// slightly prefer indexes that cover more attributes
|
||||||
|
costs.estimatedCosts -= (idx->fields().size() - 1) * 0.02;
|
||||||
|
|
||||||
|
// cost is already low... now slightly prioritize unique indexes
|
||||||
|
if (idx->unique() || idx->implicitlyUnique()) {
|
||||||
|
costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// box the estimated costs to [0 - inf
|
||||||
|
costs.estimatedCosts = std::max(double(0.0), costs.estimatedCosts);
|
||||||
}
|
}
|
||||||
|
|
||||||
return costs;
|
return costs;
|
||||||
|
|
|
@ -86,7 +86,8 @@ class SimpleAttributeEqualityMatcher {
|
||||||
/// comparable, and lower values mean lower costs
|
/// comparable, and lower values mean lower costs
|
||||||
Index::FilterCosts calculateIndexCosts(arangodb::Index const* index,
|
Index::FilterCosts calculateIndexCosts(arangodb::Index const* index,
|
||||||
arangodb::aql::AstNode const* attribute,
|
arangodb::aql::AstNode const* attribute,
|
||||||
size_t itemsInIndex, size_t coveredAttributes) const;
|
size_t itemsInIndex, size_t values,
|
||||||
|
size_t coveredAttributes) const;
|
||||||
|
|
||||||
/// @brief whether or not the access fits
|
/// @brief whether or not the access fits
|
||||||
bool accessFitsIndex(arangodb::Index const*, arangodb::aql::AstNode const*,
|
bool accessFitsIndex(arangodb::Index const*, arangodb::aql::AstNode const*,
|
||||||
|
|
|
@ -42,7 +42,7 @@ bool SortedIndexAttributeMatcher::accessFitsIndex(
|
||||||
arangodb::aql::AstNode const* op, // binary operation that is parent of access and other
|
arangodb::aql::AstNode const* op, // binary operation that is parent of access and other
|
||||||
arangodb::aql::Variable const* reference, // variable used in access(es)
|
arangodb::aql::Variable const* reference, // variable used in access(es)
|
||||||
std::unordered_map<size_t /*offset in idx->fields()*/, std::vector<arangodb::aql::AstNode const*> /*conjunct - operation*/>& found, // marks operations covered by index-fields
|
std::unordered_map<size_t /*offset in idx->fields()*/, std::vector<arangodb::aql::AstNode const*> /*conjunct - operation*/>& found, // marks operations covered by index-fields
|
||||||
std::unordered_set<std::string>& nonNullAttributes, // set of stringified op-childeren (access other) that may not be null
|
std::unordered_set<std::string>& nonNullAttributes, // set of stringified op-children (access other) that may not be null
|
||||||
bool isExecution // skip usage check in execution phase
|
bool isExecution // skip usage check in execution phase
|
||||||
) {
|
) {
|
||||||
if (!idx->canUseConditionPart(access, other, op, reference, nonNullAttributes, isExecution)) {
|
if (!idx->canUseConditionPart(access, other, op, reference, nonNullAttributes, isExecution)) {
|
||||||
|
@ -218,7 +218,7 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
|
||||||
size_t attributesCovered = 0;
|
size_t attributesCovered = 0;
|
||||||
size_t attributesCoveredByEquality = 0;
|
size_t attributesCoveredByEquality = 0;
|
||||||
double equalityReductionFactor = 20.0;
|
double equalityReductionFactor = 20.0;
|
||||||
double estimatedCosts = static_cast<double>(itemsInIndex);
|
double estimatedItems = static_cast<double>(itemsInIndex);
|
||||||
|
|
||||||
for (size_t i = 0; i < idx->fields().size(); ++i) {
|
for (size_t i = 0; i < idx->fields().size(); ++i) {
|
||||||
auto it = found.find(i);
|
auto it = found.find(i);
|
||||||
|
@ -244,7 +244,7 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
|
||||||
|
|
||||||
if (containsEquality) {
|
if (containsEquality) {
|
||||||
++attributesCoveredByEquality;
|
++attributesCoveredByEquality;
|
||||||
estimatedCosts /= equalityReductionFactor;
|
estimatedItems /= equalityReductionFactor;
|
||||||
|
|
||||||
// decrease the effect of the equality reduction factor
|
// decrease the effect of the equality reduction factor
|
||||||
equalityReductionFactor *= 0.25;
|
equalityReductionFactor *= 0.25;
|
||||||
|
@ -257,10 +257,10 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
|
||||||
if (nodes.size() >= 2) {
|
if (nodes.size() >= 2) {
|
||||||
// at least two (non-equality) conditions. probably a range with lower
|
// at least two (non-equality) conditions. probably a range with lower
|
||||||
// and upper bound defined
|
// and upper bound defined
|
||||||
estimatedCosts /= 7.5;
|
estimatedItems /= 7.5;
|
||||||
} else {
|
} else {
|
||||||
// one (non-equality). this is either a lower or a higher bound
|
// one (non-equality). this is either a lower or a higher bound
|
||||||
estimatedCosts /= 2.0;
|
estimatedItems /= 2.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -274,91 +274,87 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
|
||||||
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
|
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
|
||||||
costs.coveredAttributes = attributesCovered;
|
costs.coveredAttributes = attributesCovered;
|
||||||
|
|
||||||
if (attributesCoveredByEquality == idx->fields().size() &&
|
if (attributesCovered > 0 &&
|
||||||
(idx->unique() || idx->implicitlyUnique())) {
|
(!idx->sparse() || attributesCovered == idx->fields().size())) {
|
||||||
// index is unique and condition covers all attributes by equality
|
|
||||||
costs.supportsCondition = true;
|
|
||||||
|
|
||||||
if (itemsInIndex == 0) {
|
|
||||||
costs.estimatedItems = 0;
|
|
||||||
costs.estimatedCosts = 0.0;
|
|
||||||
} else {
|
|
||||||
costs.estimatedItems = values;
|
|
||||||
costs.estimatedCosts = (std::max)(static_cast<double>(1),
|
|
||||||
std::log2(static_cast<double>(itemsInIndex)) * values);
|
|
||||||
}
|
|
||||||
// cost is already low... now slightly prioritize unique indexes
|
|
||||||
costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
|
|
||||||
} else if (attributesCovered > 0 &&
|
|
||||||
(!idx->sparse() || attributesCovered == idx->fields().size())) {
|
|
||||||
// if the condition contains at least one index attribute and is not sparse,
|
// if the condition contains at least one index attribute and is not sparse,
|
||||||
// or the index is sparse and all attributes are covered by the condition,
|
// or the index is sparse and all attributes are covered by the condition,
|
||||||
// then it can be used (note: additional checks for condition parts in
|
// then it can be used (note: additional checks for condition parts in
|
||||||
// sparse indexes are contained in Index::canUseConditionPart)
|
// sparse indexes are contained in Index::canUseConditionPart)
|
||||||
costs.supportsCondition = true;
|
costs.supportsCondition = true;
|
||||||
costs.estimatedItems = static_cast<size_t>(
|
|
||||||
(std::max)(static_cast<size_t>(estimatedCosts * values), static_cast<size_t>(1)));
|
|
||||||
|
|
||||||
// check if the index has a selectivity estimate ready
|
if (itemsInIndex > 0) {
|
||||||
if (idx->hasSelectivityEstimate() &&
|
costs.estimatedItems = static_cast<size_t>(estimatedItems * values);
|
||||||
attributesCoveredByEquality == idx->fields().size()) {
|
|
||||||
double estimate = idx->selectivityEstimate();
|
// check if the index has a selectivity estimate ready
|
||||||
if (estimate > 0.0) {
|
if (idx->hasSelectivityEstimate() &&
|
||||||
costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
|
attributesCoveredByEquality == idx->fields().size()) {
|
||||||
}
|
double estimate = idx->selectivityEstimate();
|
||||||
} else if (attributesCoveredByEquality > 0) {
|
if (estimate > 0.0) {
|
||||||
TRI_ASSERT(attributesCovered > 0);
|
costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
|
||||||
// the index either does not have a selectivity estimate, or not all
|
|
||||||
// of its attributes are covered by the condition using an equality lookup
|
|
||||||
// however, if the search condition uses equality lookups on the prefix
|
|
||||||
// of the index, then we can check if there is another index which is just
|
|
||||||
// indexing the prefix, and "steal" the selectivity estimate from that
|
|
||||||
// index for example, if the condition is "doc.a == 1 && doc.b > 2", and
|
|
||||||
// the current index is created on ["a", "b"], then we will not use the
|
|
||||||
// selectivity estimate of the current index (due to the range condition
|
|
||||||
// used for the second index attribute). however, if there is another
|
|
||||||
// index on just "a", we know that the current index is at least as
|
|
||||||
// selective as the index on the single attribute. and that the extra
|
|
||||||
// condition we have will make it even more selectivity. so in this case
|
|
||||||
// we will re-use the selectivity estimate from the other index, and are
|
|
||||||
// happy.
|
|
||||||
for (auto const& otherIdx : allIndexes) {
|
|
||||||
auto const* other = otherIdx.get();
|
|
||||||
if (other == idx || !other->hasSelectivityEstimate()) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
auto const& otherFields = other->fields();
|
} else if (attributesCoveredByEquality > 0) {
|
||||||
if (otherFields.size() >= attributesCovered) {
|
TRI_ASSERT(attributesCovered > 0);
|
||||||
// other index has more fields than we have, or the same amount.
|
// the index either does not have a selectivity estimate, or not all
|
||||||
// then it will not be helpful
|
// of its attributes are covered by the condition using an equality lookup
|
||||||
continue;
|
// however, if the search condition uses equality lookups on the prefix
|
||||||
}
|
// of the index, then we can check if there is another index which is just
|
||||||
size_t matches = 0;
|
// indexing the prefix, and "steal" the selectivity estimate from that
|
||||||
for (size_t i = 0; i < otherFields.size(); ++i) {
|
// index for example, if the condition is "doc.a == 1 && doc.b > 2", and
|
||||||
if (otherFields[i] != idx->fields()[i]) {
|
// the current index is created on ["a", "b"], then we will not use the
|
||||||
break;
|
// selectivity estimate of the current index (due to the range condition
|
||||||
|
// used for the second index attribute). however, if there is another
|
||||||
|
// index on just "a", we know that the current index is at least as
|
||||||
|
// selective as the index on the single attribute. and that the extra
|
||||||
|
// condition we have will make it even more selectivity. so in this case
|
||||||
|
// we will re-use the selectivity estimate from the other index, and are
|
||||||
|
// happy.
|
||||||
|
for (auto const& otherIdx : allIndexes) {
|
||||||
|
auto const* other = otherIdx.get();
|
||||||
|
if (other == idx || !other->hasSelectivityEstimate()) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
++matches;
|
auto const& otherFields = other->fields();
|
||||||
}
|
if (otherFields.size() >= attributesCovered) {
|
||||||
if (matches == otherFields.size()) {
|
// other index has more fields than we have, or the same amount.
|
||||||
double estimate = other->selectivityEstimate();
|
// then it will not be helpful
|
||||||
if (estimate > 0.0) {
|
continue;
|
||||||
// reuse the estimate from the other index
|
}
|
||||||
costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
|
size_t matches = 0;
|
||||||
break;
|
for (size_t i = 0; i < otherFields.size(); ++i) {
|
||||||
|
if (otherFields[i] != idx->fields()[i]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++matches;
|
||||||
|
}
|
||||||
|
if (matches == otherFields.size()) {
|
||||||
|
double estimate = other->selectivityEstimate();
|
||||||
|
if (estimate > 0.0) {
|
||||||
|
// reuse the estimate from the other index
|
||||||
|
costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (itemsInIndex == 0) {
|
// costs.estimatedItems is always set here, make it at least 1
|
||||||
costs.estimatedCosts = 0.0;
|
costs.estimatedItems = std::max(size_t(1), costs.estimatedItems);
|
||||||
} else {
|
|
||||||
// lookup cost is O(log(n))
|
// seek cost is O(log(n))
|
||||||
costs.estimatedCosts = (std::max)(static_cast<double>(1),
|
costs.estimatedCosts = std::max(double(1.0),
|
||||||
std::log2(static_cast<double>(itemsInIndex)) * values);
|
std::log2(double(itemsInIndex)) * values);
|
||||||
|
// add per-document processing cost
|
||||||
|
costs.estimatedCosts += costs.estimatedItems * 0.05;
|
||||||
// slightly prefer indexes that cover more attributes
|
// slightly prefer indexes that cover more attributes
|
||||||
costs.estimatedCosts -= (attributesCovered - 1) * 0.02;
|
costs.estimatedCosts -= (attributesCovered - 1) * 0.02;
|
||||||
|
|
||||||
|
// cost is already low... now slightly prioritize unique indexes
|
||||||
|
if (idx->unique() || idx->implicitlyUnique()) {
|
||||||
|
costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// box the estimated costs to [0 - inf
|
||||||
|
costs.estimatedCosts = std::max(double(0.0), costs.estimatedCosts);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// index does not help for this condition
|
// index does not help for this condition
|
||||||
|
@ -397,7 +393,7 @@ Index::SortCosts SortedIndexAttributeMatcher::supportsSortCondition(
|
||||||
costs.supportsCondition = true;
|
costs.supportsCondition = true;
|
||||||
} else if (costs.coveredAttributes > 0) {
|
} else if (costs.coveredAttributes > 0) {
|
||||||
costs.estimatedCosts = (itemsInIndex / costs.coveredAttributes) *
|
costs.estimatedCosts = (itemsInIndex / costs.coveredAttributes) *
|
||||||
std::log2(static_cast<double>(itemsInIndex));
|
std::log2(double(itemsInIndex));
|
||||||
if (idx->isPersistent() && sortCondition->isDescending()) {
|
if (idx->isPersistent() && sortCondition->isDescending()) {
|
||||||
// reverse iteration is more expensive
|
// reverse iteration is more expensive
|
||||||
costs.estimatedCosts *= 4;
|
costs.estimatedCosts *= 4;
|
||||||
|
|
|
@ -634,14 +634,19 @@ std::pair<bool, bool> transaction::Methods::findIndexHandleForAndNode(
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TOPIC("7278d", TRACE, Logger::FIXME)
|
LOG_TOPIC("7278d", TRACE, Logger::FIXME)
|
||||||
<< "looking at index: " << idx.get() << ", isSorted: " << idx->isSorted()
|
<< "looking at index: " << idx.get()
|
||||||
<< ", isSparse: " << idx->sparse() << ", fields: " << idx->fields().size()
|
<< ", isSorted: " << idx->isSorted()
|
||||||
<< ", supportsFilter: " << supportsFilter << ", supportsSort: " << supportsSort
|
<< ", isSparse: " << idx->sparse()
|
||||||
<< ", filterCost: " << filterCost << ", sortCost: " << sortCost
|
<< ", fields: " << idx->fields().size()
|
||||||
<< ", totalCost: " << totalCost << ", isOnlyAttributeAccess: " << isOnlyAttributeAccess
|
<< ", supportsFilter: " << supportsFilter
|
||||||
|
<< ", supportsSort: " << supportsSort
|
||||||
|
<< ", filterCost: " << (supportsFilter ? filterCost : 0.0)
|
||||||
|
<< ", sortCost: " << (supportsSort ? sortCost : 0.0)
|
||||||
|
<< ", totalCost: " << totalCost
|
||||||
|
<< ", isOnlyAttributeAccess: " << isOnlyAttributeAccess
|
||||||
<< ", isUnidirectional: " << sortCondition.isUnidirectional()
|
<< ", isUnidirectional: " << sortCondition.isUnidirectional()
|
||||||
<< ", isOnlyEqualityMatch: " << node->isOnlyEqualityMatch()
|
<< ", isOnlyEqualityMatch: " << node->isOnlyEqualityMatch()
|
||||||
<< ", itemsInIndex: " << itemsInIndex;
|
<< ", itemsInIndex/estimatedItems: " << itemsInIndex;
|
||||||
|
|
||||||
if (bestIndex == nullptr || totalCost < bestCost) {
|
if (bestIndex == nullptr || totalCost < bestCost) {
|
||||||
bestIndex = idx;
|
bestIndex = idx;
|
||||||
|
|
|
@ -2082,8 +2082,13 @@ function complexFilteringSuite() {
|
||||||
assertEqual(stats.scannedFull, 0);
|
assertEqual(stats.scannedFull, 0);
|
||||||
// The lookup will be using the primary Index.
|
// The lookup will be using the primary Index.
|
||||||
// It will find 0 elements.
|
// It will find 0 elements.
|
||||||
assertEqual(stats.scannedIndex, 0);
|
if (mmfilesEngine) {
|
||||||
assertEqual(stats.filtered, 0);
|
assertEqual(stats.scannedIndex, 1);
|
||||||
|
assertEqual(stats.filtered, 1);
|
||||||
|
} else {
|
||||||
|
assertEqual(stats.scannedIndex, 0);
|
||||||
|
assertEqual(stats.filtered, 0);
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
testVertexLevel0: function () {
|
testVertexLevel0: function () {
|
||||||
|
|
|
@ -69,8 +69,8 @@ function ahuacatlSkiplistOverlappingTestSuite () {
|
||||||
collection.ensureIndex({type: 'skiplist', name: 'skip_b_a', fields: ['b', 'a']});
|
collection.ensureIndex({type: 'skiplist', name: 'skip_b_a', fields: ['b', 'a']});
|
||||||
|
|
||||||
const isMMFiles = db._engine().name === "mmfiles";
|
const isMMFiles = db._engine().name === "mmfiles";
|
||||||
defaultEqualityIndex = isMMFiles ? 'skip_a' : 'hash_a';
|
defaultEqualityIndex = isMMFiles ? 'hash_a' : 'hash_a';
|
||||||
alternateEqualityIndex = isMMFiles ? 'hash_a' : 'skip_a';
|
alternateEqualityIndex = isMMFiles ? 'skip_a' : 'skip_a';
|
||||||
defaultSortingIndex = isMMFiles ? 'skip_a' : 'hash_a';
|
defaultSortingIndex = isMMFiles ? 'skip_a' : 'hash_a';
|
||||||
alternateSortingIndex = 'skip_a_b';
|
alternateSortingIndex = 'skip_a_b';
|
||||||
},
|
},
|
||||||
|
|
|
@ -0,0 +1,236 @@
|
||||||
|
/*jshint globalstrict:false, strict:false */
|
||||||
|
/*global assertEqual, assertTrue, AQL_EXPLAIN */
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief test the index
|
||||||
|
///
|
||||||
|
/// @file
|
||||||
|
///
|
||||||
|
/// DISCLAIMER
|
||||||
|
///
|
||||||
|
/// Copyright 2018-2019 ArangoDB GmbH, Cologne, Germany
|
||||||
|
///
|
||||||
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
/// you may not use this file except in compliance with the License.
|
||||||
|
/// You may obtain a copy of the License at
|
||||||
|
///
|
||||||
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
///
|
||||||
|
/// Unless required by applicable law or agreed to in writing, software
|
||||||
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
/// See the License for the specific language governing permissions and
|
||||||
|
/// limitations under the License.
|
||||||
|
///
|
||||||
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||||
|
///
|
||||||
|
/// @author 2018 Jan Steemann
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
const jsunity = require("jsunity");
|
||||||
|
const internal = require("internal");
|
||||||
|
const db = internal.db;
|
||||||
|
|
||||||
|
function indexSelectivitySuite() {
|
||||||
|
'use strict';
|
||||||
|
const cn = "UnitTestsCollectionIdx";
|
||||||
|
|
||||||
|
let assertIndexUsed = function(expected, plan) {
|
||||||
|
let nodes = plan.nodes.filter(function(node) {
|
||||||
|
return node.type === 'IndexNode';
|
||||||
|
});
|
||||||
|
assertEqual(1, nodes.length);
|
||||||
|
let node = nodes[0];
|
||||||
|
assertEqual(expected, node.indexes[0].fields);
|
||||||
|
};
|
||||||
|
|
||||||
|
return {
|
||||||
|
setUp : function () {
|
||||||
|
db._drop(cn);
|
||||||
|
db._create(cn);
|
||||||
|
},
|
||||||
|
|
||||||
|
tearDown : function () {
|
||||||
|
db._drop(cn);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoIndexesSingleField: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["b"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a"], indexes[1].fields);
|
||||||
|
assertEqual(["b"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a"], plan);
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoIndexesMultipleFields: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["b"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a"], indexes[1].fields);
|
||||||
|
assertEqual(["b"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.b == @value && doc.a == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoIndexesMultipleFieldsOtherIndexCreationOrder: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["b"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["b"], indexes[1].fields);
|
||||||
|
assertEqual(["a"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.b == @value && doc.a == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoCompositeIndexesMultipleFields: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a", "b"], indexes[1].fields);
|
||||||
|
assertEqual(["a", "b", "c"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value && doc.c == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a", "b", "c"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoCompositeIndexesMultipleFieldsOtherIndexCreationOrder: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a", "b", "c"], indexes[1].fields);
|
||||||
|
assertEqual(["a", "b"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value && doc.c == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a", "b", "c"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoCompositeIndexesMultipleFieldsPartialLookup: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a", "b"], indexes[1].fields);
|
||||||
|
assertEqual(["a", "b", "c"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a", "b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoCompositeIndexesMultipleFieldsPartialLookupOtherIndexCreationOrder: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a", "b", "c"], indexes[1].fields);
|
||||||
|
assertEqual(["a", "b"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a", "b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// execute the suite defined above
jsunity.run(indexSelectivitySuite);

// report the aggregated results back to the test runner
return jsunity.done();
|
Loading…
Reference in New Issue