diff --git a/arangod/Graph/ShortestPathOptions.cpp b/arangod/Graph/ShortestPathOptions.cpp
index be2ade195f..888cd92c0c 100644
--- a/arangod/Graph/ShortestPathOptions.cpp
+++ b/arangod/Graph/ShortestPathOptions.cpp
@@ -150,7 +150,7 @@ void ShortestPathOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
   builder.add("base", VPackValue(VPackValueType::Array));
   for (auto const& it : _baseLookupInfos) {
     for (auto const& it2 : it.idxHandles) {
-      it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
+      it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
     }
   }
   builder.close();
diff --git a/arangod/Graph/TraverserOptions.cpp b/arangod/Graph/TraverserOptions.cpp
index 6ececc47cc..5688fb6c17 100644
--- a/arangod/Graph/TraverserOptions.cpp
+++ b/arangod/Graph/TraverserOptions.cpp
@@ -305,7 +305,7 @@ void TraverserOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
   builder.add("base", VPackValue(VPackValueType::Array));
   for (auto const& it : _baseLookupInfos) {
     for (auto const& it2 : it.idxHandles) {
-      it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
+      it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
     }
   }
   builder.close();
@@ -317,7 +317,7 @@ void TraverserOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
     builder.add(VPackValue(VPackValueType::Array));
     for (auto const& it2 : it.second) {
       for (auto const& it3 : it2.idxHandles) {
-        it3.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
+        it3.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
       }
     }
     builder.close();
diff --git a/arangod/Indexes/SimpleAttributeEqualityMatcher.cpp b/arangod/Indexes/SimpleAttributeEqualityMatcher.cpp
index 39a4eebb93..bc1754a428 100644
--- a/arangod/Indexes/SimpleAttributeEqualityMatcher.cpp
+++ b/arangod/Indexes/SimpleAttributeEqualityMatcher.cpp
@@ -26,8 +26,12 @@
 #include "Aql/AstNode.h"
 #include "Aql/Variable.h"
 #include "Indexes/Index.h"
+#include "StorageEngine/EngineSelectorFeature.h"
+#include "StorageEngine/StorageEngine.h"
 #include "VocBase/vocbase.h"
 
+#include <cmath>
+
 #include <velocypack/StringRef.h>
 
 using namespace arangodb;
@@ -79,7 +83,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchOne(arangodb::Index cons
 
     if (which != nullptr) {
       // we can use the index for the condition
-      costs = calculateIndexCosts(index, which, itemsInIndex * values, 1);
+      costs = calculateIndexCosts(index, which, itemsInIndex, values, 1);
     } else {
       // we cannot use the index for the condition
       ++postFilterConditions;
@@ -155,7 +159,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
     values = 1;
   }
 
-  Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex * values);
+  Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
 
  if (_found.size() == _attributes.size()) {
     // can only use this index if all index attributes are covered by the
@@ -168,7 +172,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
       which = nullptr;
     }
 
-    costs = calculateIndexCosts(index, which, itemsInIndex * values, _found.size());
+    costs = calculateIndexCosts(index, which, itemsInIndex, values, _found.size());
   }
 
   // honor the costs of post-index filter conditions
@@ -315,34 +319,34 @@ arangodb::aql::AstNode* SimpleAttributeEqualityMatcher::specializeAll(
 /// cost values have no special meaning, except that multiple cost values are
 /// comparable, and lower values mean lower costs
 Index::FilterCosts SimpleAttributeEqualityMatcher::calculateIndexCosts(
-    arangodb::Index const* index, arangodb::aql::AstNode const* attribute,
-    size_t itemsInIndex, size_t coveredAttributes) const {
+    arangodb::Index const* idx, arangodb::aql::AstNode const* attribute,
+    size_t itemsInIndex, size_t values, size_t coveredAttributes) const {
   // note: attribute will be set to the index attribute for single-attribute
   // indexes such as the primary and edge indexes, and is a nullptr for the
   // other indexes
-  Index::FilterCosts costs;
+  Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
   costs.supportsCondition = true;
   costs.coveredAttributes = coveredAttributes;
 
-  if (index->unique() || index->implicitlyUnique()) {
-    // index is unique, and the condition covers all attributes
-    // now use a low value for the costs
-    costs.estimatedItems = 1;
-    costs.estimatedCosts = 0.95 - 0.05 * (index->fields().size() - 1);
-  } else if (index->hasSelectivityEstimate()) {
-    // use index selectivity estimate
-    arangodb::velocypack::StringRef att;
-    if (attribute != nullptr && attribute->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
-      att = arangodb::velocypack::StringRef(attribute->getStringValue(), attribute->getStringLength());
-    }
-    double estimate = index->selectivityEstimate(att);
-    if (estimate <= 0.0) {
-      // prevent division by zero
-      costs.estimatedItems = itemsInIndex;
-      // the more attributes are contained in the index, the more specific the
-      // lookup will be
+  if (itemsInIndex > 0) {
+    costs.estimatedItems = static_cast<size_t>(itemsInIndex * values);
+
+    // the index mocks do not have a selectivity estimate...
+    if (idx->hasSelectivityEstimate()) {
+      // use index selectivity estimate
+      arangodb::velocypack::StringRef att;
+      if (attribute != nullptr && attribute->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
+        att = arangodb::velocypack::StringRef(attribute->getStringValue(), attribute->getStringLength());
+      }
+      double estimate = idx->selectivityEstimate(att);
+      if (estimate > 0.0) {
+        costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
+      }
+    } else {
+      // no selectivity estimate present. this should only happen for mock indexes.
+      // anyway, use a hard-coded formula for determining the number of results
       double equalityReductionFactor = 20.0;
-      for (size_t i = 0; i < index->fields().size(); ++i) {
+      for (size_t i = 0; i < coveredAttributes; ++i) {
         costs.estimatedItems /= static_cast<size_t>(equalityReductionFactor);
         // decrease the effect of the equality reduction factor
         equalityReductionFactor *= 0.25;
@@ -351,17 +355,34 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::calculateIndexCosts(
           equalityReductionFactor = 2.0;
         }
       }
-    } else {
-      costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
     }
+
+    // costs.estimatedItems is always set here, make it at least 1
+    costs.estimatedItems = std::max(size_t(1), costs.estimatedItems);
 
-    costs.estimatedItems = (std::max)(costs.estimatedItems, static_cast<size_t>(1));
-    // the more attributes are covered by an index, the more accurate it
-    // is considered to be
-    costs.estimatedCosts = static_cast<double>(costs.estimatedItems) - index->fields().size() * 0.01;
-  } else {
-    // no such index should exist
-    TRI_ASSERT(false);
+    // seek cost is O(log(n)) for RocksDB, and O(1) for mmfiles
+    // TODO: move this into storage engine!
+    if (EngineSelectorFeature::ENGINE->typeName() == "mmfiles") {
+      costs.estimatedCosts = std::max(double(1.0), double(values));
+    } else {
+      costs.estimatedCosts = std::max(double(1.0),
+                                      std::log2(double(itemsInIndex)) * values);
+      if (idx->unique()) {
+        costs.estimatedCosts = std::max(double(1.0), double(itemsInIndex) * values);
+      }
+    }
+    // add per-document processing cost
+    costs.estimatedCosts += costs.estimatedItems * 0.05;
+    // slightly prefer indexes that cover more attributes
+    costs.estimatedCosts -= (idx->fields().size() - 1) * 0.02;
+
+    // cost is already low... now slightly prioritize unique indexes
+    if (idx->unique() || idx->implicitlyUnique()) {
+      costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
+    }
+
+    // box the estimated costs to [0 - inf
+    costs.estimatedCosts = std::max(double(0.0), costs.estimatedCosts);
   }
 
   return costs;
diff --git a/arangod/Indexes/SimpleAttributeEqualityMatcher.h b/arangod/Indexes/SimpleAttributeEqualityMatcher.h
index a67dbc5d65..d69bf28633 100644
--- a/arangod/Indexes/SimpleAttributeEqualityMatcher.h
+++ b/arangod/Indexes/SimpleAttributeEqualityMatcher.h
@@ -86,7 +86,8 @@ class SimpleAttributeEqualityMatcher {
   /// comparable, and lower values mean lower costs
   Index::FilterCosts calculateIndexCosts(arangodb::Index const* index,
                                          arangodb::aql::AstNode const* attribute,
-                                         size_t itemsInIndex, size_t coveredAttributes) const;
+                                         size_t itemsInIndex, size_t values,
+                                         size_t coveredAttributes) const;
 
   /// @brief whether or not the access fits
   bool accessFitsIndex(arangodb::Index const*, arangodb::aql::AstNode const*,
diff --git a/arangod/Indexes/SortedIndexAttributeMatcher.cpp b/arangod/Indexes/SortedIndexAttributeMatcher.cpp
index b74dd7bd1e..7234c88c72 100644
--- a/arangod/Indexes/SortedIndexAttributeMatcher.cpp
+++ b/arangod/Indexes/SortedIndexAttributeMatcher.cpp
@@ -44,7 +44,7 @@ bool SortedIndexAttributeMatcher::accessFitsIndex(
     arangodb::aql::AstNode const* op,  // binary operation that is parent of access and other
     arangodb::aql::Variable const* reference,  // variable used in access(es)
     std::unordered_map<size_t /*idx->fields()*/, std::vector<arangodb::aql::AstNode const*> /*conjunct operation*/>& found,  // marks operations covered by index-fields
-    std::unordered_set<std::string>& nonNullAttributes,  // set of stringified op-childeren (access other) that may not be null
+    std::unordered_set<std::string>& nonNullAttributes,  // set of stringified op-children (access other) that may not be null
     bool isExecution  // skip usage check in execution phase
 ) {
   if (!idx->canUseConditionPart(access, other, op, reference, nonNullAttributes, isExecution)) {
@@ -220,7 +220,7 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
   size_t attributesCovered = 0;
   size_t attributesCoveredByEquality = 0;
   double equalityReductionFactor = 20.0;
-  double estimatedCosts = static_cast<double>(itemsInIndex);
+  double estimatedItems = static_cast<double>(itemsInIndex);
 
   for (size_t i = 0; i < idx->fields().size(); ++i) {
     auto it = found.find(i);
@@ -246,7 +246,7 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
 
     if (containsEquality) {
       ++attributesCoveredByEquality;
-      estimatedCosts /= equalityReductionFactor;
+      estimatedItems /= equalityReductionFactor;
 
       // decrease the effect of the equality reduction factor
       equalityReductionFactor *= 0.25;
@@ -259,10 +259,10 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
       if (nodes.size() >= 2) {
         // at least two (non-equality) conditions. probably a range with lower
         // and upper bound defined
-        estimatedCosts /= 7.5;
+        estimatedItems /= 7.5;
       } else {
         // one (non-equality). this is either a lower or a higher bound
-        estimatedCosts /= 2.0;
+        estimatedItems /= 2.0;
       }
     }
 
@@ -276,91 +276,87 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
   Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
   costs.coveredAttributes = attributesCovered;
 
-  if (attributesCoveredByEquality == idx->fields().size() &&
-      (idx->unique() || idx->implicitlyUnique())) {
-    // index is unique and condition covers all attributes by equality
-    costs.supportsCondition = true;
-
-    if (itemsInIndex == 0) {
-      costs.estimatedItems = 0;
-      costs.estimatedCosts = 0.0;
-    } else {
-      costs.estimatedItems = values;
-      costs.estimatedCosts = (std::max)(static_cast<double>(1),
-                                        std::log2(static_cast<double>(itemsInIndex)) * values);
-    }
-    // cost is already low... now slightly prioritize unique indexes
-    costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
-  } else if (attributesCovered > 0 &&
-             (!idx->sparse() || attributesCovered == idx->fields().size())) {
+  if (attributesCovered > 0 &&
+      (!idx->sparse() || attributesCovered == idx->fields().size())) {
     // if the condition contains at least one index attribute and is not sparse,
     // or the index is sparse and all attributes are covered by the condition,
     // then it can be used (note: additional checks for condition parts in
     // sparse indexes are contained in Index::canUseConditionPart)
     costs.supportsCondition = true;
 
-    costs.estimatedItems = static_cast<size_t>(
-        (std::max)(static_cast<size_t>(estimatedCosts * values), static_cast<size_t>(1)));
-    // check if the index has a selectivity estimate ready
-    if (idx->hasSelectivityEstimate() &&
-        attributesCoveredByEquality == idx->fields().size()) {
-      double estimate = idx->selectivityEstimate();
-      if (estimate > 0.0) {
-        costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
-      }
-    } else if (attributesCoveredByEquality > 0) {
-      TRI_ASSERT(attributesCovered > 0);
-      // the index either does not have a selectivity estimate, or not all
-      // of its attributes are covered by the condition using an equality lookup
-      // however, if the search condition uses equality lookups on the prefix
-      // of the index, then we can check if there is another index which is just
-      // indexing the prefix, and "steal" the selectivity estimate from that
-      // index for example, if the condition is "doc.a == 1 && doc.b > 2", and
-      // the current index is created on ["a", "b"], then we will not use the
-      // selectivity estimate of the current index (due to the range condition
-      // used for the second index attribute). however, if there is another
-      // index on just "a", we know that the current index is at least as
-      // selective as the index on the single attribute. and that the extra
-      // condition we have will make it even more selectivity. so in this case
-      // we will re-use the selectivity estimate from the other index, and are
-      // happy.
-      for (auto const& otherIdx : allIndexes) {
-        auto const* other = otherIdx.get();
-        if (other == idx || !other->hasSelectivityEstimate()) {
-          continue;
+    if (itemsInIndex > 0) {
+      costs.estimatedItems = static_cast<size_t>(estimatedItems * values);
+
+      // check if the index has a selectivity estimate ready
+      if (idx->hasSelectivityEstimate() &&
+          attributesCoveredByEquality == idx->fields().size()) {
+        double estimate = idx->selectivityEstimate();
+        if (estimate > 0.0) {
+          costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
        }
-        auto const& otherFields = other->fields();
-        if (otherFields.size() >= attributesCovered) {
-          // other index has more fields than we have, or the same amount.
-          // then it will not be helpful
-          continue;
-        }
-        size_t matches = 0;
-        for (size_t i = 0; i < otherFields.size(); ++i) {
-          if (otherFields[i] != idx->fields()[i]) {
-            break;
+      } else if (attributesCoveredByEquality > 0) {
+        TRI_ASSERT(attributesCovered > 0);
+        // the index either does not have a selectivity estimate, or not all
+        // of its attributes are covered by the condition using an equality lookup
+        // however, if the search condition uses equality lookups on the prefix
+        // of the index, then we can check if there is another index which is just
+        // indexing the prefix, and "steal" the selectivity estimate from that
+        // index for example, if the condition is "doc.a == 1 && doc.b > 2", and
+        // the current index is created on ["a", "b"], then we will not use the
+        // selectivity estimate of the current index (due to the range condition
+        // used for the second index attribute). however, if there is another
+        // index on just "a", we know that the current index is at least as
+        // selective as the index on the single attribute. and that the extra
+        // condition we have will make it even more selectivity. so in this case
+        // we will re-use the selectivity estimate from the other index, and are
+        // happy.
+        for (auto const& otherIdx : allIndexes) {
+          auto const* other = otherIdx.get();
+          if (other == idx || !other->hasSelectivityEstimate()) {
+            continue;
           }
-          ++matches;
-        }
-        if (matches == otherFields.size()) {
-          double estimate = other->selectivityEstimate();
-          if (estimate > 0.0) {
-            // reuse the estimate from the other index
-            costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
-            break;
+          auto const& otherFields = other->fields();
+          if (otherFields.size() >= attributesCovered) {
+            // other index has more fields than we have, or the same amount.
+            // then it will not be helpful
+            continue;
+          }
+          size_t matches = 0;
+          for (size_t i = 0; i < otherFields.size(); ++i) {
+            if (otherFields[i] != idx->fields()[i]) {
+              break;
+            }
+            ++matches;
+          }
+          if (matches == otherFields.size()) {
+            double estimate = other->selectivityEstimate();
+            if (estimate > 0.0) {
+              // reuse the estimate from the other index
+              costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
+              break;
+            }
           }
         }
       }
-    }
 
-    if (itemsInIndex == 0) {
-      costs.estimatedCosts = 0.0;
-    } else {
-      // lookup cost is O(log(n))
-      costs.estimatedCosts = (std::max)(static_cast<double>(1),
-                                        std::log2(static_cast<double>(itemsInIndex)) * values);
+      // costs.estimatedItems is always set here, make it at least 1
+      costs.estimatedItems = std::max(size_t(1), costs.estimatedItems);
+
+      // seek cost is O(log(n))
+      costs.estimatedCosts = std::max(double(1.0),
+                                      std::log2(double(itemsInIndex)) * values);
+      // add per-document processing cost
+      costs.estimatedCosts += costs.estimatedItems * 0.05;
       // slightly prefer indexes that cover more attributes
       costs.estimatedCosts -= (attributesCovered - 1) * 0.02;
+
+      // cost is already low... now slightly prioritize unique indexes
+      if (idx->unique() || idx->implicitlyUnique()) {
+        costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
+      }
+
+      // box the estimated costs to [0 - inf
+      costs.estimatedCosts = std::max(double(0.0), costs.estimatedCosts);
     }
   } else {
     // index does not help for this condition
@@ -399,7 +395,7 @@ Index::SortCosts SortedIndexAttributeMatcher::supportsSortCondition(
     costs.supportsCondition = true;
   } else if (costs.coveredAttributes > 0) {
     costs.estimatedCosts = (itemsInIndex / costs.coveredAttributes) *
-                           std::log2(static_cast<double>(itemsInIndex));
+                           std::log2(double(itemsInIndex));
     if (idx->isPersistent() && sortCondition->isDescending()) {
       // reverse iteration is more expensive
       costs.estimatedCosts *= 4;
diff --git a/arangod/Transaction/Methods.cpp b/arangod/Transaction/Methods.cpp
index 9b71d0d13a..af3b1ee29f 100644
--- a/arangod/Transaction/Methods.cpp
+++ b/arangod/Transaction/Methods.cpp
@@ -635,14 +635,19 @@ std::pair<bool, bool> transaction::Methods::findIndexHandleForAndNode(
     }
 
     LOG_TOPIC("7278d", TRACE, Logger::FIXME)
-        << "looking at index: " << idx.get() << ", isSorted: " << idx->isSorted()
-        << ", isSparse: " << idx->sparse() << ", fields: " << idx->fields().size()
-        << ", supportsFilter: " << supportsFilter << ", supportsSort: " << supportsSort
-        << ", filterCost: " << filterCost << ", sortCost: " << sortCost
-        << ", totalCost: " << totalCost << ", isOnlyAttributeAccess: " << isOnlyAttributeAccess
+        << "looking at index: " << idx.get()
+        << ", isSorted: " << idx->isSorted()
+        << ", isSparse: " << idx->sparse()
+        << ", fields: " << idx->fields().size()
+        << ", supportsFilter: " << supportsFilter
+        << ", supportsSort: " << supportsSort
+        << ", filterCost: " << (supportsFilter ? filterCost : 0.0)
+        << ", sortCost: " << (supportsSort ? sortCost : 0.0)
+        << ", totalCost: " << totalCost
+        << ", isOnlyAttributeAccess: " << isOnlyAttributeAccess
         << ", isUnidirectional: " << sortCondition.isUnidirectional()
         << ", isOnlyEqualityMatch: " << node->isOnlyEqualityMatch()
-        << ", itemsInIndex: " << itemsInIndex;
+        << ", itemsInIndex/estimatedItems: " << itemsInIndex;
 
     if (bestIndex == nullptr || totalCost < bestCost) {
       bestIndex = idx;
diff --git a/tests/js/server/aql/aql-graph-traverser.js b/tests/js/server/aql/aql-graph-traverser.js
index 46bb048168..2f0abc1549 100644
--- a/tests/js/server/aql/aql-graph-traverser.js
+++ b/tests/js/server/aql/aql-graph-traverser.js
@@ -2082,8 +2082,13 @@ function complexFilteringSuite() {
         assertEqual(stats.scannedFull, 0);
         // The lookup will be using the primary Index.
         // It will find 0 elements.
-        assertEqual(stats.scannedIndex, 0);
-        assertEqual(stats.filtered, 0);
+        if (mmfilesEngine) {
+          assertEqual(stats.scannedIndex, 1);
+          assertEqual(stats.filtered, 1);
+        } else {
+          assertEqual(stats.scannedIndex, 0);
+          assertEqual(stats.filtered, 0);
+        }
       },
 
       testVertexLevel0: function () {
diff --git a/tests/js/server/aql/aql-index-hints.js b/tests/js/server/aql/aql-index-hints.js
index 5122a5ab97..0babe8b9f8 100644
--- a/tests/js/server/aql/aql-index-hints.js
+++ b/tests/js/server/aql/aql-index-hints.js
@@ -69,8 +69,8 @@ function ahuacatlSkiplistOverlappingTestSuite () {
       collection.ensureIndex({type: 'skiplist', name: 'skip_b_a', fields: ['b', 'a']});
 
       const isMMFiles = db._engine().name === "mmfiles";
-      defaultEqualityIndex = isMMFiles ? 'skip_a' : 'hash_a';
-      alternateEqualityIndex = isMMFiles ? 'hash_a' : 'skip_a';
+      defaultEqualityIndex = isMMFiles ? 'hash_a' : 'hash_a';
+      alternateEqualityIndex = isMMFiles ? 'skip_a' : 'skip_a';
       defaultSortingIndex = isMMFiles ? 'skip_a' : 'hash_a';
       alternateSortingIndex = 'skip_a_b';
     },
diff --git a/tests/js/server/shell/shell-index-selectivity.js b/tests/js/server/shell/shell-index-selectivity.js
new file mode 100644
index 0000000000..ed980d44eb
--- /dev/null
+++ b/tests/js/server/shell/shell-index-selectivity.js
@@ -0,0 +1,236 @@
+/*jshint globalstrict:false, strict:false */
+/*global assertEqual, assertTrue, AQL_EXPLAIN */
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test the index
+///
+/// @file
+///
+/// DISCLAIMER
+///
+/// Copyright 2018-2019 ArangoDB GmbH, Cologne, Germany
+///
+/// Licensed under the Apache License, Version 2.0 (the "License");
+/// you may not use this file except in compliance with the License.
+/// You may obtain a copy of the License at
+///
+///     http://www.apache.org/licenses/LICENSE-2.0
+///
+/// Unless required by applicable law or agreed to in writing, software
+/// distributed under the License is distributed on an "AS IS" BASIS,
+/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+/// See the License for the specific language governing permissions and
+/// limitations under the License.
+///
+/// Copyright holder is ArangoDB GmbH, Cologne, Germany
+///
+/// @author 2018 Jan Steemann
+////////////////////////////////////////////////////////////////////////////////
+
+const jsunity = require("jsunity");
+const internal = require("internal");
+const db = internal.db;
+
+function indexSelectivitySuite() {
+  'use strict';
+  const cn = "UnitTestsCollectionIdx";
+
+  let assertIndexUsed = function(expected, plan) {
+    let nodes = plan.nodes.filter(function(node) {
+      return node.type === 'IndexNode';
+    });
+    assertEqual(1, nodes.length);
+    let node = nodes[0];
+    assertEqual(expected, node.indexes[0].fields);
+  };
+
+  return {
+    setUp : function () {
+      db._drop(cn);
+      db._create(cn);
+    },
+
+    tearDown : function () {
+      db._drop(cn);
+    },
+
+    testTwoIndexesSingleField: function () {
+      let c = db._collection(cn);
+      c.ensureIndex({ type: "hash", fields: ["a"] });
+      c.ensureIndex({ type: "hash", fields: ["b"] });
+
+      // index on "a" has lower selectivity than index on "b"
+      for (let i = 0; i < 1000; ++i) {
+        c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
+      }
+
+      internal.waitForEstimatorSync();
+      let indexes = c.indexes();
+      assertEqual(["a"], indexes[1].fields);
+      assertEqual(["b"], indexes[2].fields);
+      assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
+
+      let query, plan;
+
+      query = "FOR doc IN @@collection FILTER doc.a == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["a"], plan);
+
+      query = "FOR doc IN @@collection FILTER doc.b == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["b"], plan);
+    },
+
+    testTwoIndexesMultipleFields: function () {
+      let c = db._collection(cn);
+      c.ensureIndex({ type: "hash", fields: ["a"] });
+      c.ensureIndex({ type: "hash", fields: ["b"] });
+
+      // index on "a" has lower selectivity than index on "b"
+      for (let i = 0; i < 1000; ++i) {
+        c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
+      }
+
+      internal.waitForEstimatorSync();
+      let indexes = c.indexes();
+      assertEqual(["a"], indexes[1].fields);
+      assertEqual(["b"], indexes[2].fields);
+      assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
+
+      let query, plan;
+
+      query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["b"], plan);
+
+      query = "FOR doc IN @@collection FILTER doc.b == @value && doc.a == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["b"], plan);
+    },
+
+    testTwoIndexesMultipleFieldsOtherIndexCreationOrder: function () {
+      let c = db._collection(cn);
+      c.ensureIndex({ type: "hash", fields: ["b"] });
+      c.ensureIndex({ type: "hash", fields: ["a"] });
+
+      // index on "a" has lower selectivity than index on "b"
+      for (let i = 0; i < 1000; ++i) {
+        c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
+      }
+
+      internal.waitForEstimatorSync();
+      let indexes = c.indexes();
+      assertEqual(["b"], indexes[1].fields);
+      assertEqual(["a"], indexes[2].fields);
+      assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
+
+      let query, plan;
+
+      query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["b"], plan);
+
+      query = "FOR doc IN @@collection FILTER doc.b == @value && doc.a == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["b"], plan);
+    },
+
+    testTwoCompositeIndexesMultipleFields: function () {
+      let c = db._collection(cn);
+      c.ensureIndex({ type: "hash", fields: ["a", "b"] });
+      c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
+
+      // index on "a" has lower selectivity than index on "b"
+      for (let i = 0; i < 1000; ++i) {
+        c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
+      }
+
+      internal.waitForEstimatorSync();
+      let indexes = c.indexes();
+      assertEqual(["a", "b"], indexes[1].fields);
+      assertEqual(["a", "b", "c"], indexes[2].fields);
+      assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
+
+      let query, plan;
+
+      query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value && doc.c == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["a", "b", "c"], plan);
+    },
+
+    testTwoCompositeIndexesMultipleFieldsOtherIndexCreationOrder: function () {
+      let c = db._collection(cn);
+      c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
+      c.ensureIndex({ type: "hash", fields: ["a", "b"] });
+
+      // index on "a" has lower selectivity than index on "b"
+      for (let i = 0; i < 1000; ++i) {
+        c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
+      }
+
+      internal.waitForEstimatorSync();
+      let indexes = c.indexes();
+      assertEqual(["a", "b", "c"], indexes[1].fields);
+      assertEqual(["a", "b"], indexes[2].fields);
+      assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
+
+      let query, plan;
+
+      query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value && doc.c == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["a", "b", "c"], plan);
+    },
+
+    testTwoCompositeIndexesMultipleFieldsPartialLookup: function () {
+      let c = db._collection(cn);
+      c.ensureIndex({ type: "hash", fields: ["a", "b"] });
+      c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
+
+      // index on "a" has lower selectivity than index on "b"
+      for (let i = 0; i < 1000; ++i) {
+        c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
+      }
+
+      internal.waitForEstimatorSync();
+      let indexes = c.indexes();
+      assertEqual(["a", "b"], indexes[1].fields);
+      assertEqual(["a", "b", "c"], indexes[2].fields);
+      assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
+
+      let query, plan;
+
+      query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["a", "b"], plan);
+    },
+
+    testTwoCompositeIndexesMultipleFieldsPartialLookupOtherIndexCreationOrder: function () {
+      let c = db._collection(cn);
+      c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
+      c.ensureIndex({ type: "hash", fields: ["a", "b"] });
+
+      // index on "a" has lower selectivity than index on "b"
+      for (let i = 0; i < 1000; ++i) {
+        c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
+      }
+
+      internal.waitForEstimatorSync();
+      let indexes = c.indexes();
+      assertEqual(["a", "b", "c"], indexes[1].fields);
+      assertEqual(["a", "b"], indexes[2].fields);
+      assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
+
+      let query, plan;
+
+      query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
+      plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
+      assertIndexUsed(["a", "b"], plan);
+    },
+
+  };
+
+}
+
+jsunity.run(indexSelectivitySuite);
+
+return jsunity.done();
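
For illustration only (not part of the patch): a minimal standalone C++ sketch that replays the arithmetic of the new cost model from SimpleAttributeEqualityMatcher::calculateIndexCosts for one assumed example. All inputs are assumptions chosen for the example, not values from the patch: a non-unique two-field index on the RocksDB-style branch, itemsInIndex = 1000, values = 1, selectivity estimate 0.1.

// Not ArangoDB code: a self-contained replay of the new cost arithmetic
// for one assumed example (non-mmfiles branch, non-unique index).
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>

int main() {
  std::size_t itemsInIndex = 1000;  // documents covered by the index (assumed)
  std::size_t values = 1;           // number of lookup values (assumed)
  std::size_t fields = 2;           // attributes in the index definition (assumed)
  double selectivity = 0.1;         // selectivityEstimate() of the index (assumed)
  bool uniqueIndex = false;

  // estimated number of results: 1 / selectivity, scaled by the number of lookup values
  std::size_t estimatedItems =
      std::max(std::size_t(1), static_cast<std::size_t>(1.0 / selectivity * values));

  // seek cost is O(log2(n)) per lookup value
  double estimatedCosts = std::max(1.0, std::log2(double(itemsInIndex)) * values);
  // add per-document processing cost
  estimatedCosts += estimatedItems * 0.05;
  // slightly prefer indexes that cover more attributes
  estimatedCosts -= (fields - 1) * 0.02;
  // slightly prioritize unique indexes
  if (uniqueIndex) {
    estimatedCosts *= 0.995 - 0.05 * (fields - 1);
  }
  estimatedCosts = std::max(0.0, estimatedCosts);

  // prints: estimatedItems: 10, estimatedCosts: ~10.45
  std::cout << "estimatedItems: " << estimatedItems
            << ", estimatedCosts: " << estimatedCosts << std::endl;
  return 0;
}

With these assumed inputs the estimated item count is 10 and the estimated cost comes out at roughly 10.45, which is the kind of figure that now shows up as filterCost in the LOG_TOPIC output added in arangod/Transaction/Methods.cpp.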