mirror of https://gitee.com/bigwinds/arangodb
make index selection more deterministic (#9736)
* make index selection more deterministic * updated CHANGELOG * serialize indexes used by traversal with their estimates * serialize selectivity estimates for shortest path nodes too * fix assertion that doesn't hold true in unit tests * fix test
This commit is contained in:
parent
0f03655ce3
commit
3dcc293224
|
@ -1,6 +1,9 @@
|
||||||
v3.5.1 (XXXX-XX-XX)
|
v3.5.1 (XXXX-XX-XX)
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
|
* Made index selection much more deterministic in case there are
|
||||||
|
multiple competing indexes.
|
||||||
|
|
||||||
* Fixed issue #9654: honor value of `--rocksdb.max-write-buffer-number` if it
|
* Fixed issue #9654: honor value of `--rocksdb.max-write-buffer-number` if it
|
||||||
is set to at least 9 (which is the recommended value). Ignore it if it is
|
is set to at least 9 (which is the recommended value). Ignore it if it is
|
||||||
set to a lower value than 9, and warn the end user about it.
|
set to a lower value than 9, and warn the end user about it.
|
||||||
|
|
|
@ -150,7 +150,7 @@ void ShortestPathOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
|
||||||
builder.add("base", VPackValue(VPackValueType::Array));
|
builder.add("base", VPackValue(VPackValueType::Array));
|
||||||
for (auto const& it : _baseLookupInfos) {
|
for (auto const& it : _baseLookupInfos) {
|
||||||
for (auto const& it2 : it.idxHandles) {
|
for (auto const& it2 : it.idxHandles) {
|
||||||
it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
|
it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
builder.close();
|
builder.close();
|
||||||
|
|
|
@ -304,7 +304,7 @@ void TraverserOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
|
||||||
builder.add("base", VPackValue(VPackValueType::Array));
|
builder.add("base", VPackValue(VPackValueType::Array));
|
||||||
for (auto const& it : _baseLookupInfos) {
|
for (auto const& it : _baseLookupInfos) {
|
||||||
for (auto const& it2 : it.idxHandles) {
|
for (auto const& it2 : it.idxHandles) {
|
||||||
it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
|
it2.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
builder.close();
|
builder.close();
|
||||||
|
@ -316,7 +316,7 @@ void TraverserOptions::toVelocyPackIndexes(VPackBuilder& builder) const {
|
||||||
builder.add(VPackValue(VPackValueType::Array));
|
builder.add(VPackValue(VPackValueType::Array));
|
||||||
for (auto const& it2 : it.second) {
|
for (auto const& it2 : it.second) {
|
||||||
for (auto const& it3 : it2.idxHandles) {
|
for (auto const& it3 : it2.idxHandles) {
|
||||||
it3.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics));
|
it3.getIndex()->toVelocyPack(builder, Index::makeFlags(Index::Serialize::Basics, Index::Serialize::Estimates));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
builder.close();
|
builder.close();
|
||||||
|
|
|
@ -26,8 +26,12 @@
|
||||||
#include "Aql/AstNode.h"
|
#include "Aql/AstNode.h"
|
||||||
#include "Aql/Variable.h"
|
#include "Aql/Variable.h"
|
||||||
#include "Indexes/Index.h"
|
#include "Indexes/Index.h"
|
||||||
|
#include "StorageEngine/EngineSelectorFeature.h"
|
||||||
|
#include "StorageEngine/StorageEngine.h"
|
||||||
#include "VocBase/vocbase.h"
|
#include "VocBase/vocbase.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#include <velocypack/StringRef.h>
|
#include <velocypack/StringRef.h>
|
||||||
|
|
||||||
using namespace arangodb;
|
using namespace arangodb;
|
||||||
|
@ -79,7 +83,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchOne(arangodb::Index cons
|
||||||
|
|
||||||
if (which != nullptr) {
|
if (which != nullptr) {
|
||||||
// we can use the index for the condition
|
// we can use the index for the condition
|
||||||
costs = calculateIndexCosts(index, which, itemsInIndex * values, 1);
|
costs = calculateIndexCosts(index, which, itemsInIndex, values, 1);
|
||||||
} else {
|
} else {
|
||||||
// we cannot use the index for the condition
|
// we cannot use the index for the condition
|
||||||
++postFilterConditions;
|
++postFilterConditions;
|
||||||
|
@ -155,7 +159,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
|
||||||
values = 1;
|
values = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex * values);
|
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
|
||||||
|
|
||||||
if (_found.size() == _attributes.size()) {
|
if (_found.size() == _attributes.size()) {
|
||||||
// can only use this index if all index attributes are covered by the
|
// can only use this index if all index attributes are covered by the
|
||||||
|
@ -168,7 +172,7 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
|
||||||
which = nullptr;
|
which = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
costs = calculateIndexCosts(index, which, itemsInIndex * values, _found.size());
|
costs = calculateIndexCosts(index, which, itemsInIndex, values, _found.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// honor the costs of post-index filter conditions
|
// honor the costs of post-index filter conditions
|
||||||
|
@ -315,34 +319,34 @@ arangodb::aql::AstNode* SimpleAttributeEqualityMatcher::specializeAll(
|
||||||
/// cost values have no special meaning, except that multiple cost values are
|
/// cost values have no special meaning, except that multiple cost values are
|
||||||
/// comparable, and lower values mean lower costs
|
/// comparable, and lower values mean lower costs
|
||||||
Index::FilterCosts SimpleAttributeEqualityMatcher::calculateIndexCosts(
|
Index::FilterCosts SimpleAttributeEqualityMatcher::calculateIndexCosts(
|
||||||
arangodb::Index const* index, arangodb::aql::AstNode const* attribute,
|
arangodb::Index const* idx, arangodb::aql::AstNode const* attribute,
|
||||||
size_t itemsInIndex, size_t coveredAttributes) const {
|
size_t itemsInIndex, size_t values, size_t coveredAttributes) const {
|
||||||
// note: attribute will be set to the index attribute for single-attribute
|
// note: attribute will be set to the index attribute for single-attribute
|
||||||
// indexes such as the primary and edge indexes, and is a nullptr for the
|
// indexes such as the primary and edge indexes, and is a nullptr for the
|
||||||
// other indexes
|
// other indexes
|
||||||
Index::FilterCosts costs;
|
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
|
||||||
costs.supportsCondition = true;
|
costs.supportsCondition = true;
|
||||||
costs.coveredAttributes = coveredAttributes;
|
costs.coveredAttributes = coveredAttributes;
|
||||||
|
|
||||||
if (index->unique() || index->implicitlyUnique()) {
|
if (itemsInIndex > 0) {
|
||||||
// index is unique, and the condition covers all attributes
|
costs.estimatedItems = static_cast<size_t>(itemsInIndex * values);
|
||||||
// now use a low value for the costs
|
|
||||||
costs.estimatedItems = 1;
|
// the index mocks do not have a selectivity estimate...
|
||||||
costs.estimatedCosts = 0.95 - 0.05 * (index->fields().size() - 1);
|
if (idx->hasSelectivityEstimate()) {
|
||||||
} else if (index->hasSelectivityEstimate()) {
|
// use index selectivity estimate
|
||||||
// use index selectivity estimate
|
arangodb::velocypack::StringRef att;
|
||||||
arangodb::velocypack::StringRef att;
|
if (attribute != nullptr && attribute->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
|
||||||
if (attribute != nullptr && attribute->type == aql::NODE_TYPE_ATTRIBUTE_ACCESS) {
|
att = arangodb::velocypack::StringRef(attribute->getStringValue(), attribute->getStringLength());
|
||||||
att = arangodb::velocypack::StringRef(attribute->getStringValue(), attribute->getStringLength());
|
}
|
||||||
}
|
double estimate = idx->selectivityEstimate(att);
|
||||||
double estimate = index->selectivityEstimate(att);
|
if (estimate > 0.0) {
|
||||||
if (estimate <= 0.0) {
|
costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
|
||||||
// prevent division by zero
|
}
|
||||||
costs.estimatedItems = itemsInIndex;
|
} else {
|
||||||
// the more attributes are contained in the index, the more specific the
|
// no selectivity estimate present. this should only happen for mock indexes.
|
||||||
// lookup will be
|
// anyway, use a hard-coded formula for determining the number of results
|
||||||
double equalityReductionFactor = 20.0;
|
double equalityReductionFactor = 20.0;
|
||||||
for (size_t i = 0; i < index->fields().size(); ++i) {
|
for (size_t i = 0; i < coveredAttributes; ++i) {
|
||||||
costs.estimatedItems /= static_cast<size_t>(equalityReductionFactor);
|
costs.estimatedItems /= static_cast<size_t>(equalityReductionFactor);
|
||||||
// decrease the effect of the equality reduction factor
|
// decrease the effect of the equality reduction factor
|
||||||
equalityReductionFactor *= 0.25;
|
equalityReductionFactor *= 0.25;
|
||||||
|
@ -351,17 +355,34 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::calculateIndexCosts(
|
||||||
equalityReductionFactor = 2.0;
|
equalityReductionFactor = 2.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
costs.estimatedItems = (std::max)(costs.estimatedItems, static_cast<size_t>(1));
|
// costs.estimatedItems is always set here, make it at least 1
|
||||||
// the more attributes are covered by an index, the more accurate it
|
costs.estimatedItems = std::max(size_t(1), costs.estimatedItems);
|
||||||
// is considered to be
|
|
||||||
costs.estimatedCosts = static_cast<double>(costs.estimatedItems) - index->fields().size() * 0.01;
|
// seek cost is O(log(n)) for RocksDB, and O(1) for mmfiles
|
||||||
} else {
|
// TODO: move this into storage engine!
|
||||||
// no such index should exist
|
if (EngineSelectorFeature::ENGINE->typeName() == "mmfiles") {
|
||||||
TRI_ASSERT(false);
|
costs.estimatedCosts = std::max(double(1.0), double(values));
|
||||||
|
} else {
|
||||||
|
costs.estimatedCosts = std::max(double(1.0),
|
||||||
|
std::log2(double(itemsInIndex)) * values);
|
||||||
|
if (idx->unique()) {
|
||||||
|
costs.estimatedCosts = std::max(double(1.0), double(itemsInIndex) * values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// add per-document processing cost
|
||||||
|
costs.estimatedCosts += costs.estimatedItems * 0.05;
|
||||||
|
// slightly prefer indexes that cover more attributes
|
||||||
|
costs.estimatedCosts -= (idx->fields().size() - 1) * 0.02;
|
||||||
|
|
||||||
|
// cost is already low... now slightly prioritize unique indexes
|
||||||
|
if (idx->unique() || idx->implicitlyUnique()) {
|
||||||
|
costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// box the estimated costs to [0 - inf
|
||||||
|
costs.estimatedCosts = std::max(double(0.0), costs.estimatedCosts);
|
||||||
}
|
}
|
||||||
|
|
||||||
return costs;
|
return costs;
|
||||||
|
|
|
@ -86,7 +86,8 @@ class SimpleAttributeEqualityMatcher {
|
||||||
/// comparable, and lower values mean lower costs
|
/// comparable, and lower values mean lower costs
|
||||||
Index::FilterCosts calculateIndexCosts(arangodb::Index const* index,
|
Index::FilterCosts calculateIndexCosts(arangodb::Index const* index,
|
||||||
arangodb::aql::AstNode const* attribute,
|
arangodb::aql::AstNode const* attribute,
|
||||||
size_t itemsInIndex, size_t coveredAttributes) const;
|
size_t itemsInIndex, size_t values,
|
||||||
|
size_t coveredAttributes) const;
|
||||||
|
|
||||||
/// @brief whether or not the access fits
|
/// @brief whether or not the access fits
|
||||||
bool accessFitsIndex(arangodb::Index const*, arangodb::aql::AstNode const*,
|
bool accessFitsIndex(arangodb::Index const*, arangodb::aql::AstNode const*,
|
||||||
|
|
|
@ -42,7 +42,7 @@ bool SortedIndexAttributeMatcher::accessFitsIndex(
|
||||||
arangodb::aql::AstNode const* op, // binary operation that is parent of access and other
|
arangodb::aql::AstNode const* op, // binary operation that is parent of access and other
|
||||||
arangodb::aql::Variable const* reference, // variable used in access(es)
|
arangodb::aql::Variable const* reference, // variable used in access(es)
|
||||||
std::unordered_map<size_t /*offset in idx->fields()*/, std::vector<arangodb::aql::AstNode const*> /*conjunct - operation*/>& found, // marks operations covered by index-fields
|
std::unordered_map<size_t /*offset in idx->fields()*/, std::vector<arangodb::aql::AstNode const*> /*conjunct - operation*/>& found, // marks operations covered by index-fields
|
||||||
std::unordered_set<std::string>& nonNullAttributes, // set of stringified op-childeren (access other) that may not be null
|
std::unordered_set<std::string>& nonNullAttributes, // set of stringified op-children (access other) that may not be null
|
||||||
bool isExecution // skip usage check in execution phase
|
bool isExecution // skip usage check in execution phase
|
||||||
) {
|
) {
|
||||||
if (!idx->canUseConditionPart(access, other, op, reference, nonNullAttributes, isExecution)) {
|
if (!idx->canUseConditionPart(access, other, op, reference, nonNullAttributes, isExecution)) {
|
||||||
|
@ -218,7 +218,7 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
|
||||||
size_t attributesCovered = 0;
|
size_t attributesCovered = 0;
|
||||||
size_t attributesCoveredByEquality = 0;
|
size_t attributesCoveredByEquality = 0;
|
||||||
double equalityReductionFactor = 20.0;
|
double equalityReductionFactor = 20.0;
|
||||||
double estimatedCosts = static_cast<double>(itemsInIndex);
|
double estimatedItems = static_cast<double>(itemsInIndex);
|
||||||
|
|
||||||
for (size_t i = 0; i < idx->fields().size(); ++i) {
|
for (size_t i = 0; i < idx->fields().size(); ++i) {
|
||||||
auto it = found.find(i);
|
auto it = found.find(i);
|
||||||
|
@ -244,7 +244,7 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
|
||||||
|
|
||||||
if (containsEquality) {
|
if (containsEquality) {
|
||||||
++attributesCoveredByEquality;
|
++attributesCoveredByEquality;
|
||||||
estimatedCosts /= equalityReductionFactor;
|
estimatedItems /= equalityReductionFactor;
|
||||||
|
|
||||||
// decrease the effect of the equality reduction factor
|
// decrease the effect of the equality reduction factor
|
||||||
equalityReductionFactor *= 0.25;
|
equalityReductionFactor *= 0.25;
|
||||||
|
@ -257,10 +257,10 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
|
||||||
if (nodes.size() >= 2) {
|
if (nodes.size() >= 2) {
|
||||||
// at least two (non-equality) conditions. probably a range with lower
|
// at least two (non-equality) conditions. probably a range with lower
|
||||||
// and upper bound defined
|
// and upper bound defined
|
||||||
estimatedCosts /= 7.5;
|
estimatedItems /= 7.5;
|
||||||
} else {
|
} else {
|
||||||
// one (non-equality). this is either a lower or a higher bound
|
// one (non-equality). this is either a lower or a higher bound
|
||||||
estimatedCosts /= 2.0;
|
estimatedItems /= 2.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -274,91 +274,87 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
|
||||||
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
|
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
|
||||||
costs.coveredAttributes = attributesCovered;
|
costs.coveredAttributes = attributesCovered;
|
||||||
|
|
||||||
if (attributesCoveredByEquality == idx->fields().size() &&
|
if (attributesCovered > 0 &&
|
||||||
(idx->unique() || idx->implicitlyUnique())) {
|
(!idx->sparse() || attributesCovered == idx->fields().size())) {
|
||||||
// index is unique and condition covers all attributes by equality
|
|
||||||
costs.supportsCondition = true;
|
|
||||||
|
|
||||||
if (itemsInIndex == 0) {
|
|
||||||
costs.estimatedItems = 0;
|
|
||||||
costs.estimatedCosts = 0.0;
|
|
||||||
} else {
|
|
||||||
costs.estimatedItems = values;
|
|
||||||
costs.estimatedCosts = (std::max)(static_cast<double>(1),
|
|
||||||
std::log2(static_cast<double>(itemsInIndex)) * values);
|
|
||||||
}
|
|
||||||
// cost is already low... now slightly prioritize unique indexes
|
|
||||||
costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
|
|
||||||
} else if (attributesCovered > 0 &&
|
|
||||||
(!idx->sparse() || attributesCovered == idx->fields().size())) {
|
|
||||||
// if the condition contains at least one index attribute and is not sparse,
|
// if the condition contains at least one index attribute and is not sparse,
|
||||||
// or the index is sparse and all attributes are covered by the condition,
|
// or the index is sparse and all attributes are covered by the condition,
|
||||||
// then it can be used (note: additional checks for condition parts in
|
// then it can be used (note: additional checks for condition parts in
|
||||||
// sparse indexes are contained in Index::canUseConditionPart)
|
// sparse indexes are contained in Index::canUseConditionPart)
|
||||||
costs.supportsCondition = true;
|
costs.supportsCondition = true;
|
||||||
costs.estimatedItems = static_cast<size_t>(
|
|
||||||
(std::max)(static_cast<size_t>(estimatedCosts * values), static_cast<size_t>(1)));
|
|
||||||
|
|
||||||
// check if the index has a selectivity estimate ready
|
if (itemsInIndex > 0) {
|
||||||
if (idx->hasSelectivityEstimate() &&
|
costs.estimatedItems = static_cast<size_t>(estimatedItems * values);
|
||||||
attributesCoveredByEquality == idx->fields().size()) {
|
|
||||||
double estimate = idx->selectivityEstimate();
|
// check if the index has a selectivity estimate ready
|
||||||
if (estimate > 0.0) {
|
if (idx->hasSelectivityEstimate() &&
|
||||||
costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
|
attributesCoveredByEquality == idx->fields().size()) {
|
||||||
}
|
double estimate = idx->selectivityEstimate();
|
||||||
} else if (attributesCoveredByEquality > 0) {
|
if (estimate > 0.0) {
|
||||||
TRI_ASSERT(attributesCovered > 0);
|
costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
|
||||||
// the index either does not have a selectivity estimate, or not all
|
|
||||||
// of its attributes are covered by the condition using an equality lookup
|
|
||||||
// however, if the search condition uses equality lookups on the prefix
|
|
||||||
// of the index, then we can check if there is another index which is just
|
|
||||||
// indexing the prefix, and "steal" the selectivity estimate from that
|
|
||||||
// index for example, if the condition is "doc.a == 1 && doc.b > 2", and
|
|
||||||
// the current index is created on ["a", "b"], then we will not use the
|
|
||||||
// selectivity estimate of the current index (due to the range condition
|
|
||||||
// used for the second index attribute). however, if there is another
|
|
||||||
// index on just "a", we know that the current index is at least as
|
|
||||||
// selective as the index on the single attribute. and that the extra
|
|
||||||
// condition we have will make it even more selectivity. so in this case
|
|
||||||
// we will re-use the selectivity estimate from the other index, and are
|
|
||||||
// happy.
|
|
||||||
for (auto const& otherIdx : allIndexes) {
|
|
||||||
auto const* other = otherIdx.get();
|
|
||||||
if (other == idx || !other->hasSelectivityEstimate()) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
auto const& otherFields = other->fields();
|
} else if (attributesCoveredByEquality > 0) {
|
||||||
if (otherFields.size() >= attributesCovered) {
|
TRI_ASSERT(attributesCovered > 0);
|
||||||
// other index has more fields than we have, or the same amount.
|
// the index either does not have a selectivity estimate, or not all
|
||||||
// then it will not be helpful
|
// of its attributes are covered by the condition using an equality lookup
|
||||||
continue;
|
// however, if the search condition uses equality lookups on the prefix
|
||||||
}
|
// of the index, then we can check if there is another index which is just
|
||||||
size_t matches = 0;
|
// indexing the prefix, and "steal" the selectivity estimate from that
|
||||||
for (size_t i = 0; i < otherFields.size(); ++i) {
|
// index for example, if the condition is "doc.a == 1 && doc.b > 2", and
|
||||||
if (otherFields[i] != idx->fields()[i]) {
|
// the current index is created on ["a", "b"], then we will not use the
|
||||||
break;
|
// selectivity estimate of the current index (due to the range condition
|
||||||
|
// used for the second index attribute). however, if there is another
|
||||||
|
// index on just "a", we know that the current index is at least as
|
||||||
|
// selective as the index on the single attribute. and that the extra
|
||||||
|
// condition we have will make it even more selectivity. so in this case
|
||||||
|
// we will re-use the selectivity estimate from the other index, and are
|
||||||
|
// happy.
|
||||||
|
for (auto const& otherIdx : allIndexes) {
|
||||||
|
auto const* other = otherIdx.get();
|
||||||
|
if (other == idx || !other->hasSelectivityEstimate()) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
++matches;
|
auto const& otherFields = other->fields();
|
||||||
}
|
if (otherFields.size() >= attributesCovered) {
|
||||||
if (matches == otherFields.size()) {
|
// other index has more fields than we have, or the same amount.
|
||||||
double estimate = other->selectivityEstimate();
|
// then it will not be helpful
|
||||||
if (estimate > 0.0) {
|
continue;
|
||||||
// reuse the estimate from the other index
|
}
|
||||||
costs.estimatedItems = static_cast<size_t>(1.0 / estimate);
|
size_t matches = 0;
|
||||||
break;
|
for (size_t i = 0; i < otherFields.size(); ++i) {
|
||||||
|
if (otherFields[i] != idx->fields()[i]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++matches;
|
||||||
|
}
|
||||||
|
if (matches == otherFields.size()) {
|
||||||
|
double estimate = other->selectivityEstimate();
|
||||||
|
if (estimate > 0.0) {
|
||||||
|
// reuse the estimate from the other index
|
||||||
|
costs.estimatedItems = static_cast<size_t>(1.0 / estimate * values);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (itemsInIndex == 0) {
|
// costs.estimatedItems is always set here, make it at least 1
|
||||||
costs.estimatedCosts = 0.0;
|
costs.estimatedItems = std::max(size_t(1), costs.estimatedItems);
|
||||||
} else {
|
|
||||||
// lookup cost is O(log(n))
|
// seek cost is O(log(n))
|
||||||
costs.estimatedCosts = (std::max)(static_cast<double>(1),
|
costs.estimatedCosts = std::max(double(1.0),
|
||||||
std::log2(static_cast<double>(itemsInIndex)) * values);
|
std::log2(double(itemsInIndex)) * values);
|
||||||
|
// add per-document processing cost
|
||||||
|
costs.estimatedCosts += costs.estimatedItems * 0.05;
|
||||||
// slightly prefer indexes that cover more attributes
|
// slightly prefer indexes that cover more attributes
|
||||||
costs.estimatedCosts -= (attributesCovered - 1) * 0.02;
|
costs.estimatedCosts -= (attributesCovered - 1) * 0.02;
|
||||||
|
|
||||||
|
// cost is already low... now slightly prioritize unique indexes
|
||||||
|
if (idx->unique() || idx->implicitlyUnique()) {
|
||||||
|
costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// box the estimated costs to [0 - inf
|
||||||
|
costs.estimatedCosts = std::max(double(0.0), costs.estimatedCosts);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// index does not help for this condition
|
// index does not help for this condition
|
||||||
|
@ -397,7 +393,7 @@ Index::SortCosts SortedIndexAttributeMatcher::supportsSortCondition(
|
||||||
costs.supportsCondition = true;
|
costs.supportsCondition = true;
|
||||||
} else if (costs.coveredAttributes > 0) {
|
} else if (costs.coveredAttributes > 0) {
|
||||||
costs.estimatedCosts = (itemsInIndex / costs.coveredAttributes) *
|
costs.estimatedCosts = (itemsInIndex / costs.coveredAttributes) *
|
||||||
std::log2(static_cast<double>(itemsInIndex));
|
std::log2(double(itemsInIndex));
|
||||||
if (idx->isPersistent() && sortCondition->isDescending()) {
|
if (idx->isPersistent() && sortCondition->isDescending()) {
|
||||||
// reverse iteration is more expensive
|
// reverse iteration is more expensive
|
||||||
costs.estimatedCosts *= 4;
|
costs.estimatedCosts *= 4;
|
||||||
|
|
|
@ -634,14 +634,19 @@ std::pair<bool, bool> transaction::Methods::findIndexHandleForAndNode(
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TOPIC("7278d", TRACE, Logger::FIXME)
|
LOG_TOPIC("7278d", TRACE, Logger::FIXME)
|
||||||
<< "looking at index: " << idx.get() << ", isSorted: " << idx->isSorted()
|
<< "looking at index: " << idx.get()
|
||||||
<< ", isSparse: " << idx->sparse() << ", fields: " << idx->fields().size()
|
<< ", isSorted: " << idx->isSorted()
|
||||||
<< ", supportsFilter: " << supportsFilter << ", supportsSort: " << supportsSort
|
<< ", isSparse: " << idx->sparse()
|
||||||
<< ", filterCost: " << filterCost << ", sortCost: " << sortCost
|
<< ", fields: " << idx->fields().size()
|
||||||
<< ", totalCost: " << totalCost << ", isOnlyAttributeAccess: " << isOnlyAttributeAccess
|
<< ", supportsFilter: " << supportsFilter
|
||||||
|
<< ", supportsSort: " << supportsSort
|
||||||
|
<< ", filterCost: " << (supportsFilter ? filterCost : 0.0)
|
||||||
|
<< ", sortCost: " << (supportsSort ? sortCost : 0.0)
|
||||||
|
<< ", totalCost: " << totalCost
|
||||||
|
<< ", isOnlyAttributeAccess: " << isOnlyAttributeAccess
|
||||||
<< ", isUnidirectional: " << sortCondition.isUnidirectional()
|
<< ", isUnidirectional: " << sortCondition.isUnidirectional()
|
||||||
<< ", isOnlyEqualityMatch: " << node->isOnlyEqualityMatch()
|
<< ", isOnlyEqualityMatch: " << node->isOnlyEqualityMatch()
|
||||||
<< ", itemsInIndex: " << itemsInIndex;
|
<< ", itemsInIndex/estimatedItems: " << itemsInIndex;
|
||||||
|
|
||||||
if (bestIndex == nullptr || totalCost < bestCost) {
|
if (bestIndex == nullptr || totalCost < bestCost) {
|
||||||
bestIndex = idx;
|
bestIndex = idx;
|
||||||
|
|
|
@ -2082,8 +2082,13 @@ function complexFilteringSuite() {
|
||||||
assertEqual(stats.scannedFull, 0);
|
assertEqual(stats.scannedFull, 0);
|
||||||
// The lookup will be using the primary Index.
|
// The lookup will be using the primary Index.
|
||||||
// It will find 0 elements.
|
// It will find 0 elements.
|
||||||
assertEqual(stats.scannedIndex, 0);
|
if (mmfilesEngine) {
|
||||||
assertEqual(stats.filtered, 0);
|
assertEqual(stats.scannedIndex, 1);
|
||||||
|
assertEqual(stats.filtered, 1);
|
||||||
|
} else {
|
||||||
|
assertEqual(stats.scannedIndex, 0);
|
||||||
|
assertEqual(stats.filtered, 0);
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
testVertexLevel0: function () {
|
testVertexLevel0: function () {
|
||||||
|
|
|
@ -69,8 +69,8 @@ function ahuacatlSkiplistOverlappingTestSuite () {
|
||||||
collection.ensureIndex({type: 'skiplist', name: 'skip_b_a', fields: ['b', 'a']});
|
collection.ensureIndex({type: 'skiplist', name: 'skip_b_a', fields: ['b', 'a']});
|
||||||
|
|
||||||
const isMMFiles = db._engine().name === "mmfiles";
|
const isMMFiles = db._engine().name === "mmfiles";
|
||||||
defaultEqualityIndex = isMMFiles ? 'skip_a' : 'hash_a';
|
defaultEqualityIndex = isMMFiles ? 'hash_a' : 'hash_a';
|
||||||
alternateEqualityIndex = isMMFiles ? 'hash_a' : 'skip_a';
|
alternateEqualityIndex = isMMFiles ? 'skip_a' : 'skip_a';
|
||||||
defaultSortingIndex = isMMFiles ? 'skip_a' : 'hash_a';
|
defaultSortingIndex = isMMFiles ? 'skip_a' : 'hash_a';
|
||||||
alternateSortingIndex = 'skip_a_b';
|
alternateSortingIndex = 'skip_a_b';
|
||||||
},
|
},
|
||||||
|
|
|
@ -0,0 +1,236 @@
|
||||||
|
/*jshint globalstrict:false, strict:false */
|
||||||
|
/*global assertEqual, assertTrue, AQL_EXPLAIN */
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief test the index
|
||||||
|
///
|
||||||
|
/// @file
|
||||||
|
///
|
||||||
|
/// DISCLAIMER
|
||||||
|
///
|
||||||
|
/// Copyright 2018-2019 ArangoDB GmbH, Cologne, Germany
|
||||||
|
///
|
||||||
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
/// you may not use this file except in compliance with the License.
|
||||||
|
/// You may obtain a copy of the License at
|
||||||
|
///
|
||||||
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
///
|
||||||
|
/// Unless required by applicable law or agreed to in writing, software
|
||||||
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
/// See the License for the specific language governing permissions and
|
||||||
|
/// limitations under the License.
|
||||||
|
///
|
||||||
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||||
|
///
|
||||||
|
/// @author 2018 Jan Steemann
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
const jsunity = require("jsunity");
|
||||||
|
const internal = require("internal");
|
||||||
|
const db = internal.db;
|
||||||
|
|
||||||
|
function indexSelectivitySuite() {
|
||||||
|
'use strict';
|
||||||
|
const cn = "UnitTestsCollectionIdx";
|
||||||
|
|
||||||
|
let assertIndexUsed = function(expected, plan) {
|
||||||
|
let nodes = plan.nodes.filter(function(node) {
|
||||||
|
return node.type === 'IndexNode';
|
||||||
|
});
|
||||||
|
assertEqual(1, nodes.length);
|
||||||
|
let node = nodes[0];
|
||||||
|
assertEqual(expected, node.indexes[0].fields);
|
||||||
|
};
|
||||||
|
|
||||||
|
return {
|
||||||
|
setUp : function () {
|
||||||
|
db._drop(cn);
|
||||||
|
db._create(cn);
|
||||||
|
},
|
||||||
|
|
||||||
|
tearDown : function () {
|
||||||
|
db._drop(cn);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoIndexesSingleField: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["b"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a"], indexes[1].fields);
|
||||||
|
assertEqual(["b"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a"], plan);
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoIndexesMultipleFields: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["b"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a"], indexes[1].fields);
|
||||||
|
assertEqual(["b"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.b == @value && doc.a == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoIndexesMultipleFieldsOtherIndexCreationOrder: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["b"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["b"], indexes[1].fields);
|
||||||
|
assertEqual(["a"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.b == @value && doc.a == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoCompositeIndexesMultipleFields: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a", "b"], indexes[1].fields);
|
||||||
|
assertEqual(["a", "b", "c"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value && doc.c == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a", "b", "c"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoCompositeIndexesMultipleFieldsOtherIndexCreationOrder: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a", "b", "c"], indexes[1].fields);
|
||||||
|
assertEqual(["a", "b"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value && doc.c == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a", "b", "c"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoCompositeIndexesMultipleFieldsPartialLookup: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a", "b"], indexes[1].fields);
|
||||||
|
assertEqual(["a", "b", "c"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate < indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a", "b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
testTwoCompositeIndexesMultipleFieldsPartialLookupOtherIndexCreationOrder: function () {
|
||||||
|
let c = db._collection(cn);
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b", "c"] });
|
||||||
|
c.ensureIndex({ type: "hash", fields: ["a", "b"] });
|
||||||
|
|
||||||
|
// index on "a" has lower selectivity than index on "b"
|
||||||
|
for (let i = 0; i < 1000; ++i) {
|
||||||
|
c.insert({ a: (i < 100 ? i : 100), b: (i < 200 ? i : 200), c: i });
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.waitForEstimatorSync();
|
||||||
|
let indexes = c.indexes();
|
||||||
|
assertEqual(["a", "b", "c"], indexes[1].fields);
|
||||||
|
assertEqual(["a", "b"], indexes[2].fields);
|
||||||
|
assertTrue(indexes[1].selectivityEstimate > indexes[2].selectivityEstimate);
|
||||||
|
|
||||||
|
let query, plan;
|
||||||
|
|
||||||
|
query = "FOR doc IN @@collection FILTER doc.a == @value && doc.b == @value RETURN doc";
|
||||||
|
plan = AQL_EXPLAIN(query, { "@collection": cn, value: 2 }).plan;
|
||||||
|
assertIndexUsed(["a", "b"], plan);
|
||||||
|
},
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// execute the suite defined above
jsunity.run(indexSelectivitySuite);

// report the aggregated results back to the test runner
return jsunity.done();
|
Loading…
Reference in New Issue