1
0
Fork 0

don't unfairly favor the edge index when comparing it to more specialized indexes on `_from`/`_to` plus other attributes (#9362)

This commit is contained in:
Jan 2019-07-01 16:32:42 +02:00 committed by GitHub
parent 671380b8fb
commit fe19b8aaae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 107 additions and 118 deletions

View File

@ -224,36 +224,21 @@ Index::FilterCosts ClusterIndex::supportsFilterCondition(
arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference,
size_t itemsInIndex) const {
switch (_indexType) {
case TRI_IDX_TYPE_PRIMARY_INDEX: {
case TRI_IDX_TYPE_PRIMARY_INDEX: {
if (_engineType == ClusterEngineType::RocksDBEngine) {
std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>> found;
std::unordered_set<std::string> nonNullAttributes;
std::size_t values = 0;
SortedIndexAttributeMatcher::matchAttributes(this, node, reference, found,
values, nonNullAttributes,
/*skip evaluation (during execution)*/ false);
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
if (!found.empty()) {
costs.supportsCondition = true;
costs.coveredAttributes = found.size();
costs.estimatedItems = values;
costs.estimatedCosts = static_cast<double>(values);
}
return costs;
return SortedIndexAttributeMatcher::supportsFilterCondition(allIndexes, this, node, reference, itemsInIndex);
}
// MMFiles et al
SimpleAttributeEqualityMatcher matcher(PrimaryIndexAttributes);
return matcher.matchOne(this, node, reference, itemsInIndex);
}
case TRI_IDX_TYPE_GEO_INDEX:
case TRI_IDX_TYPE_GEO1_INDEX:
case TRI_IDX_TYPE_GEO2_INDEX:
case TRI_IDX_TYPE_FULLTEXT_INDEX:
case TRI_IDX_TYPE_IRESEARCH_LINK:
case TRI_IDX_TYPE_NO_ACCESS_INDEX: {
// should not be called for these indexes
return Index::supportsFilterCondition(allIndexes, node, reference, itemsInIndex);
case TRI_IDX_TYPE_EDGE_INDEX: {
if (_engineType == ClusterEngineType::RocksDBEngine) {
return SortedIndexAttributeMatcher::supportsFilterCondition(allIndexes, this, node, reference, itemsInIndex);
}
// MMFiles et al
SimpleAttributeEqualityMatcher matcher(this->_fields);
return matcher.matchOne(this, node, reference, itemsInIndex);
}
case TRI_IDX_TYPE_HASH_INDEX: {
if (_engineType == ClusterEngineType::MMFilesEngine) {
@ -265,11 +250,6 @@ Index::FilterCosts ClusterIndex::supportsFilterCondition(
}
break;
}
case TRI_IDX_TYPE_EDGE_INDEX: {
// same for both engines
SimpleAttributeEqualityMatcher matcher(this->_fields);
return matcher.matchOne(this, node, reference, itemsInIndex);
}
case TRI_IDX_TYPE_SKIPLIST_INDEX:
case TRI_IDX_TYPE_TTL_INDEX:
@ -278,6 +258,16 @@ Index::FilterCosts ClusterIndex::supportsFilterCondition(
return SortedIndexAttributeMatcher::supportsFilterCondition(allIndexes, this,
node, reference, itemsInIndex);
}
case TRI_IDX_TYPE_GEO_INDEX:
case TRI_IDX_TYPE_GEO1_INDEX:
case TRI_IDX_TYPE_GEO2_INDEX:
case TRI_IDX_TYPE_FULLTEXT_INDEX:
case TRI_IDX_TYPE_IRESEARCH_LINK:
case TRI_IDX_TYPE_NO_ACCESS_INDEX: {
// should not be called for these indexes
return Index::supportsFilterCondition(allIndexes, node, reference, itemsInIndex);
}
case TRI_IDX_TYPE_UNKNOWN:
break;

View File

@ -42,9 +42,12 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchOne(arangodb::Index cons
arangodb::aql::AstNode const* node,
arangodb::aql::Variable const* reference,
size_t itemsInIndex) {
size_t postFilterConditions = 0;
std::unordered_set<std::string> nonNullAttributes;
_found.clear();
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
size_t const n = node->numMembers();
for (size_t i = 0; i < n; ++i) {
@ -75,13 +78,19 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchOne(arangodb::Index cons
}
if (which != nullptr) {
// we can use the index
return calculateIndexCosts(index, which, itemsInIndex * values, 1);
// we can use the index for the condition
costs = calculateIndexCosts(index, which, itemsInIndex * values, 1);
} else {
// we cannot use the index for the condition
++postFilterConditions;
}
}
// honor the costs of post-index filter conditions
costs.estimatedCosts += costs.estimatedItems * postFilterConditions;
// set to defaults
return Index::FilterCosts::defaultCosts(itemsInIndex);
return costs;
}
/// @brief match all of the attributes, in any order
@ -94,10 +103,12 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
_found.clear();
arangodb::aql::AstNode const* which = nullptr;
size_t postFilterConditions = 0;
size_t values = 1;
size_t const n = node->numMembers();
for (size_t i = 0; i < n; ++i) {
bool matches = false;
auto op = node->getMemberUnchecked(i);
if (index->sparse() && (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_NE ||
@ -114,26 +125,29 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
} else if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_EQ) {
TRI_ASSERT(op->numMembers() == 2);
if (accessFitsIndex(index, op->getMember(0), op->getMember(1), op,
if (accessFitsIndex(index, op->getMemberUnchecked(0), op->getMemberUnchecked(1), op,
reference, nonNullAttributes, false)) {
which = op->getMember(1);
} else if (accessFitsIndex(index, op->getMember(1), op->getMember(0), op,
which = op->getMemberUnchecked(1);
matches = true;
} else if (accessFitsIndex(index, op->getMemberUnchecked(1), op->getMemberUnchecked(0), op,
reference, nonNullAttributes, false)) {
which = op->getMember(0);
which = op->getMemberUnchecked(0);
matches = true;
}
} else if (op->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN) {
TRI_ASSERT(op->numMembers() == 2);
if (accessFitsIndex(index, op->getMember(0), op->getMember(1), op,
if (accessFitsIndex(index, op->getMemberUnchecked(0), op->getMemberUnchecked(1), op,
reference, nonNullAttributes, false)) {
which = op->getMember(0);
which = op->getMemberUnchecked(0);
values *= estimateNumberOfArrayMembers(op->getMember(1));
matches = true;
}
}
if (_found.size() == _attributes.size()) {
// got enough attributes
break;
if (!matches) {
// we cannot use the index for this part of the condition
++postFilterConditions;
}
}
@ -156,8 +170,10 @@ Index::FilterCosts SimpleAttributeEqualityMatcher::matchAll(arangodb::Index cons
costs = calculateIndexCosts(index, which, itemsInIndex * values, _found.size());
}
// honor the costs of post-index filter conditions
costs.estimatedCosts += costs.estimatedItems * postFilterConditions;
// return defaults
return costs;
}

View File

@ -56,6 +56,7 @@ bool SortedIndexAttributeMatcher::accessFitsIndex(
(!other->isConstant() || !(other->isIntValue() || other->isDoubleValue()))) {
// TTL index can only be used for numeric lookup values, no date strings or
// anything else
// TODO: move this into the specific index class
return false;
}
@ -141,13 +142,15 @@ void SortedIndexAttributeMatcher::matchAttributes(
arangodb::Index const* idx, arangodb::aql::AstNode const* node,
arangodb::aql::Variable const* reference,
std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>>& found,
size_t& values, std::unordered_set<std::string>& nonNullAttributes, bool isExecution) {
size_t& postFilterConditions, size_t& values,
std::unordered_set<std::string>& nonNullAttributes, bool isExecution) {
// assert we have a properly formed condition - an n-ary conjunction
TRI_ASSERT(node->type == arangodb::aql::NODE_TYPE_OPERATOR_NARY_AND);
// inspect the conjuncts - allowed are binary comparisons and a contains check
for (size_t i = 0; i < node->numMembers(); ++i) {
auto op = node->getMember(i);
bool matches = false;
auto op = node->getMemberUnchecked(i);
switch (op->type) {
case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_NE:
@ -157,21 +160,22 @@ void SortedIndexAttributeMatcher::matchAttributes(
case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GT:
case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_GE:
TRI_ASSERT(op->numMembers() == 2);
accessFitsIndex(idx, op->getMember(0), op->getMember(1), op, reference,
found, nonNullAttributes, isExecution);
accessFitsIndex(idx, op->getMember(1), op->getMember(0), op, reference,
found, nonNullAttributes, isExecution);
matches = accessFitsIndex(idx, op->getMemberUnchecked(0), op->getMemberUnchecked(1), op, reference,
found, nonNullAttributes, isExecution);
matches |= accessFitsIndex(idx, op->getMemberUnchecked(1), op->getMemberUnchecked(0), op, reference,
found, nonNullAttributes, isExecution);
break;
case arangodb::aql::NODE_TYPE_OPERATOR_BINARY_IN:
if (accessFitsIndex(idx, op->getMember(0), op->getMember(1), op,
if (accessFitsIndex(idx, op->getMemberUnchecked(0), op->getMemberUnchecked(1), op,
reference, found, nonNullAttributes, isExecution)) {
if (op->getMember(1)->isAttributeAccessForVariable(reference, /*indexed access*/ false)) {
matches = true;
if (op->getMemberUnchecked(1)->isAttributeAccessForVariable(reference, /*indexed access*/ false)) {
// 'abc' IN doc.attr[*]
++values;
} else {
size_t av = SimpleAttributeEqualityMatcher::estimateNumberOfArrayMembers(
op->getMember(1));
op->getMemberUnchecked(1));
if (av > 1) {
// attr IN [ a, b, c ] => this will produce multiple items, so
// count them!
@ -182,8 +186,14 @@ void SortedIndexAttributeMatcher::matchAttributes(
break;
default:
matches = false;
break;
}
if (!matches) {
// count the number of conditions we will not be able to satisfy
++postFilterConditions;
}
}
}
@ -201,7 +211,8 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>> found;
std::unordered_set<std::string> nonNullAttributes;
size_t values = 0;
matchAttributes(idx, node, reference, found, values, nonNullAttributes, false);
size_t postFilterConditions = 0;
matchAttributes(idx, node, reference, found, postFilterConditions, values, nonNullAttributes, false);
bool lastContainsEquality = true;
size_t attributesCovered = 0;
@ -212,10 +223,13 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
for (size_t i = 0; i < idx->fields().size(); ++i) {
auto it = found.find(i);
if (it == found.end()) {
// index attribute not covered by condition
if (it == found.end() || !lastContainsEquality) {
// index attribute not covered by condition, or unsupported condition.
// must abort
break;
}
++attributesCovered;
// check if the current condition contains an equality condition
auto const& nodes = (*it).second;
@ -228,12 +242,6 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
}
}
if (!lastContainsEquality) {
// unsupported condition. must abort
break;
}
++attributesCovered;
if (containsEquality) {
++attributesCoveredByEquality;
estimatedCosts /= equalityReductionFactor;
@ -276,18 +284,13 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
costs.estimatedCosts = 0.0;
} else {
costs.estimatedItems = values;
// ALTERNATIVE: estimatedCost = static_cast<double>(estimatedItems * values);
costs.estimatedCosts = (std::max)(static_cast<double>(1),
std::log2(static_cast<double>(itemsInIndex)) * values);
// cost is already low... now slightly prioritize unique indexes
costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
}
return costs;
}
if (attributesCovered > 0 &&
(!idx->sparse() || attributesCovered == idx->fields().size())) {
// cost is already low... now slightly prioritize unique indexes
costs.estimatedCosts *= 0.995 - 0.05 * (idx->fields().size() - 1);
} else if (attributesCovered > 0 &&
(!idx->sparse() || attributesCovered == idx->fields().size())) {
// if the condition contains at least one index attribute and is not sparse,
// or the index is sparse and all attributes are covered by the condition,
// then it can be used (note: additional checks for condition parts in
@ -357,11 +360,14 @@ Index::FilterCosts SortedIndexAttributeMatcher::supportsFilterCondition(
// slightly prefer indexes that cover more attributes
costs.estimatedCosts -= (attributesCovered - 1) * 0.02;
}
return costs;
} else {
// index does not help for this condition
TRI_ASSERT(!costs.supportsCondition);
}
// honor the costs of post-index filter conditions
costs.estimatedCosts += costs.estimatedItems * postFilterConditions;
// index does not help for this condition
TRI_ASSERT(!costs.supportsCondition);
return costs;
}
@ -420,8 +426,9 @@ arangodb::aql::AstNode* SortedIndexAttributeMatcher::specializeCondition(
std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>> found;
std::unordered_set<std::string> nonNullAttributes;
size_t values = 0;
matchAttributes(idx, node, reference, found, values, nonNullAttributes, false);
size_t values = 0; // ignored here
size_t postFilterConditions = 0; // ignored here
matchAttributes(idx, node, reference, found, postFilterConditions, values, nonNullAttributes, false);
std::vector<arangodb::aql::AstNode const*> children;
bool lastContainsEquality = true;
@ -429,13 +436,9 @@ arangodb::aql::AstNode* SortedIndexAttributeMatcher::specializeCondition(
for (size_t i = 0; i < idx->fields().size(); ++i) {
auto it = found.find(i);
if (it == found.end()) {
// index attribute not covered by condition
break;
}
if (!lastContainsEquality) {
// unsupported condition. must abort
if (it == found.end() || !lastContainsEquality) {
// index attribute not covered by condition, or unsupported condition.
// must abort
break;
}

View File

@ -58,8 +58,8 @@ arangodb::aql::AstNode* specializeCondition(arangodb::Index const* index,
void matchAttributes(arangodb::Index const* index, arangodb::aql::AstNode const* node,
arangodb::aql::Variable const* reference,
std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>>& found,
size_t& values, std::unordered_set<std::string>& nonNullAttributes,
bool isExecution);
size_t& postFilterConditions, size_t& values,
std::unordered_set<std::string>& nonNullAttributes, bool isExecution);
/// @brief whether or not the access fits
bool accessFitsIndex(

View File

@ -709,8 +709,8 @@ std::unique_ptr<IndexIterator> MMFilesPersistentIndex::iteratorForCondition(
std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>> found;
std::unordered_set<std::string> nonNullAttributes;
size_t unused = 0;
SortedIndexAttributeMatcher::matchAttributes(this, node, reference, found,
unused, nonNullAttributes, true);
SortedIndexAttributeMatcher::matchAttributes(this, node, reference, found, unused,
unused, nonNullAttributes, true);
// found contains all attributes that are relevant for this node.
// It might be less than fields().

View File

@ -30,7 +30,7 @@
#include "Basics/VelocyPackHelper.h"
#include "Cache/CachedValue.h"
#include "Cache/TransactionalCache.h"
#include "Indexes/SimpleAttributeEqualityMatcher.h"
#include "Indexes/SortedIndexAttributeMatcher.h"
#include "RocksDBEdgeIndex.h"
#include "RocksDBEngine/RocksDBCollection.h"
#include "RocksDBEngine/RocksDBCommon.h"
@ -523,8 +523,7 @@ Index::FilterCosts RocksDBEdgeIndex::supportsFilterCondition(
std::vector<std::shared_ptr<arangodb::Index>> const& allIndexes,
arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference,
size_t itemsInIndex) const {
SimpleAttributeEqualityMatcher matcher(this->_fields);
return matcher.matchOne(this, node, reference, itemsInIndex);
return SortedIndexAttributeMatcher::supportsFilterCondition(allIndexes, this, node, reference, itemsInIndex);
}
/// @brief creates an IndexIterator for the given Condition
@ -560,9 +559,7 @@ std::unique_ptr<IndexIterator> RocksDBEdgeIndex::iteratorForCondition(
/// @brief specializes the condition for use with the index
arangodb::aql::AstNode* RocksDBEdgeIndex::specializeCondition(
arangodb::aql::AstNode* node, arangodb::aql::Variable const* reference) const {
// SimpleAttributeEqualityMatcher matcher(IndexAttributes);
SimpleAttributeEqualityMatcher matcher(this->_fields);
return matcher.specializeOne(this, node, reference);
return SortedIndexAttributeMatcher::specializeCondition(this, node, reference);
}
static std::string FindMedian(rocksdb::Iterator* it, std::string const& start,

View File

@ -673,22 +673,7 @@ Index::FilterCosts RocksDBPrimaryIndex::supportsFilterCondition(
std::vector<std::shared_ptr<arangodb::Index>> const& allIndexes,
arangodb::aql::AstNode const* node, arangodb::aql::Variable const* reference,
size_t itemsInIndex) const {
std::unordered_map<size_t, std::vector<arangodb::aql::AstNode const*>> found;
std::unordered_set<std::string> nonNullAttributes;
std::size_t values = 0;
SortedIndexAttributeMatcher::matchAttributes(this, node, reference, found,
values, nonNullAttributes,
/*skip evaluation (during execution)*/ false);
Index::FilterCosts costs = Index::FilterCosts::defaultCosts(itemsInIndex);
if (!found.empty()) {
costs.supportsCondition = true;
costs.coveredAttributes = 1; // always a single attribute
costs.estimatedItems = values;
costs.estimatedCosts = static_cast<double>(values);
}
return costs;
return SortedIndexAttributeMatcher::supportsFilterCondition(allIndexes, this, node, reference, itemsInIndex);
}
Index::SortCosts RocksDBPrimaryIndex::supportsSortCondition(arangodb::aql::SortCondition const* sortCondition,

View File

@ -1079,7 +1079,7 @@ std::unique_ptr<IndexIterator> RocksDBVPackIndex::iteratorForCondition(
size_t unused = 0;
SortedIndexAttributeMatcher::matchAttributes(this, node, reference, found,
unused, nonNullAttributes, true);
unused, unused, nonNullAttributes, true);
// found contains all attributes that are relevant for this node.
// It might be less than fields().

View File

@ -2541,14 +2541,14 @@ function MeasurementsMovedFromAQLSuite() {
validateNumericValues(actual, expected);
},
testAbsoluteBetweness: function () {
testAbsoluteBetweeness: function () {
var actual = g._absoluteBetweenness(vertexIds.Anton);
var expected = { };
expected[vertexIds.Anton] = 0;
validateNumericValues(actual, expected);
},
testAbsoluteBetwenessAll: function () {
testAbsoluteBetweenessAll: function () {
var actual = g._absoluteBetweenness({});
var expected = { };
expected[vertexIds.Anton] = 0;
@ -2561,7 +2561,7 @@ function MeasurementsMovedFromAQLSuite() {
validateNumericValues(actual, expected);
},
testAbsoluteBetwenessExample: function () {
testAbsoluteBetweenessExample: function () {
var actual = g._absoluteBetweenness({gender: "female"});
var expected = { };
expected[vertexIds.Berta] = 8;
@ -2569,7 +2569,7 @@ function MeasurementsMovedFromAQLSuite() {
validateNumericValues(actual, expected);
},
testAbsoluteBetwenessAllOutbound: function () {
testAbsoluteBetweenessAllOutbound: function () {
var actual = g._absoluteBetweenness({}, {direction: "outbound"});
var expected = { };
expected[vertexIds.Anton] = 0;
@ -2582,7 +2582,7 @@ function MeasurementsMovedFromAQLSuite() {
validateNumericValues(actual, expected);
},
testAbsoluteBetwenessAllInbound: function () {
testAbsoluteBetweenessAllInbound: function () {
var actual = g._absoluteBetweenness({}, {direction: "inbound"});
var expected = { };
expected[vertexIds.Anton] = 0;
@ -2595,7 +2595,7 @@ function MeasurementsMovedFromAQLSuite() {
validateNumericValues(actual, expected);
},
testBetwenessAny: function () {
testBetweenessAny: function () {
var actual = g._betweenness();
var expected = { };
expected[vertexIds.Anton] = 0;
@ -2608,7 +2608,7 @@ function MeasurementsMovedFromAQLSuite() {
validateNumericValues(actual, expected);
},
testBetwenessOutbound: function () {
testBetweenessOutbound: function () {
var actual = g._betweenness({direction: "outbound"});
var expected = { };
expected[vertexIds.Anton] = 0;
@ -2621,7 +2621,7 @@ function MeasurementsMovedFromAQLSuite() {
validateNumericValues(actual, expected);
},
testBetwenessInbound: function () {
testBetweenessInbound: function () {
var actual = g._betweenness({direction: "inbound"});
var expected = { };
expected[vertexIds.Anton] = 0;
@ -2634,8 +2634,6 @@ function MeasurementsMovedFromAQLSuite() {
validateNumericValues(actual, expected);
}
};
}