1
0
Fork 0

fix scrambling of AstNodes that were in use multiple times (#7978)

This commit is contained in:
Jan 2019-01-19 18:54:08 +01:00 committed by GitHub
parent 3c828347dc
commit d394a6b988
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 88 additions and 39 deletions

View File

@ -1,10 +1,18 @@
v3.4.3 (XXXX-XX-XX)
-------------------
* fixed an issue with AQL query IN index lookup conditions being converted into
empty arrays when they were shared between multiple nodes of a lookup condition
that used an IN array lookup in an OR that was multiplied due to DNF transformations
This issue affected queries such as the following
FILTER (... && ...) || doc.indexAttribute IN non-empty-array
* added "peakMemoryUsage" in query results figures, showing the peak memory
usage of the executed query. In a cluster, the value contains the peak memory
usage across all shards, but it is not summed up across shards.
v3.4.2 (2019-01-15)
-------------------

View File

@ -1046,16 +1046,24 @@ AstNode* Ast::createNodeArray(size_t size) {
}
/// @brief create an AST unique array node, AND-merged from two other arrays
/// the resulting array has no particular order
AstNode* Ast::createNodeIntersectedArray(AstNode const* lhs, AstNode const* rhs) {
TRI_ASSERT(lhs->isArray() && lhs->isConstant());
TRI_ASSERT(rhs->isArray() && rhs->isConstant());
size_t const nl = lhs->numMembers();
size_t const nr = rhs->numMembers();
size_t nl = lhs->numMembers();
size_t nr = rhs->numMembers();
if (nl > nr) {
// we want lhs to be the shorter of the two arrays, so we use less
// memory for the lookup cache
std::swap(lhs, rhs);
std::swap(nl, nr);
}
std::unordered_map<VPackSlice, AstNode const*, arangodb::basics::VelocyPackHelper::VPackHash,
arangodb::basics::VelocyPackHelper::VPackEqual>
cache(nl + nr, arangodb::basics::VelocyPackHelper::VPackHash(),
cache(nl, arangodb::basics::VelocyPackHelper::VPackHash(),
arangodb::basics::VelocyPackHelper::VPackEqual());
for (size_t i = 0; i < nl; ++i) {
@ -1065,7 +1073,7 @@ AstNode* Ast::createNodeIntersectedArray(AstNode const* lhs, AstNode const* rhs)
cache.emplace(slice, member);
}
auto node = createNodeArray(nr / 2);
auto node = createNodeArray(cache.size() + nr);
for (size_t i = 0; i < nr; ++i) {
auto member = rhs->getMemberUnchecked(i);
@ -1082,6 +1090,7 @@ AstNode* Ast::createNodeIntersectedArray(AstNode const* lhs, AstNode const* rhs)
}
/// @brief create an AST unique array node, OR-merged from two other arrays
/// the resulting array will be sorted by value
AstNode* Ast::createNodeUnionizedArray(AstNode const* lhs, AstNode const* rhs) {
TRI_ASSERT(lhs->isArray() && lhs->isConstant());
TRI_ASSERT(rhs->isArray() && rhs->isConstant());
@ -1089,6 +1098,8 @@ AstNode* Ast::createNodeUnionizedArray(AstNode const* lhs, AstNode const* rhs) {
size_t const nl = lhs->numMembers();
size_t const nr = rhs->numMembers();
auto node = createNodeArray(nl + nr);
std::unordered_map<VPackSlice, AstNode const*, arangodb::basics::VelocyPackHelper::VPackHash,
arangodb::basics::VelocyPackHelper::VPackEqual>
cache(nl + nr, arangodb::basics::VelocyPackHelper::VPackHash(),
@ -1103,14 +1114,12 @@ AstNode* Ast::createNodeUnionizedArray(AstNode const* lhs, AstNode const* rhs) {
}
VPackSlice slice = member->computeValue();
cache.emplace(slice, member);
if (cache.emplace(slice, member).second) {
// only insert unique values
node->addMember(member);
}
}
auto node = createNodeArray(cache.size());
for (auto& it : cache) {
node->addMember(it.second);
}
node->sort();
return node;

View File

@ -30,6 +30,7 @@
#include "Basics/AttributeNameParser.h"
#include "Basics/Exceptions.h"
#include "Basics/NumberUtils.h"
#include "Basics/SmallVector.h"
#include "Basics/StaticStrings.h"
#include "Basics/StringUtils.h"
#include "Basics/VelocyPackHelper.h"
@ -323,14 +324,14 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
if (n != usedIndexes.size()) {
// sorting will break if the number of ORs is unequal to the number of
// indexes
// but we shouldn't have got here then
// indexes but we shouldn't have got here then
TRI_ASSERT(false);
return false;
}
typedef std::pair<arangodb::aql::AstNode*, transaction::Methods::IndexHandle> ConditionData;
std::vector<ConditionData*> conditionData;
SmallVector<ConditionData*>::allocator_type::arena_type a;
SmallVector<ConditionData*> conditionData{a};
auto cleanup = [&conditionData]() -> void {
for (auto& it : conditionData) {
@ -342,6 +343,8 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
std::vector<arangodb::aql::ConditionPart> parts;
parts.reserve(n);
std::pair<arangodb::aql::Variable const*, std::vector<arangodb::basics::AttributeName>> result;
for (size_t i = 0; i < n; ++i) {
// sort the conditions of each AND
@ -370,7 +373,8 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
auto rhs = operand->getMember(1);
if (lhs->type == arangodb::aql::AstNodeType::NODE_TYPE_ATTRIBUTE_ACCESS) {
std::pair<arangodb::aql::Variable const*, std::vector<arangodb::basics::AttributeName>> result;
result.first = nullptr;
result.second.clear();
if (rhs->isConstant() && lhs->isAttributeAccessForVariable(result) &&
result.first == variable &&
@ -383,15 +387,15 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
// vector is now responsible for data
auto p = data.release();
// also add the pointer to the (non-owning) parts vector
parts.emplace_back(
arangodb::aql::ConditionPart(result.first, result.second, operand,
arangodb::aql::AttributeSideType::ATTRIBUTE_LEFT, p));
parts.emplace_back(result.first, result.second, operand,
arangodb::aql::AttributeSideType::ATTRIBUTE_LEFT, p);
}
}
if (rhs->type == arangodb::aql::AstNodeType::NODE_TYPE_ATTRIBUTE_ACCESS ||
rhs->type == arangodb::aql::AstNodeType::NODE_TYPE_EXPANSION) {
std::pair<arangodb::aql::Variable const*, std::vector<arangodb::basics::AttributeName>> result;
result.first = nullptr;
result.second.clear();
if (lhs->isConstant() && rhs->isAttributeAccessForVariable(result) &&
result.first == variable) {
@ -402,9 +406,8 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
// vector is now responsible for data
auto p = data.release();
// also add the pointer to the (non-owning) parts vector
parts.emplace_back(
arangodb::aql::ConditionPart(result.first, result.second, operand,
arangodb::aql::AttributeSideType::ATTRIBUTE_RIGHT, p));
parts.emplace_back(result.first, result.second, operand,
arangodb::aql::AttributeSideType::ATTRIBUTE_RIGHT, p);
}
}
}
@ -415,8 +418,8 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
// check if all parts use the same variable and attribute
for (size_t i = 1; i < n; ++i) {
auto& lhs = parts[i - 1];
auto& rhs = parts[i];
auto const& lhs = parts[i - 1];
auto const& rhs = parts[i];
if (lhs.variable != rhs.variable || lhs.attributeName != rhs.attributeName) {
// oops, the different OR parts are on different variables or attributes
@ -428,7 +431,7 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
for (size_t i = 0; i < n; ++i) {
auto& p = parts[i];
if (p.operatorType == arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_IN &&
p.valueNode->isArray()) {
TRI_ASSERT(p.valueNode->isConstant());
@ -439,27 +442,39 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
auto emptyArray = ast->createNodeArray();
auto mergedIn =
ast->createNodeUnionizedArray(parts[previousIn].valueNode, p.valueNode);
arangodb::aql::AstNode* clone = ast->clone(root->getMember(previousIn));
root->changeMember(previousIn, clone);
static_cast<ConditionData*>(parts[previousIn].data)->first = clone;
clone = ast->clone(root->getMember(i));
root->changeMember(i, clone);
static_cast<ConditionData*>(parts[i].data)->first = clone;
// can now edit nodes in place...
parts[previousIn].valueNode = mergedIn;
parts[i].valueNode = emptyArray;
// must edit nodes in place; TODO change so we can replace with copy
auto n1 = root->getMember(previousIn)->getMember(0);
TEMPORARILY_UNLOCK_NODE(n1);
n1->changeMember(1, mergedIn);
auto n2 = root->getMember(i)->getMember(0);
{
auto n1 = root->getMember(previousIn)->getMember(0);
TRI_ASSERT(n1->type == arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_IN);
TEMPORARILY_UNLOCK_NODE(n1);
n1->changeMember(1, mergedIn);
}
p.valueNode = emptyArray;
{
auto n2 = root->getMember(i)->getMember(0);
TRI_ASSERT(n2->type == arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_IN);
TEMPORARILY_UNLOCK_NODE(n2);
n2->changeMember(1, emptyArray);
}
} else {
// note first IN
previousIn = i;
}
}
}
// now sort all conditions by variable name, attribute name, attribute value
std::sort(parts.begin(), parts.end(),
[](arangodb::aql::ConditionPart const& lhs,
@ -511,13 +526,13 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
});
TRI_ASSERT(parts.size() == conditionData.size());
// clean up
usedIndexes.clear();
while (root->numMembers()) {
root->removeMemberUnchecked(0);
}
usedIndexes.clear();
std::unordered_set<std::string> seenIndexConditions;
// and rebuild
@ -552,7 +567,7 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
usedIndexes.emplace_back(conditionData->second);
}
}
return true;
}

View File

@ -663,8 +663,25 @@ function optimizerIndexesInOrTestSuite () {
assertTrue(results.stats.scannedIndex > 0);
assertEqual(0, results.stats.scannedFull);
});
}
},
testInOrReplacements : function() {
c.ensureSkiplist("foo");
c.insert({ value1: "testi", value2: "foobar", foo: 0 });
c.insert({ value1: "testi", value2: "foobar", foo: 1 });
c.insert({ value1: "testi", value2: "foobar", foo: 2 });
c.insert({ value1: "testi", value2: "foobar", foo: 5 });
c.insert({ value1: "testi", value2: "foobar", foo: 9 });
c.insert({ value1: "testi", value2: "foobar", foo: 10 });
c.insert({ value1: "testi", value2: "foobar", foo: 11 });
let query = "FOR doc IN " + c.name() + " FILTER (LIKE(doc.value1, '%testi%') OR LIKE(doc.value2, '%foobar%')) AND doc.foo IN [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] RETURN doc";
let results = AQL_EXECUTE(query);
assertEqual(5, results.json.length);
}
};
}