mirror of https://gitee.com/bigwinds/arangodb
fix scrambling of AstNodes that were in use multiple times (#7978)
This commit is contained in:
parent
3c828347dc
commit
d394a6b988
10
CHANGELOG
10
CHANGELOG
|
@ -1,10 +1,18 @@
|
|||
v3.4.3 (XXXX-XX-XX)
|
||||
-------------------
|
||||
|
||||
* fixed an issue with AQL query IN index lookup conditions being converted into
|
||||
empty arrays when they were shared between multiple nodes of a lookup condition
|
||||
that used an IN array lookup in an OR that was multiplied due to DNF transformations
|
||||
|
||||
This issue affected queries such as the following
|
||||
|
||||
FILTER (... && ...) || doc.indexAttribute IN non-empty-array
|
||||
|
||||
* added "peakMemoryUsage" in query results figures, showing the peak memory
|
||||
usage of the executed query. In a cluster, the value contains the peak memory
|
||||
usage across all shards, but it is not summed up across shards.
|
||||
|
||||
|
||||
|
||||
v3.4.2 (2019-01-15)
|
||||
-------------------
|
||||
|
|
|
@ -1046,16 +1046,24 @@ AstNode* Ast::createNodeArray(size_t size) {
|
|||
}
|
||||
|
||||
/// @brief create an AST unique array node, AND-merged from two other arrays
|
||||
/// the resulting array has no particular order
|
||||
AstNode* Ast::createNodeIntersectedArray(AstNode const* lhs, AstNode const* rhs) {
|
||||
TRI_ASSERT(lhs->isArray() && lhs->isConstant());
|
||||
TRI_ASSERT(rhs->isArray() && rhs->isConstant());
|
||||
|
||||
size_t const nl = lhs->numMembers();
|
||||
size_t const nr = rhs->numMembers();
|
||||
size_t nl = lhs->numMembers();
|
||||
size_t nr = rhs->numMembers();
|
||||
|
||||
if (nl > nr) {
|
||||
// we want lhs to be the shorter of the two arrays, so we use less
|
||||
// memory for the lookup cache
|
||||
std::swap(lhs, rhs);
|
||||
std::swap(nl, nr);
|
||||
}
|
||||
|
||||
std::unordered_map<VPackSlice, AstNode const*, arangodb::basics::VelocyPackHelper::VPackHash,
|
||||
arangodb::basics::VelocyPackHelper::VPackEqual>
|
||||
cache(nl + nr, arangodb::basics::VelocyPackHelper::VPackHash(),
|
||||
cache(nl, arangodb::basics::VelocyPackHelper::VPackHash(),
|
||||
arangodb::basics::VelocyPackHelper::VPackEqual());
|
||||
|
||||
for (size_t i = 0; i < nl; ++i) {
|
||||
|
@ -1065,7 +1073,7 @@ AstNode* Ast::createNodeIntersectedArray(AstNode const* lhs, AstNode const* rhs)
|
|||
cache.emplace(slice, member);
|
||||
}
|
||||
|
||||
auto node = createNodeArray(nr / 2);
|
||||
auto node = createNodeArray(cache.size() + nr);
|
||||
|
||||
for (size_t i = 0; i < nr; ++i) {
|
||||
auto member = rhs->getMemberUnchecked(i);
|
||||
|
@ -1082,6 +1090,7 @@ AstNode* Ast::createNodeIntersectedArray(AstNode const* lhs, AstNode const* rhs)
|
|||
}
|
||||
|
||||
/// @brief create an AST unique array node, OR-merged from two other arrays
|
||||
/// the resulting array will be sorted by value
|
||||
AstNode* Ast::createNodeUnionizedArray(AstNode const* lhs, AstNode const* rhs) {
|
||||
TRI_ASSERT(lhs->isArray() && lhs->isConstant());
|
||||
TRI_ASSERT(rhs->isArray() && rhs->isConstant());
|
||||
|
@ -1089,6 +1098,8 @@ AstNode* Ast::createNodeUnionizedArray(AstNode const* lhs, AstNode const* rhs) {
|
|||
size_t const nl = lhs->numMembers();
|
||||
size_t const nr = rhs->numMembers();
|
||||
|
||||
auto node = createNodeArray(nl + nr);
|
||||
|
||||
std::unordered_map<VPackSlice, AstNode const*, arangodb::basics::VelocyPackHelper::VPackHash,
|
||||
arangodb::basics::VelocyPackHelper::VPackEqual>
|
||||
cache(nl + nr, arangodb::basics::VelocyPackHelper::VPackHash(),
|
||||
|
@ -1103,14 +1114,12 @@ AstNode* Ast::createNodeUnionizedArray(AstNode const* lhs, AstNode const* rhs) {
|
|||
}
|
||||
VPackSlice slice = member->computeValue();
|
||||
|
||||
cache.emplace(slice, member);
|
||||
if (cache.emplace(slice, member).second) {
|
||||
// only insert unique values
|
||||
node->addMember(member);
|
||||
}
|
||||
}
|
||||
|
||||
auto node = createNodeArray(cache.size());
|
||||
|
||||
for (auto& it : cache) {
|
||||
node->addMember(it.second);
|
||||
}
|
||||
node->sort();
|
||||
|
||||
return node;
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "Basics/AttributeNameParser.h"
|
||||
#include "Basics/Exceptions.h"
|
||||
#include "Basics/NumberUtils.h"
|
||||
#include "Basics/SmallVector.h"
|
||||
#include "Basics/StaticStrings.h"
|
||||
#include "Basics/StringUtils.h"
|
||||
#include "Basics/VelocyPackHelper.h"
|
||||
|
@ -323,14 +324,14 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
|
||||
if (n != usedIndexes.size()) {
|
||||
// sorting will break if the number of ORs is unequal to the number of
|
||||
// indexes
|
||||
// but we shouldn't have got here then
|
||||
// indexes but we shouldn't have got here then
|
||||
TRI_ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
typedef std::pair<arangodb::aql::AstNode*, transaction::Methods::IndexHandle> ConditionData;
|
||||
std::vector<ConditionData*> conditionData;
|
||||
SmallVector<ConditionData*>::allocator_type::arena_type a;
|
||||
SmallVector<ConditionData*> conditionData{a};
|
||||
|
||||
auto cleanup = [&conditionData]() -> void {
|
||||
for (auto& it : conditionData) {
|
||||
|
@ -342,6 +343,8 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
|
||||
std::vector<arangodb::aql::ConditionPart> parts;
|
||||
parts.reserve(n);
|
||||
|
||||
std::pair<arangodb::aql::Variable const*, std::vector<arangodb::basics::AttributeName>> result;
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
// sort the conditions of each AND
|
||||
|
@ -370,7 +373,8 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
auto rhs = operand->getMember(1);
|
||||
|
||||
if (lhs->type == arangodb::aql::AstNodeType::NODE_TYPE_ATTRIBUTE_ACCESS) {
|
||||
std::pair<arangodb::aql::Variable const*, std::vector<arangodb::basics::AttributeName>> result;
|
||||
result.first = nullptr;
|
||||
result.second.clear();
|
||||
|
||||
if (rhs->isConstant() && lhs->isAttributeAccessForVariable(result) &&
|
||||
result.first == variable &&
|
||||
|
@ -383,15 +387,15 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
// vector is now responsible for data
|
||||
auto p = data.release();
|
||||
// also add the pointer to the (non-owning) parts vector
|
||||
parts.emplace_back(
|
||||
arangodb::aql::ConditionPart(result.first, result.second, operand,
|
||||
arangodb::aql::AttributeSideType::ATTRIBUTE_LEFT, p));
|
||||
parts.emplace_back(result.first, result.second, operand,
|
||||
arangodb::aql::AttributeSideType::ATTRIBUTE_LEFT, p);
|
||||
}
|
||||
}
|
||||
|
||||
if (rhs->type == arangodb::aql::AstNodeType::NODE_TYPE_ATTRIBUTE_ACCESS ||
|
||||
rhs->type == arangodb::aql::AstNodeType::NODE_TYPE_EXPANSION) {
|
||||
std::pair<arangodb::aql::Variable const*, std::vector<arangodb::basics::AttributeName>> result;
|
||||
result.first = nullptr;
|
||||
result.second.clear();
|
||||
|
||||
if (lhs->isConstant() && rhs->isAttributeAccessForVariable(result) &&
|
||||
result.first == variable) {
|
||||
|
@ -402,9 +406,8 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
// vector is now responsible for data
|
||||
auto p = data.release();
|
||||
// also add the pointer to the (non-owning) parts vector
|
||||
parts.emplace_back(
|
||||
arangodb::aql::ConditionPart(result.first, result.second, operand,
|
||||
arangodb::aql::AttributeSideType::ATTRIBUTE_RIGHT, p));
|
||||
parts.emplace_back(result.first, result.second, operand,
|
||||
arangodb::aql::AttributeSideType::ATTRIBUTE_RIGHT, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -415,8 +418,8 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
|
||||
// check if all parts use the same variable and attribute
|
||||
for (size_t i = 1; i < n; ++i) {
|
||||
auto& lhs = parts[i - 1];
|
||||
auto& rhs = parts[i];
|
||||
auto const& lhs = parts[i - 1];
|
||||
auto const& rhs = parts[i];
|
||||
|
||||
if (lhs.variable != rhs.variable || lhs.attributeName != rhs.attributeName) {
|
||||
// oops, the different OR parts are on different variables or attributes
|
||||
|
@ -428,7 +431,7 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
auto& p = parts[i];
|
||||
|
||||
|
||||
if (p.operatorType == arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_IN &&
|
||||
p.valueNode->isArray()) {
|
||||
TRI_ASSERT(p.valueNode->isConstant());
|
||||
|
@ -439,27 +442,39 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
auto emptyArray = ast->createNodeArray();
|
||||
auto mergedIn =
|
||||
ast->createNodeUnionizedArray(parts[previousIn].valueNode, p.valueNode);
|
||||
|
||||
arangodb::aql::AstNode* clone = ast->clone(root->getMember(previousIn));
|
||||
root->changeMember(previousIn, clone);
|
||||
static_cast<ConditionData*>(parts[previousIn].data)->first = clone;
|
||||
|
||||
clone = ast->clone(root->getMember(i));
|
||||
root->changeMember(i, clone);
|
||||
static_cast<ConditionData*>(parts[i].data)->first = clone;
|
||||
|
||||
// can now edit nodes in place...
|
||||
parts[previousIn].valueNode = mergedIn;
|
||||
parts[i].valueNode = emptyArray;
|
||||
|
||||
// must edit nodes in place; TODO change so we can replace with copy
|
||||
|
||||
auto n1 = root->getMember(previousIn)->getMember(0);
|
||||
TEMPORARILY_UNLOCK_NODE(n1);
|
||||
n1->changeMember(1, mergedIn);
|
||||
|
||||
auto n2 = root->getMember(i)->getMember(0);
|
||||
{
|
||||
auto n1 = root->getMember(previousIn)->getMember(0);
|
||||
TRI_ASSERT(n1->type == arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_IN);
|
||||
TEMPORARILY_UNLOCK_NODE(n1);
|
||||
n1->changeMember(1, mergedIn);
|
||||
}
|
||||
|
||||
p.valueNode = emptyArray;
|
||||
{
|
||||
auto n2 = root->getMember(i)->getMember(0);
|
||||
TRI_ASSERT(n2->type == arangodb::aql::AstNodeType::NODE_TYPE_OPERATOR_BINARY_IN);
|
||||
TEMPORARILY_UNLOCK_NODE(n2);
|
||||
n2->changeMember(1, emptyArray);
|
||||
}
|
||||
|
||||
} else {
|
||||
// note first IN
|
||||
previousIn = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// now sort all conditions by variable name, attribute name, attribute value
|
||||
std::sort(parts.begin(), parts.end(),
|
||||
[](arangodb::aql::ConditionPart const& lhs,
|
||||
|
@ -511,13 +526,13 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
});
|
||||
|
||||
TRI_ASSERT(parts.size() == conditionData.size());
|
||||
|
||||
|
||||
// clean up
|
||||
usedIndexes.clear();
|
||||
while (root->numMembers()) {
|
||||
root->removeMemberUnchecked(0);
|
||||
}
|
||||
|
||||
usedIndexes.clear();
|
||||
std::unordered_set<std::string> seenIndexConditions;
|
||||
|
||||
// and rebuild
|
||||
|
@ -552,7 +567,7 @@ bool transaction::Methods::sortOrs(arangodb::aql::Ast* ast, arangodb::aql::AstNo
|
|||
usedIndexes.emplace_back(conditionData->second);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -663,8 +663,25 @@ function optimizerIndexesInOrTestSuite () {
|
|||
assertTrue(results.stats.scannedIndex > 0);
|
||||
assertEqual(0, results.stats.scannedFull);
|
||||
});
|
||||
}
|
||||
},
|
||||
|
||||
testInOrReplacements : function() {
|
||||
c.ensureSkiplist("foo");
|
||||
|
||||
c.insert({ value1: "testi", value2: "foobar", foo: 0 });
|
||||
c.insert({ value1: "testi", value2: "foobar", foo: 1 });
|
||||
c.insert({ value1: "testi", value2: "foobar", foo: 2 });
|
||||
c.insert({ value1: "testi", value2: "foobar", foo: 5 });
|
||||
c.insert({ value1: "testi", value2: "foobar", foo: 9 });
|
||||
c.insert({ value1: "testi", value2: "foobar", foo: 10 });
|
||||
c.insert({ value1: "testi", value2: "foobar", foo: 11 });
|
||||
|
||||
let query = "FOR doc IN " + c.name() + " FILTER (LIKE(doc.value1, '%testi%') OR LIKE(doc.value2, '%foobar%')) AND doc.foo IN [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] RETURN doc";
|
||||
|
||||
let results = AQL_EXECUTE(query);
|
||||
assertEqual(5, results.json.length);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue