1
0
Fork 0
arangodb/arangod/Aql/Condition.cpp

1794 lines
62 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////
#include "Condition.h"
#include "Aql/Ast.h"
#include "Aql/AstNode.h"
#include "Aql/Collection.h"
#include "Aql/ExecutionPlan.h"
#include "Aql/Quantifier.h"
#include "Aql/Query.h"
#include "Aql/SortCondition.h"
#include "Aql/Variable.h"
#include "Basics/AttributeNameParser.h"
#include "Basics/Exceptions.h"
#include "Logger/Logger.h"
#include "Transaction/Methods.h"
#ifdef _WIN32
// turn off warnings about too long type name for debug symbols blabla in MSVC
// only...
#pragma warning(disable : 4503)
#endif
using namespace arangodb;
using namespace arangodb::aql;
using CompareResult = ConditionPartCompareResult;
namespace {
// sort comparisons so that > and >= come before < and <=, and that
// != and > come before ==
// we use this to some advantage when we check the conditions for a sparse
// index later.
// if a sparse index is asked whether it can supported a condition such as
// `attr < value1`, this range would include `null`, which the sparse index
// cannot provide.
// however, if we first check other conditions we may find a condition on
// the same attribute, e.g. `attr > value2`.
// this other condition may exclude `null` so we then use the full range
// `value2 < attr < value1` and do not have to discard sub-conditions anymore
// we can also benefit from sorting != before == for hash indexes, if there
// is a condition that excludes null (e.g. != null). if this is tracked first,
// we are sure the index attribute value cannot be null and we can still use
// the sparse index
std::function<int(AstNode const*)> const operationWeight = [](AstNode const* node) {
switch (node->type) {
case NODE_TYPE_OPERATOR_BINARY_NE:
// != before ==, e.g. attr != null && attr == FUNC(abc) for hash
// indexes
return 1;
case NODE_TYPE_OPERATOR_BINARY_GT:
// > before others <, e.g. attr > null && attr < abc
return 2;
case NODE_TYPE_OPERATOR_BINARY_GE:
// >= before others <, e.g. attr >= null && attr < abc
return 3;
case NODE_TYPE_OPERATOR_BINARY_EQ:
// != before ==, e.g. attr != null && attr == FUNC(abc) for hash
// indexes
return 4;
case NODE_TYPE_OPERATOR_BINARY_IN:
return 5;
case NODE_TYPE_OPERATOR_BINARY_NIN:
return 6;
case NODE_TYPE_OPERATOR_BINARY_LT:
// < after others, e.g. attr > null && attr < abc
return 7;
case NODE_TYPE_OPERATOR_BINARY_LE:
// <= after others, e.g. attr >= null && attr <= abc
return 8;
default:
// non-comparison types can come after comparisons
return 9;
}
};
struct PermutationState {
PermutationState(arangodb::aql::AstNode const* value, size_t n)
: value(value), current(0), n(n) {}
arangodb::aql::AstNode const* getValue() const {
if (value->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_OR ||
value->type == arangodb::aql::NODE_TYPE_OPERATOR_NARY_OR) {
TRI_ASSERT(current < n);
return value->getMember(current);
}
TRI_ASSERT(current == 0);
return value;
}
arangodb::aql::AstNode const* value;
size_t current;
size_t const n;
};
//------------------------------------------------------------------------
// Rules for single-valued variables
//------------------------------------------------------------------------
// | | a == y | a != y | a < y | a <= y | a >= y | a > y
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | IMP | OIS | OIS | OIS | IMP | IMP
// x == y | a == x | OIS | IMP | IMP | OIS | OIS | IMP
// x > y | | IMP | OIS | IMP | IMP | OIS | OIS
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO
// x == y | a != x | IMP | OIS | SIO | DIJ | DIJ | SIO
// x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | IMP | OIS | OIS | OIS | IMP | IMP
// x == y | a < x | IMP | OIS | OIS | OIS | IMP | IMP
// x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | IMP | OIS | OIS | OIS | IMP | IMP
// x == y | a <= x | SIO | DIJ | SIO | OIS | CEQ | IMP
// x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO
// x == y | a >= x | SIO | DIJ | IMP | CEQ | OIS | SIO
// x > y | | IMP | OIS | IMP | IMP | OIS | OIS
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO
// x == y | a > x | IMP | OIS | IMP | IMP | OIS | OIS
// x > y | | IMP | OIS | IMP | IMP | OIS | OIS
//------------------------------------------------------------------------
// the 7th column is here as fallback if the operation is not in the table
// above.
// IMP -> IMPOSSIBLE -> empty result -> the complete AND set of conditions can
// be dropped.
// CEQ -> CONVERT_EQUAL -> both conditions can be combined to a equals x.
// DIJ -> DISJOINT -> neither condition is a consequence of the other -> both
// have to stay in place.
// SIO -> SELF_CONTAINED_IN_OTHER -> the left condition is a consequence of the
// right condition
// OIS -> OTHER_CONTAINED_IN_SELF -> the right condition is a consequence of the
// left condition
// If a condition (A) is a consequence of another (B), the solution set of A is
// larger than that of B
// -> A can be dropped.
ConditionPartCompareResult const ResultsTable[3][7][7] = {
{// X < Y
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}},
{// X == Y
{OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, DISJOINT},
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, SELF_CONTAINED_IN_OTHER, DISJOINT,
DISJOINT, SELF_CONTAINED_IN_OTHER, DISJOINT},
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
OTHER_CONTAINED_IN_SELF, CONVERT_EQUAL, IMPOSSIBLE, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, IMPOSSIBLE, CONVERT_EQUAL,
OTHER_CONTAINED_IN_SELF, SELF_CONTAINED_IN_OTHER, DISJOINT},
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}},
{// X > Y
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}}};
//------------------------------------------------------------------------
// Rules for multi-valued variables
//------------------------------------------------------------------------
// | | a == y | a != y | a < y | a <= y | a >= y | a > y
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | DIJ | DIJ | OIS | OIS | DIJ | DIJ
// x == y | a == x | OIS | IMP | DIJ | OIS | OIS | DIJ
// x > y | | DIJ | DIJ | DIJ | DIJ | OIS | OIS
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | DIJ | DIJ | DIJ | DIJ | DIJ | DIJ
// x == y | a != x | IMP | OIS | DIJ | DIJ | DIJ | DIJ
// x > y | | DIJ | DIJ | DIJ | DIJ | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | DIJ | DIJ | OIS | OIS | DIJ | DIJ
// x == y | a < x | DIJ | DIJ | OIS | OIS | DIJ | DIJ
// x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | DIJ | DIJ | OIS | OIS | DIJ | DIJ
// x == y | a <= x | SIO | DIJ | SIO | OIS | DIJ | DIJ
// x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO
// x == y | a >= x | SIO | DIJ | DIJ | DIJ | OIS | SIO
// x > y | | DIJ | DIJ | DIJ | DIJ | OIS | OIS
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO
// x == y | a > x | DIJ | DIJ | DIJ | DIJ | OIS | OIS
// x > y | | DIJ | DIJ | DIJ | DIJ | OIS | OIS
//------------------------------------------------------------------------
// the 7th column is here as fallback if the operation is not in the table
// above.
// IMP -> IMPOSSIBLE -> empty result -> the complete AND set of conditions can
// be dropped.
// CEQ -> CONVERT_EQUAL -> both conditions can be combined to a equals x.
// DIJ -> DISJOINT -> neither condition is a consequence of the other -> both
// have to stay in place.
// SIO -> SELF_CONTAINED_IN_OTHER -> the left condition is a consequence of the
// right condition
// OIS -> OTHER_CONTAINED_IN_SELF -> the right condition is a consequence of the
// left condition
// If a condition (A) is a consequence of another (B), the solution set of A is
// larger than that of B
// -> A can be dropped.
ConditionPartCompareResult const ResultsTableMultiValued[3][7][7] = {
{// X < Y
{DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT,
DISJOINT, DISJOINT, DISJOINT},
{DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT},
{DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}},
{// X == Y
{OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, DISJOINT, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT},
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT,
DISJOINT, DISJOINT, DISJOINT},
{DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
OTHER_CONTAINED_IN_SELF, SELF_CONTAINED_IN_OTHER, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}},
{// X > Y
{DISJOINT, DISJOINT, DISJOINT, DISJOINT,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT,
DISJOINT, DISJOINT, DISJOINT, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}}};
} // namespace
ConditionPart::ConditionPart(Variable const* variable, std::string const& attributeName,
AstNode const* operatorNode, AttributeSideType side, void* data)
: variable(variable),
attributeName(attributeName),
operatorType(operatorNode->type),
operatorNode(operatorNode),
valueNode(nullptr),
data(data),
isExpanded(false) {
if (side == ATTRIBUTE_LEFT) {
valueNode = operatorNode->getMember(1);
} else {
valueNode = operatorNode->getMember(0);
if (Ast::IsReversibleOperator(operatorType)) {
operatorType = Ast::ReverseOperator(operatorType);
}
}
isExpanded = (attributeName.find("[*]") != std::string::npos);
}
ConditionPart::ConditionPart(Variable const* variable,
std::vector<arangodb::basics::AttributeName> const& attributeNames,
AstNode const* operatorNode, AttributeSideType side, void* data)
: ConditionPart(variable, "", operatorNode, side, data) {
TRI_AttributeNamesToString(attributeNames, attributeName, false);
isExpanded = (attributeName.find("[*]") != std::string::npos);
}
ConditionPart::~ConditionPart() {}
/// @brief true if the condition is completely covered by the other condition
bool ConditionPart::isCoveredBy(ConditionPart const& other, bool isReversed) const {
if (variable != other.variable || attributeName != other.attributeName) {
return false;
}
if (!isExpanded && !other.isExpanded && other.operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
other.valueNode->isConstant() && isReversed) {
if (CompareAstNodes(other.valueNode, valueNode, false) == 0) {
return true;
}
}
TRI_ASSERT(valueNode != nullptr);
TRI_ASSERT(other.valueNode != nullptr);
if (!valueNode->isConstant() || !other.valueNode->isConstant()) {
return false;
}
// special cases for IN...
if (!isExpanded && !other.isExpanded && other.operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
other.valueNode->isConstant() && other.valueNode->isArray()) {
if (operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
valueNode->isConstant() && valueNode->isArray()) {
// compare IN with an IN
// this has quadratic complexity
size_t const n1 = valueNode->numMembers();
size_t const n2 = other.valueNode->numMembers();
// maximum number of comparisons that we will accept
// otherwise the optimization will be aborted
static size_t const MaxComparisons = 2048;
if (n1 * n2 < MaxComparisons) {
for (size_t i = 0; i < n1; ++i) {
auto v = valueNode->getMemberUnchecked(i);
for (size_t j = 0; j < n2; ++j) {
auto w = other.valueNode->getMemberUnchecked(j);
ConditionPartCompareResult res =
ResultsTable[CompareAstNodes(v, w, true) + 1][0][0];
if (res != CompareResult::OTHER_CONTAINED_IN_SELF &&
res != CompareResult::CONVERT_EQUAL && res != CompareResult::IMPOSSIBLE) {
return false;
}
}
}
} else {
std::unordered_set<AstNode const*, AstNodeValueHash, AstNodeValueEqual> values(
512, AstNodeValueHash(), AstNodeValueEqual());
for (size_t i = 0; i < n2; ++i) {
values.emplace(other.valueNode->getMemberUnchecked(i));
}
for (size_t i = 0; i < n1; ++i) {
auto node = valueNode->getMemberUnchecked(i);
if (values.find(node) == values.end()) {
return false;
}
}
}
return true;
}
return false;
}
if (isExpanded && other.isExpanded && operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
other.operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
other.valueNode->isConstant()) {
return CompareAstNodes(other.valueNode, valueNode, false) == 0;
}
bool a = operatorNode->isArrayComparisonOperator();
bool b = other.operatorNode->isArrayComparisonOperator();
if (a || b) {
if (a != b) {
return false;
}
TRI_ASSERT(operatorNode->numMembers() == 3 && other.operatorNode->numMembers() == 3);
AstNode* q1 = operatorNode->getMemberUnchecked(2);
TRI_ASSERT(q1->type == NODE_TYPE_QUANTIFIER);
AstNode* q2 = other.operatorNode->getMemberUnchecked(2);
TRI_ASSERT(q2->type == NODE_TYPE_QUANTIFIER);
// do only cover ALL and NONE when both sides have same quantifier
if (q1->getIntValue() != q2->getIntValue() || q1->getIntValue() == Quantifier::ANY) {
return false;
}
if (isExpanded && other.isExpanded && operatorType == NODE_TYPE_OPERATOR_BINARY_ARRAY_IN &&
other.operatorType == NODE_TYPE_OPERATOR_BINARY_ARRAY_IN &&
other.valueNode->isConstant()) {
return CompareAstNodes(other.valueNode, valueNode, false) == 0;
}
}
// Results are -1, 0, 1, move to 0, 1, 2 for the lookup:
ConditionPartCompareResult res =
ResultsTable[CompareAstNodes(other.valueNode, valueNode, true) + 1]
[other.whichCompareOperation()][whichCompareOperation()];
if (res == CompareResult::OTHER_CONTAINED_IN_SELF ||
res == CompareResult::CONVERT_EQUAL || res == CompareResult::IMPOSSIBLE) {
return true;
}
return false;
}
/// @brief clears the attribute access data
static inline void clearAttributeAccess(
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>& parts) {
parts.first = nullptr;
parts.second.clear();
}
/// @brief create the condition
Condition::Condition(Ast* ast)
: _ast(ast), _root(nullptr), _isNormalized(false), _isSorted(false) {}
/*namespace {
size_t countNodes(AstNode* node) {
if (node == nullptr) {
return 0;
}
size_t n = node->numMembers();
size_t sum = 1;
for (size_t i = 0; i < n; i++) {
sum += countNodes(node->getMember(i));
}
return sum;
}
}*/
/// @brief destroy the condition
Condition::~Condition() {
// memory for nodes is not owned and thus not freed by the condition
// all nodes belong to the AST
// LOG_TOPIC("12fb9", ERR, Logger::FIXME) << "nodes in tree: " << ::countNodes(_root);
}
/// @brief export the condition as VelocyPack
void Condition::toVelocyPack(arangodb::velocypack::Builder& builder, bool verbose) const {
if (_root == nullptr) {
VPackObjectBuilder guard(&builder);
} else {
_root->toVelocyPack(builder, verbose);
}
}
/// @brief create a condition from VPack
Condition* Condition::fromVPack(ExecutionPlan* plan, arangodb::velocypack::Slice const& slice) {
auto condition = std::make_unique<Condition>(plan->getAst());
if (slice.isObject() && slice.length() != 0) {
// note: the AST is responsible for freeing the AstNode later!
AstNode* node = new AstNode(plan->getAst(), slice);
condition->andCombine(node);
}
condition->_isNormalized = true;
condition->_isSorted = false;
return condition.release();
}
/// @brief clone the condition
Condition* Condition::clone() const {
auto copy = std::make_unique<Condition>(_ast);
if (_root != nullptr) {
copy->_root = _root->clone(_ast);
}
copy->_isNormalized = _isNormalized;
return copy.release();
}
/// @brief add a sub-condition to the condition
/// the sub-condition will be AND-combined with the existing condition(s)
void Condition::andCombine(AstNode const* node) {
if (_isNormalized) {
// already normalized
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
"cannot and-combine normalized condition");
}
if (_root == nullptr) {
// condition was empty before
_root = _ast->clone(node);
} else {
// condition was not empty before, now AND-merge
_root = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, _root,
_ast->clone(node));
}
TRI_ASSERT(_root != nullptr);
}
/// @brief locate indexes for each condition
/// return value is a pair indicating whether the index can be used for
/// filtering(first) and sorting(second)
std::pair<bool, bool> Condition::findIndexes(EnumerateCollectionNode const* node,
std::vector<transaction::Methods::IndexHandle>& usedIndexes,
SortCondition const* sortCondition) {
TRI_ASSERT(usedIndexes.empty());
Variable const* reference = node->outVariable();
std::string collectionName = node->collection()->name();
transaction::Methods* trx = _ast->query()->trx();
size_t itemsInIndex;
if (!collectionName.empty() && collectionName[0] == '_' &&
collectionName.substr(0, 11) == "_statistics") {
// use hard-coded number of items in index, because we are dealing with
// the statistics collection here. this saves a roundtrip to the DB servers
// for statistics queries that do not need a fully accurate collection count
itemsInIndex = 1024;
} else {
// estimate for the number of documents in the index. may be outdated...
itemsInIndex = node->collection()->count(trx);
}
if (_root == nullptr) {
size_t dummy;
return std::make_pair<bool, bool>(
false, trx->getIndexForSortCondition(collectionName, sortCondition, reference, itemsInIndex,
node->hint(), usedIndexes, dummy));
}
return trx->getBestIndexHandlesForFilterCondition(collectionName, _ast, _root,
reference, sortCondition,
itemsInIndex, node->hint(),
usedIndexes, _isSorted);
}
/// @brief get the attributes for a sub-condition that are const
/// (i.e. compared with equality)
std::vector<std::vector<arangodb::basics::AttributeName>> Condition::getConstAttributes(
Variable const* reference, bool includeNull) const {
std::vector<std::vector<arangodb::basics::AttributeName>> result;
if (_root == nullptr) {
return result;
}
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
size_t n = _root->numMembers();
if (n != 1) {
// multiple ORs
return result;
}
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> parts;
AstNode const* node = _root->getMember(0);
n = node->numMembers();
for (size_t i = 0; i < n; ++i) {
auto member = node->getMember(i);
if (member->type == NODE_TYPE_OPERATOR_BINARY_EQ) {
clearAttributeAccess(parts);
auto lhs = member->getMember(0);
auto rhs = member->getMember(1);
if (lhs->isAttributeAccessForVariable(parts) && parts.first == reference) {
if (includeNull || ((rhs->isConstant() || rhs->type == NODE_TYPE_REFERENCE) &&
!rhs->isNullValue())) {
result.emplace_back(std::move(parts.second));
}
} else if (rhs->isAttributeAccessForVariable(parts) && parts.first == reference) {
if (includeNull || ((lhs->isConstant() || lhs->type == NODE_TYPE_REFERENCE) &&
!lhs->isNullValue())) {
result.emplace_back(std::move(parts.second));
}
}
}
}
return result;
}
/// @brief get the attributes for a sub-condition that are not-null
arangodb::HashSet<std::vector<arangodb::basics::AttributeName>> Condition::getNonNullAttributes(
Variable const* reference) const {
arangodb::HashSet<std::vector<arangodb::basics::AttributeName>> result;
if (_root == nullptr) {
return result;
}
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
size_t n = _root->numMembers();
if (n != 1) {
// multiple ORs
return result;
}
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> parts;
AstNode const* node = _root->getMember(0);
n = node->numMembers();
for (size_t i = 0; i < n; ++i) {
auto member = node->getMember(i);
if (member->type == NODE_TYPE_OPERATOR_BINARY_NE ||
member->type == NODE_TYPE_OPERATOR_BINARY_GT ||
member->type == NODE_TYPE_OPERATOR_BINARY_LT) {
clearAttributeAccess(parts);
AstNode const* lhs = member->getMember(0);
AstNode const* rhs = member->getMember(1);
AstNode const* check = nullptr;
if (lhs->isConstant() &&
lhs->isNullValue() &&
rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
member->type != NODE_TYPE_OPERATOR_BINARY_GT) {
// null != doc.value
// null < doc.value
check = rhs;
} else if (rhs->isConstant() &&
rhs->isNullValue() &&
lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS &&
node->type != NODE_TYPE_OPERATOR_BINARY_LT) {
// doc.value != null
// doc.value > null
check = lhs;
}
if (check != nullptr &&
check->isAttributeAccessForVariable(parts, false) &&
parts.first == reference) {
result.emplace(std::move(parts.second));
}
}
}
return result;
}
/// @brief normalize the condition
/// this will convert the condition into its disjunctive normal form
void Condition::normalize(ExecutionPlan* plan, bool multivalued /*= false*/) {
if (_isNormalized) {
// already normalized
return;
}
_root = transformNodePreorder(_root);
_root = transformNodePostorder(_root);
_root = fixRoot(_root, 0);
optimize(plan, multivalued);
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
if (_root != nullptr) {
// _root->dump(0);
validateAst(_root, 0);
}
#endif
}
/// @brief normalize the condition
/// this will convert the condition into its disjunctive normal form
/// in this case we don't re-run the optimizer. Its expected that you
/// don't want to remove eventually unneccessary filters.
void Condition::normalize() {
if (_isNormalized) {
// already normalized
return;
}
_root = transformNodePreorder(_root);
_root = transformNodePostorder(_root);
_root = fixRoot(_root, 0);
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
if (_root != nullptr) {
validateAst(_root, 0);
}
#endif
}
void Condition::collectOverlappingMembers(ExecutionPlan const* plan, Variable const* variable,
AstNode const* andNode, AstNode const* otherAndNode,
arangodb::HashSet<size_t>& toRemove,
Index const* index, /* may be nullptr */
bool isFromTraverser) {
bool const isSparse = (index != nullptr && index->sparse());
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> result;
size_t const n = andNode->numMembers();
for (size_t i = 0; i < n; ++i) {
auto operand = andNode->getMemberUnchecked(i);
bool allowOps = operand->isComparisonOperator();
if (isSparse && allowOps && !isFromTraverser &&
(operand->type == NODE_TYPE_OPERATOR_BINARY_NE ||
operand->type == NODE_TYPE_OPERATOR_BINARY_GT)) {
// look for != null and > null
// these can be removed if we are working with a sparse index!
auto lhs = operand->getMember(0);
auto rhs = operand->getMember(1);
clearAttributeAccess(result);
// only remove the condition if the index is exactly on the same attribute
// as the condition
if (rhs->isNullValue() && lhs->isAttributeAccessForVariable(result, isFromTraverser) &&
result.first == variable && index->fields().size() == 1 &&
arangodb::basics::AttributeName::isIdentical(result.second,
index->fields()[0], false)) {
toRemove.emplace(i);
// removed, no need to go on below...
continue;
}
}
if (isFromTraverser) {
allowOps = allowOps || operand->isArrayComparisonOperator();
} else {
allowOps = allowOps && operand->type != NODE_TYPE_OPERATOR_BINARY_NE &&
operand->type != NODE_TYPE_OPERATOR_BINARY_NIN;
}
if (allowOps) {
auto lhs = operand->getMember(0);
auto rhs = operand->getMember(1);
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
(isFromTraverser && lhs->type == NODE_TYPE_EXPANSION)) {
clearAttributeAccess(result);
if (lhs->isAttributeAccessForVariable(result, isFromTraverser) &&
result.first == variable) {
ConditionPart current(variable, result.second, operand, ATTRIBUTE_LEFT, nullptr);
if (canRemove(plan, current, otherAndNode, isFromTraverser)) {
toRemove.emplace(i);
}
}
}
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || rhs->type == NODE_TYPE_EXPANSION) {
clearAttributeAccess(result);
if (rhs->isAttributeAccessForVariable(result, isFromTraverser) &&
result.first == variable) {
ConditionPart current(variable, result.second, operand, ATTRIBUTE_RIGHT, nullptr);
if (canRemove(plan, current, otherAndNode, isFromTraverser)) {
toRemove.emplace(i);
}
}
}
}
}
}
/// @brief removes condition parts from another
AstNode* Condition::removeIndexCondition(ExecutionPlan const* plan, Variable const* variable,
AstNode const* condition, Index const* index) {
TRI_ASSERT(index != nullptr);
if (_root == nullptr || condition == nullptr) {
return _root;
}
TRI_ASSERT(_root != nullptr);
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
TRI_ASSERT(condition != nullptr);
TRI_ASSERT(condition->type == NODE_TYPE_OPERATOR_NARY_OR);
if (condition->numMembers() != 1 && _root->numMembers() != 1) {
return _root;
}
auto andNode = _root->getMemberUnchecked(0);
TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND);
size_t const n = andNode->numMembers();
auto conditionAndNode = condition->getMemberUnchecked(0);
TRI_ASSERT(conditionAndNode->type == NODE_TYPE_OPERATOR_NARY_AND);
arangodb::HashSet<size_t> toRemove;
collectOverlappingMembers(plan, variable, andNode, conditionAndNode, toRemove, index, false);
if (toRemove.empty()) {
return _root;
}
// build a new AST condition
AstNode* newNode = nullptr;
for (size_t i = 0; i < n; ++i) {
if (toRemove.find(i) == toRemove.end()) {
auto what = andNode->getMemberUnchecked(i);
if (newNode == nullptr) {
// the only node so far
newNode = what;
} else {
// AND-combine with existing node
newNode = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND,
newNode, what);
}
}
}
return newNode;
}
/// @brief remove filter conditions already covered by the traversal
AstNode* Condition::removeTraversalCondition(ExecutionPlan const* plan,
Variable const* variable, AstNode* other) {
if (_root == nullptr || other == nullptr) {
return _root;
}
TRI_ASSERT(_root != nullptr);
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
TRI_ASSERT(other != nullptr);
TRI_ASSERT(other->type == NODE_TYPE_OPERATOR_NARY_OR);
if (other->numMembers() != 1 && _root->numMembers() != 1) {
return _root;
}
auto andNode = _root->getMemberUnchecked(0);
TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND);
auto otherAndNode = other->getMemberUnchecked(0);
TRI_ASSERT(otherAndNode->type == NODE_TYPE_OPERATOR_NARY_AND);
size_t const n = andNode->numMembers();
arangodb::HashSet<size_t> toRemove;
collectOverlappingMembers(plan, variable, andNode, otherAndNode, toRemove, nullptr, true);
if (toRemove.empty()) {
return _root;
}
// build a new AST condition
AstNode* newNode = nullptr;
for (size_t i = 0; i < n; ++i) {
if (toRemove.find(i) == toRemove.end()) {
auto what = andNode->getMemberUnchecked(i);
if (newNode == nullptr) {
// the only node so far
newNode = what;
} else {
// AND-combine with existing node
newNode = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND,
newNode, what);
}
}
}
return newNode;
}
/// @brief remove (now) invalid variables from the condition
bool Condition::removeInvalidVariables(arangodb::HashSet<Variable const*> const& validVars) {
if (_root == nullptr) {
return false;
}
TRI_ASSERT(_root != nullptr);
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
auto oldRoot = _root;
_root = _ast->shallowCopyForModify(oldRoot);
TRI_DEFER(FINALIZE_SUBTREE(_root));
bool isEmpty = false;
// handle sub nodes of top-level OR node
size_t const n = _root->numMembers();
arangodb::HashSet<Variable const*> varsUsed;
for (size_t i = 0; i < n; ++i) {
auto oldAndNode = _root->getMemberUnchecked(i);
auto andNode = _ast->shallowCopyForModify(oldAndNode);
TRI_DEFER(FINALIZE_SUBTREE(andNode));
_root->changeMember(i, andNode);
TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND);
size_t nAnd = andNode->numMembers();
for (size_t j = 0; j < nAnd; /* no hoisting */) {
// check which variables are used in each AND
varsUsed.clear();
Ast::getReferencedVariables(andNode->getMemberUnchecked(j), varsUsed);
bool invalid = false;
for (auto& it : varsUsed) {
if (validVars.find(it) == validVars.end()) {
// found an invalid variable here...
invalid = true;
break;
}
}
if (invalid) {
andNode->removeMemberUnchecked(j);
// repeat with some member index
TRI_ASSERT(nAnd > 0);
--nAnd;
if (nAnd == 0) {
isEmpty = true;
}
} else {
++j;
}
}
}
return isEmpty;
}
/// @brief optimize the condition expression tree
void Condition::optimize(ExecutionPlan* plan, bool multivalued) {
if (_root == nullptr) {
return;
}
transaction::Methods* trx = plan->getAst()->query()->trx();
TRI_ASSERT(_root != nullptr);
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
auto oldRoot = _root;
_root = _ast->shallowCopyForModify(oldRoot);
TRI_DEFER(FINALIZE_SUBTREE(_root));
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> varAccess;
// handle sub nodes of top-level OR node
size_t n = _root->numMembers();
size_t r = 0;
const auto* resultsTable = multivalued
? ResultsTableMultiValued
: ResultsTable;
while (r < n) { // foreach OR-Node
bool retry = false;
auto oldAnd = _root->getMemberUnchecked(r);
TRI_ASSERT(oldAnd->type == NODE_TYPE_OPERATOR_NARY_AND);
auto andNode = _ast->shallowCopyForModify(oldAnd);
_root->changeMember(r, andNode);
TRI_DEFER(FINALIZE_SUBTREE(andNode));
restartThisOrItem:
size_t andNumMembers = andNode->numMembers();
// deduplicate and sort all IN arrays
size_t inComparisons = 0;
for (size_t j = 0; j < andNumMembers; ++j) {
auto op = andNode->getMemberUnchecked(j);
if (op->type == NODE_TYPE_OPERATOR_BINARY_IN) {
++inComparisons;
auto deduplicated = deduplicateInOperation(op);
andNode->changeMember(j, deduplicated);
}
}
andNumMembers = andNode->numMembers();
if (andNumMembers <= 1) {
// simple AND item with 0 or 1 members. nothing to do
++r;
n = _root->numMembers();
continue;
}
TRI_ASSERT(andNumMembers > 1);
// sort AND parts of each sub-condition so > and >= come before < and <=
// we use this to some advantage when we check the conditions for a sparse
// index later.
// if a sparse index is asked whether it can supported a condition such as
// `attr < value1`, this range would include `null`, which the sparse index
// cannot provide.
// however, if we first check other conditions we may find a condition on
// the same attribute, e.g. `attr > value2`.
// this other condition may exclude `null` so we then use the full range
// `value2 < attr < value1`
// and do not have to discard sub-conditions anymore
andNode->sortMembers([](AstNode const* lhs, AstNode const* rhs) {
// try to re-order comparison operators
int l = ::operationWeight(lhs);
int r = ::operationWeight(rhs);
if (l != r) {
return l < r;
}
// all equal, now check if original types are different
if (lhs->type != rhs->type) {
return lhs->type < rhs->type;
}
// still all equal
return false;
});
if (inComparisons > 0) {
// move IN operations to the front to make comparison code below simpler
std::vector<AstNode*> stack;
size_t p = andNumMembers - 1;
for (size_t j = p;; --j) {
auto op = andNode->getMemberUnchecked(j);
if (op->type == NODE_TYPE_OPERATOR_BINARY_IN) {
stack.push_back(op);
} else {
if (p != j) {
andNode->changeMember(p, op);
}
--p;
}
if (j == 0) {
break;
}
}
p = 0;
while (!stack.empty()) {
auto it = stack.back();
andNode->changeMember(p++, it);
stack.pop_back();
}
}
// optimization is only necessary if an AND node has multiple members
VariableUsageType variableUsage;
for (size_t j = 0; j < andNumMembers; ++j) {
auto operand = andNode->getMemberUnchecked(j);
if (operand->isComparisonOperator()) {
AstNode const* lhs = operand->getMember(0);
AstNode const* rhs = operand->getMember(1);
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
if (lhs->isConstant()) {
lhs = Ast::resolveConstAttributeAccess(lhs);
}
storeAttributeAccess(varAccess, variableUsage, lhs, j, ATTRIBUTE_LEFT);
}
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || rhs->type == NODE_TYPE_EXPANSION) {
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && rhs->isConstant()) {
rhs = Ast::resolveConstAttributeAccess(rhs);
}
storeAttributeAccess(varAccess, variableUsage, rhs, j, ATTRIBUTE_RIGHT);
}
}
}
// now find the variables and attributes for which there are multiple
// conditions
for (auto const& it : variableUsage) { // foreach sub-and-node
auto variable = it.first;
for (auto const& it2 : it.second) { // cross compare sub-and-nodes
auto const& attributeName = it2.first;
auto const& positions = it2.second;
if (positions.size() <= 1) {
// none or only one occurence of the attribute
continue;
}
// multiple occurrences of the same attribute
size_t leftPos = positions[0].first;
// copy & modify leftNode
auto oldLeft = andNode->getMemberUnchecked(leftPos);
auto leftNode = _ast->shallowCopyForModify(oldLeft);
TRI_DEFER(FINALIZE_SUBTREE(leftNode));
andNode->changeMember(leftPos, leftNode);
ConditionPart current(variable, attributeName, leftNode, positions[0].second, nullptr);
if (!current.valueNode->isConstant()) {
continue;
}
size_t j = 1;
while (j < positions.size()) {
TRI_ASSERT(j != 0);
auto rightPos = positions[j].first;
auto rightNode = andNode->getMemberUnchecked(rightPos);
ConditionPart other(variable, attributeName, rightNode, positions[j].second, nullptr);
if (!other.valueNode->isConstant()) {
++j;
continue;
}
// IN-merging
if (leftNode->type == NODE_TYPE_OPERATOR_BINARY_IN &&
leftNode->getMemberUnchecked(1)->isConstant() &&
!multivalued) {
TRI_ASSERT(leftNode->numMembers() == 2);
if (rightNode->type == NODE_TYPE_OPERATOR_BINARY_IN &&
rightNode->getMemberUnchecked(1)->isConstant()) {
// merge IN with IN on same attribute
TRI_ASSERT(rightNode->numMembers() == 2);
auto merged =
_ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_IN,
leftNode->getMemberUnchecked(0),
mergeInOperations(trx, leftNode, rightNode));
andNode->removeMemberUnchecked(rightPos);
andNode->changeMember(leftPos, merged);
goto restartThisOrItem;
} else if (rightNode->isSimpleComparisonOperator()) {
// merge other comparison operator with IN
TRI_ASSERT(rightNode->numMembers() == 2);
auto values = leftNode->getMemberUnchecked(1);
auto inNode = _ast->createNodeArray(values->numMembers());
// enumerate over IN list
for (size_t k = 0; k < values->numMembers(); ++k) {
auto value = values->getMemberUnchecked(k);
ConditionPartCompareResult res =
ResultsTable[CompareAstNodes(value, other.valueNode, true) + 1][0 /*NODE_TYPE_OPERATOR_BINARY_EQ*/]
[other.whichCompareOperation()];
bool const keep = (res == CompareResult::OTHER_CONTAINED_IN_SELF ||
res == CompareResult::CONVERT_EQUAL);
if (keep) {
inNode->addMember(value);
}
}
if (inNode->numMembers() == 0) {
// no values left after merging -> IMPOSSIBLE
_root->removeMemberUnchecked(r);
retry = true;
goto fastForwardToNextOrItem;
}
// use the new array of values
leftNode->changeMember(1, inNode);
// remove the other operator
andNode->removeMemberUnchecked(rightPos);
goto restartThisOrItem;
}
}
// end of IN-merging
// Results are -1, 0, 1, move to 0, 1, 2 for the lookup:
ConditionPartCompareResult res = resultsTable
[CompareAstNodes(current.valueNode, other.valueNode, true) + 1]
[current.whichCompareOperation()][other.whichCompareOperation()];
switch (res) {
case CompareResult::IMPOSSIBLE: {
// impossible condition
// j = positions.size();
// we remove this one, so fast forward the loops to their end:
_root->removeMemberUnchecked(r);
retry = true;
goto fastForwardToNextOrItem;
}
case CompareResult::SELF_CONTAINED_IN_OTHER: {
TRI_ASSERT(!positions.empty());
andNode->removeMemberUnchecked(positions.at(0).first);
goto restartThisOrItem;
}
case CompareResult::OTHER_CONTAINED_IN_SELF: {
TRI_ASSERT(j < positions.size());
andNode->removeMemberUnchecked(positions.at(j).first);
goto restartThisOrItem;
}
case CompareResult::CONVERT_EQUAL: { // both ok, now transform to a
// == x (== y)
TRI_ASSERT(!positions.empty());
TRI_ASSERT(j < positions.size());
andNode->removeMemberUnchecked(positions.at(j).first);
auto origNode = andNode->getMemberUnchecked(positions.at(0).first);
auto newNode = plan->getAst()->createNode(NODE_TYPE_OPERATOR_BINARY_EQ);
for (size_t iMemb = 0; iMemb < origNode->numMembers(); iMemb++) {
newNode->addMember(origNode->getMemberUnchecked(iMemb));
}
TRI_DEFER(FINALIZE_SUBTREE(newNode));
andNode->changeMember(positions.at(0).first, newNode);
goto restartThisOrItem;
}
case CompareResult::DISJOINT: {
break;
}
case CompareResult::UNKNOWN: {
break;
}
}
++j;
}
} // cross compare sub-and-nodes
} // foreach sub-and-node
fastForwardToNextOrItem:
if (!retry) {
// root nodes hasn't changed. go to next sub-node!
++r;
}
// number of root sub-nodes has probably changed.
// now recalculate the number and don't modify r!
n = _root->numMembers();
}
}
/// @brief registers an attribute access for a particular (collection) variable
void Condition::storeAttributeAccess(
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>& varAccess,
VariableUsageType& variableUsage, AstNode const* node, size_t position,
AttributeSideType side) {
if (!node->isAttributeAccessForVariable(varAccess)) {
return;
}
auto variable = varAccess.first;
if (variable != nullptr) {
std::string attributeName;
TRI_AttributeNamesToString(varAccess.second, attributeName, false);
auto& dst = variableUsage[variable][attributeName];
if (!dst.empty() && dst.back().first == position) {
// already have this attribute for this variable. can happen in case a
// condition refers to itself (e.g. a.x == a.x)
// in this case, we won't optimize it
dst.erase(dst.begin() + dst.size() - 1);
} else {
dst.emplace_back(position, side);
}
}
}
/// @brief validate the condition's AST
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
void Condition::validateAst(AstNode const* node, int level) {
if (level == 0) {
TRI_ASSERT(node->type == NODE_TYPE_OPERATOR_NARY_OR);
}
size_t const n = node->numMembers();
for (size_t i = 0; i < n; ++i) {
auto sub = node->getMemberUnchecked(i);
if (level == 0) {
TRI_ASSERT(sub->type == NODE_TYPE_OPERATOR_NARY_AND);
} else {
TRI_ASSERT(sub->type != NODE_TYPE_OPERATOR_NARY_OR &&
sub->type != NODE_TYPE_OPERATOR_NARY_AND);
}
validateAst(sub, level + 1);
}
}
#endif
/// @brief checks if the current condition is covered by the other
bool Condition::canRemove(ExecutionPlan const* plan, ConditionPart const& me,
arangodb::aql::AstNode const* andNode, bool isFromTraverser) {
TRI_ASSERT(andNode != nullptr);
TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND);
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>> result;
size_t const n = andNode->numMembers();
auto normalize = [&plan](AstNode const* node) -> std::string {
if (node->type == NODE_TYPE_REFERENCE) {
auto setter =
plan->getVarSetBy(static_cast<Variable const*>(node->getData())->id);
if (setter != nullptr && setter->getType() == ExecutionNode::CALCULATION) {
auto cn = ExecutionNode::castTo<CalculationNode const*>(setter);
// use expression node instead
node = cn->expression()->node();
}
}
// return string representation
return node->toString();
};
std::string temp;
try {
for (size_t i = 0; i < n; ++i) {
auto operand = andNode->getMemberUnchecked(i);
if (operand->isComparisonOperator() ||
(isFromTraverser && operand->isArrayComparisonOperator())) {
auto lhs = operand->getMember(0);
auto rhs = operand->getMember(1);
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
(isFromTraverser && lhs->type == NODE_TYPE_EXPANSION)) {
clearAttributeAccess(result);
if (lhs->isAttributeAccessForVariable(result, isFromTraverser)) {
temp.clear();
TRI_AttributeNamesToString(result.second, temp);
if (temp == me.attributeName) {
if (rhs->isConstant()) {
ConditionPart indexCondition(result.first, result.second,
operand, ATTRIBUTE_LEFT, nullptr);
if (me.isCoveredBy(indexCondition, false)) {
return true;
}
}
// non-constant condition
else if (me.operatorType == operand->type &&
normalize(me.valueNode) == normalize(rhs)) {
return true;
}
}
}
}
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || rhs->type == NODE_TYPE_EXPANSION) {
clearAttributeAccess(result);
if (rhs->isAttributeAccessForVariable(result, isFromTraverser)) {
temp.clear();
TRI_AttributeNamesToString(result.second, temp);
if (temp == me.attributeName) {
if (lhs->isConstant()) {
ConditionPart indexCondition(result.first, result.second,
operand, ATTRIBUTE_RIGHT, nullptr);
if (me.isCoveredBy(indexCondition, true)) {
return true;
}
}
// non-constant condition
else if (me.operatorType == operand->type &&
normalize(me.valueNode) == normalize(lhs)) {
return true;
}
}
}
}
}
}
} catch (...) {
// simply ignore any errors and return false
}
return false;
}
/// @brief deduplicate IN condition values (and sort them)
/// this may modify the node in place
AstNode* Condition::deduplicateInOperation(AstNode* operation) {
TRI_ASSERT(operation->numMembers() == 2);
auto rhs = operation->getMemberUnchecked(1);
if (!rhs->isArray() || !rhs->isConstant()) {
return operation;
}
auto deduplicated = _ast->deduplicateArray(rhs);
if (deduplicated != rhs) {
// there were duplicates
auto newOperation = _ast->shallowCopyForModify(operation);
TRI_DEFER(FINALIZE_SUBTREE(newOperation));
newOperation->changeMember(1, const_cast<AstNode*>(deduplicated));
return newOperation;
}
return operation;
}
/// @brief merge the values from two IN operations
AstNode* Condition::mergeInOperations(transaction::Methods* trx,
AstNode const* lhs, AstNode const* rhs) {
TRI_ASSERT(lhs->type == NODE_TYPE_OPERATOR_BINARY_IN);
TRI_ASSERT(rhs->type == NODE_TYPE_OPERATOR_BINARY_IN);
auto lValue = lhs->getMemberUnchecked(1);
auto rValue = rhs->getMemberUnchecked(1);
TRI_ASSERT(lValue->isArray() && lValue->isConstant());
TRI_ASSERT(rValue->isArray() && rValue->isConstant());
return _ast->createNodeIntersectedArray(lValue, rValue);
}
/// @brief merges the current node with the sub nodes of same type
AstNode* Condition::collapse(AstNode const* node) {
TRI_ASSERT(node->type == NODE_TYPE_OPERATOR_NARY_OR || node->type == NODE_TYPE_OPERATOR_NARY_AND);
auto newOperator = _ast->createNode(node->type);
size_t const n = node->numMembers();
for (size_t i = 0; i < n; ++i) {
auto sub = node->getMemberUnchecked(i);
bool const isSame = (node->type == sub->type) ||
(node->type == NODE_TYPE_OPERATOR_NARY_OR &&
sub->type == NODE_TYPE_OPERATOR_BINARY_OR) ||
(node->type == NODE_TYPE_OPERATOR_NARY_AND &&
sub->type == NODE_TYPE_OPERATOR_BINARY_AND);
if (isSame) {
// merge children one level up
for (size_t j = 0; j < sub->numMembers(); ++j) {
newOperator->addMember(sub->getMemberUnchecked(j));
}
} else {
newOperator->addMember(sub);
}
}
return newOperator;
}
// this may modify the node in place
AstNode* switchSidesInCompare(Ast* ast, AstNode* node) {
// switch members of BINARY_LT/GT/LE/GE_NODES
// and change operator accordingly
auto first = node->getMemberUnchecked(0);
auto second = node->getMemberUnchecked(1);
auto newOperator = ast->shallowCopyForModify(node);
TRI_DEFER(FINALIZE_SUBTREE(newOperator));
newOperator->changeMember(0, second);
newOperator->changeMember(1, first);
switch (node->type) {
case NODE_TYPE_OPERATOR_BINARY_LT:
newOperator->type = NODE_TYPE_OPERATOR_BINARY_GT;
break;
case NODE_TYPE_OPERATOR_BINARY_GT:
newOperator->type = NODE_TYPE_OPERATOR_BINARY_LT;
break;
case NODE_TYPE_OPERATOR_BINARY_LE:
newOperator->type = NODE_TYPE_OPERATOR_BINARY_GE;
break;
case NODE_TYPE_OPERATOR_BINARY_GE:
newOperator->type = NODE_TYPE_OPERATOR_BINARY_LE;
break;
default:
LOG_TOPIC("14324", ERR, Logger::QUERIES)
<< "normalize condition tries to swap children"
<< "of wrong node type - this needs to be fixed";
TRI_ASSERT(false);
}
return newOperator;
}
AstNode* normalizeCompare(Ast* ast, AstNode* node) {
// Moves attribute access to the LHS of a comparison.
// If there are 2 attribute accesses it does a
// string compare of the access path and makes sure
// the one that compares less ends up on the LHS
if (node->type != NODE_TYPE_OPERATOR_BINARY_LE &&
node->type != NODE_TYPE_OPERATOR_BINARY_LT && node->type != NODE_TYPE_OPERATOR_BINARY_GE &&
node->type != NODE_TYPE_OPERATOR_BINARY_GT) {
// no binary compare in node
return node;
}
auto first = node->getMemberUnchecked(0);
auto second = node->getMemberUnchecked(1);
if (second->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
if (first->type != NODE_TYPE_ATTRIBUTE_ACCESS) {
return switchSidesInCompare(ast, node);
}
// both are of type attribute access
if (first->toString() > second->toString()) {
return switchSidesInCompare(ast, node);
}
}
return node;
}
/// @brief converts binary to n-ary, comparision normal and negation normal form
AstNode* Condition::transformNodePreorder(AstNode* node) {
if (node == nullptr) {
return nullptr;
}
if (node->type == NODE_TYPE_OPERATOR_BINARY_AND || node->type == NODE_TYPE_OPERATOR_BINARY_OR) {
// convert binary AND/OR into n-ary AND/OR
TRI_ASSERT(node->numMembers() == 2);
auto old = node;
// create a new n-ary node
node = _ast->createNode(Ast::NaryOperatorType(old->type));
node->reserve(2);
node->addMember(transformNodePreorder(old->getMember(0)));
node->addMember(transformNodePreorder(old->getMember(1)));
return node;
}
if (node->type == NODE_TYPE_OPERATOR_UNARY_NOT) {
// push down logical negations
auto sub = node->getMemberUnchecked(0);
if (sub->type == NODE_TYPE_OPERATOR_NARY_AND || sub->type == NODE_TYPE_OPERATOR_BINARY_AND ||
sub->type == NODE_TYPE_OPERATOR_NARY_OR || sub->type == NODE_TYPE_OPERATOR_BINARY_OR) {
size_t const n = sub->numMembers();
AstNode* newOperator = nullptr;
if (sub->type == NODE_TYPE_OPERATOR_NARY_AND || sub->type == NODE_TYPE_OPERATOR_BINARY_AND) {
// ! (a && b) => (! a) || (! b)
newOperator = _ast->createNode(NODE_TYPE_OPERATOR_NARY_OR);
} else {
// ! (a || b) => (! a) && (! b)
newOperator = _ast->createNode(NODE_TYPE_OPERATOR_NARY_AND);
}
for (size_t i = 0; i < n; ++i) {
auto negated = transformNodePreorder(
_ast->createNodeUnaryOperator(NODE_TYPE_OPERATOR_UNARY_NOT,
sub->getMemberUnchecked(i)));
auto optimized = _ast->optimizeNotExpression(negated);
newOperator->addMember(optimized);
}
return newOperator;
}
if (sub->type == NODE_TYPE_OPERATOR_UNARY_NOT) {
// eliminate double-negatives
return transformNodePreorder(sub->getMemberUnchecked(0));
}
auto replacement = _ast->shallowCopyForModify(node);
replacement->changeMember(0, transformNodePreorder(sub));
return replacement;
}
// normalize any comparisons
return normalizeCompare(_ast, node);
}
/// @brief converts from negation normal to disjunctive normal form
AstNode* Condition::transformNodePostorder(AstNode* node) {
if (node == nullptr) {
return node;
}
if (node->type == NODE_TYPE_OPERATOR_NARY_AND) {
auto old = node;
node = _ast->shallowCopyForModify(old);
TRI_DEFER(FINALIZE_SUBTREE(node));
bool distributeOverChildren = false;
bool mustCollapse = false;
size_t n = node->numMembers();
for (size_t i = 0; i < n; ++i) {
// process subnodes first
auto sub = transformNodePostorder(node->getMemberUnchecked(i));
node->changeMember(i, sub);
if (sub->type == NODE_TYPE_OPERATOR_NARY_OR) {
distributeOverChildren = true;
} else if (sub->type == NODE_TYPE_OPERATOR_NARY_AND) {
mustCollapse = true;
}
}
if (mustCollapse) {
node = collapse(node);
// collapsing may change n
n = node->numMembers();
}
if (distributeOverChildren) {
// we found an AND with at least one OR child, e.g.
// AND
// OR c
// a b
//
// we need to move the OR to the top by converting the condition to:
// OR
// AND AND
// a c b c
//
auto newOperator = _ast->createNode(NODE_TYPE_OPERATOR_NARY_OR);
std::vector<::PermutationState> clauses;
clauses.reserve(n);
for (size_t i = 0; i < n; ++i) {
auto sub = node->getMemberUnchecked(i);
if (sub->type == NODE_TYPE_OPERATOR_NARY_OR) {
clauses.emplace_back(sub, sub->numMembers());
} else {
clauses.emplace_back(sub, 1);
}
}
size_t current = 0;
bool done = false;
size_t const numClauses = clauses.size();
while (!done) {
auto andOperator = _ast->createNode(NODE_TYPE_OPERATOR_NARY_AND);
andOperator->reserve(numClauses);
for (size_t i = 0; i < numClauses; ++i) {
auto const& clause = clauses[i];
auto sub = clause.getValue();
// make sure the subtree is finalized so we can avoid cloning it
FINALIZE_SUBTREE(sub);
if (sub->type == NODE_TYPE_OPERATOR_NARY_AND) {
// collapse, add children directly
for (size_t j = 0; j < sub->numMembers(); j++) {
andOperator->addMember(sub->getMember(j));
}
} else {
andOperator->addMember(sub);
}
}
newOperator->addMember(andOperator);
// now advance the clause permutation state
while (true) {
auto& currentClause = clauses[current];
if (++currentClause.current < currentClause.n) {
current = 0;
// still have at least one more permutation with current position
// in current clause
break;
}
// done with current clause, reset it
currentClause.current = 0;
// move on to next clause
if (++current >= n) {
// no more clauses left!
done = true;
break;
}
}
}
node = newOperator;
}
return node;
}
if (node->type == NODE_TYPE_OPERATOR_NARY_OR) {
auto old = node;
node = _ast->shallowCopyForModify(old);
TRI_DEFER(FINALIZE_SUBTREE(node));
size_t const n = node->numMembers();
bool mustCollapse = false;
for (size_t i = 0; i < n; ++i) {
auto sub = transformNodePostorder(node->getMemberUnchecked(i));
node->changeMember(i, sub);
if (sub->type == NODE_TYPE_OPERATOR_NARY_OR) {
mustCollapse = true;
}
}
if (mustCollapse) {
node = collapse(node);
}
}
// we only need to handle nary and/or, the rest was handled in preorder
return node;
}
/// @brief Creates a top-level OR node if it does not already exist, and make
/// sure that all second level nodes are AND nodes. Additionally, this step will
/// remove all NOP nodes.
AstNode* Condition::fixRoot(AstNode* node, int level) {
if (node == nullptr) {
return nullptr;
}
AstNodeType type;
if (level == 0) {
type = NODE_TYPE_OPERATOR_NARY_OR;
} else {
type = NODE_TYPE_OPERATOR_NARY_AND;
}
// check if first-level node is an OR node
if (node->type != type) {
// create new root node
node = _ast->createNodeNaryOperator(type, node);
}
size_t const n = node->numMembers();
size_t j = 0;
auto old = node;
node = _ast->shallowCopyForModify(old);
TRI_DEFER(FINALIZE_SUBTREE(node));
for (size_t i = 0; i < n; ++i) {
auto sub = node->getMemberUnchecked(i);
if (sub->type == NODE_TYPE_NOP) {
// ignore this node
continue;
}
if (level == 0) {
// recurse into next level
node->changeMember(j, fixRoot(sub, 1));
} else if (i != j) {
node->changeMember(j, sub);
}
++j;
}
if (j != n) {
// adjust number of members (because of the NOP nodes removes)
node->reduceMembers(j);
}
return node;
}