1
0
Fork 0
arangodb/arangod/Aql/Condition.cpp

1756 lines
57 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////
#include "Condition.h"
#include "Aql/Ast.h"
#include "Aql/AstNode.h"
#include "Aql/ExecutionPlan.h"
#include "Aql/Index.h"
#include "Aql/SortCondition.h"
#include "Aql/Variable.h"
#include "Basics/Exceptions.h"
#include "Basics/json.h"
#include "Basics/JsonHelper.h"
#ifdef _WIN32
// turn off warnings about too long type name for debug symbols blabla in MSVC
// only...
#pragma warning(disable : 4503)
#endif
using namespace arangodb::aql;
using CompareResult = ConditionPartCompareResult;
/// @brief selection state for a single operand node: `current` is the index
/// of the member that getValue() hands out when the operand is an OR node,
/// and `n` is the upper bound that index is asserted against
struct PermutationState {
  PermutationState(arangodb::aql::AstNode const* value, size_t n)
      : value(value), current(0), n(n) {}

  /// @brief returns the member at position `current` for a binary or n-ary
  /// OR node, and the node itself for every other node type
  arangodb::aql::AstNode const* getValue() const {
    bool const isOrNode =
        (value->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_OR ||
         value->type == arangodb::aql::NODE_TYPE_OPERATOR_NARY_OR);

    if (!isOrNode) {
      // a non-OR node has exactly one "member": itself
      TRI_ASSERT(current == 0);
      return value;
    }

    TRI_ASSERT(current < n);
    return value->getMember(current);
  }

  arangodb::aql::AstNode const* value;
  size_t current;
  size_t const n;
};
// | | a == y | a != y | a < y | a <= y | a >= y | a > y
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | IMP | OIS | OIS | OIS | IMP | IMP
// x == y | a == x | OIS | IMP | IMP | OIS | OIS | IMP
// x > y | | IMP | OIS | IMP | IMP | OIS | OIS
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO
// x == y | a != x | IMP | OIS | SIO | DIJ | DIJ | SIO
// x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | IMP | OIS | OIS | OIS | IMP | IMP
// x == y | a < x | IMP | OIS | OIS | OIS | IMP | IMP
// x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | IMP | OIS | OIS | OIS | IMP | IMP
// x == y | a <= x | SIO | DIJ | SIO | OIS | CEQ | IMP
// x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO
// x == y | a >= x | SIO | DIJ | IMP | CEQ | OIS | SIO
// x > y | | IMP | OIS | IMP | IMP | OIS | OIS
// -------|------------------|--------|--------|--------|--------|--------
// x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO
// x == y | a > x | IMP | OIS | IMP | IMP | OIS | OIS
// x > y | | IMP | OIS | IMP | IMP | OIS | OIS
// the 7th column is here as fallback if the operation is not in the table
// above.
// IMP -> IMPOSSIBLE -> empty result -> the complete AND set of conditions can
// be dropped.
// CEQ -> CONVERT_EQUAL -> both conditions can be combined to a equals x.
// DIJ -> DISJOINT -> neither condition is a consequence of the other -> both
// have to stay in place.
// SIO -> SELF_CONTAINED_IN_OTHER -> the left condition is a consequence of the
// right condition
// OIS -> OTHER_CONTAINED_IN_SELF -> the right condition is a consequence of the
// left condition
// If a condition (A) is a consequence of another (B), the solution set of A is
// larger than that of B
// -> A can be dropped.
// Lookup table for the relation between two conditions on the same attribute.
// It is indexed as ResultsTable[valueOrder][opOnX][opOnY]:
//   - valueOrder: 0 for x < y, 1 for x == y, 2 for x > y
//   - opOnX / opOnY: position of the comparison operator in the order
//     ==, !=, <, <=, >=, > ; position 6 is the fallback for operators that
//     are not part of the table and always yields DISJOINT
// Each inner row below is one opOnX value, each entry in it one opOnY value.
ConditionPartCompareResult const ConditionPart::ResultsTable[3][7][7] = {
{// X < Y
// a == x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
// a != x
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
// a < x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
// a <= x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
// a >= x
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
// a > x
{SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
// fallback row: operator on x is not in the table
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}},
{// X == Y
// a == x
{OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, DISJOINT},
// a != x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, SELF_CONTAINED_IN_OTHER, DISJOINT,
DISJOINT, SELF_CONTAINED_IN_OTHER, DISJOINT},
// a < x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
// a <= x
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
OTHER_CONTAINED_IN_SELF, CONVERT_EQUAL, IMPOSSIBLE, DISJOINT},
// a >= x
{SELF_CONTAINED_IN_OTHER, DISJOINT, IMPOSSIBLE, CONVERT_EQUAL,
OTHER_CONTAINED_IN_SELF, SELF_CONTAINED_IN_OTHER, DISJOINT},
// a > x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
// fallback row: operator on x is not in the table
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}},
{// X > Y
// a == x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
// a != x
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
// a < x
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
// a <= x
{SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
// a >= x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
// a > x
{IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
// fallback row: operator on x is not in the table
{DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}}};
/// @brief build a condition part from a comparison operator node.
/// `side` tells on which side of the operator the attribute access is; the
/// opposite operand becomes the value node. If the attribute is on the right
/// and the operator is reversible, the stored operator type is reversed.
ConditionPart::ConditionPart(Variable const* variable,
                             std::string const& attributeName,
                             AstNode const* operatorNode,
                             AttributeSideType side, void* data)
    : variable(variable),
      attributeName(attributeName),
      operatorType(operatorNode->type),
      operatorNode(operatorNode),
      valueNode(nullptr),
      data(data),
      isExpanded(false) {
  bool const attributeIsLeft = (side == ATTRIBUTE_LEFT);

  // the value is always the operand opposite the attribute access
  valueNode = operatorNode->getMember(attributeIsLeft ? 1 : 0);

  if (!attributeIsLeft && Ast::IsReversibleOperator(operatorType)) {
    operatorType = Ast::ReverseOperator(operatorType);
  }

  // an attribute path containing "[*]" marks an expanded attribute
  isExpanded = (std::string::npos != attributeName.find("[*]"));
}
/// @brief build a condition part from a list of attribute names.
/// Delegates to the string-based constructor with an empty name, then fills
/// in the attribute name from the list and recomputes the expansion flag.
ConditionPart::ConditionPart(
    Variable const* variable,
    std::vector<arangodb::basics::AttributeName> const& attributeNames,
    AstNode const* operatorNode, AttributeSideType side, void* data)
    : ConditionPart(variable, "", operatorNode, side, data) {
  TRI_AttributeNamesToString(attributeNames, attributeName, false);

  // the delegated constructor saw an empty name, so the flag must be redone
  isExpanded = (std::string::npos != attributeName.find("[*]"));
}
/// @brief destroy the condition part; there is nothing to release explicitly
ConditionPart::~ConditionPart() = default;
////////////////////////////////////////////////////////////////////////////////
/// @brief true if the condition is completely covered by the other condition
/// both parts must refer to the same variable and attribute name. IN
/// comparisons against constant values are handled separately; everything
/// else is decided via ResultsTable
////////////////////////////////////////////////////////////////////////////////
bool ConditionPart::isCoveredBy(ConditionPart const& other) const {
  // parts on different variables or attributes never cover each other
  if (variable != other.variable || attributeName != other.attributeName) {
    return false;
  }

  // special case: the other part is a non-expanded IN on a constant array
  if (!isExpanded && !other.isExpanded &&
      other.operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
      other.valueNode->isConstant() && other.valueNode->isArray()) {
    // only an IN on a constant array can be covered by it
    if (operatorType != NODE_TYPE_OPERATOR_BINARY_IN ||
        !valueNode->isConstant() || !valueNode->isArray()) {
      return false;
    }

    size_t const numOwn = valueNode->numMembers();
    size_t const numOther = other.valueNode->numMembers();

    // upper bound for the pairwise comparison below; larger inputs use the
    // hash-based lookup instead
    static size_t const MaxComparisons = 2048;

    if (numOwn * numOther < MaxComparisons) {
      // pairwise comparison (quadratic)
      // NOTE(review): the [0][0] entries of ResultsTable are IMPOSSIBLE,
      // OTHER_CONTAINED_IN_SELF and IMPOSSIBLE, so as long as
      // CompareAstNodes yields -1/0/1 this loop can never return false,
      // unlike the hash-based branch - confirm this is intended
      for (size_t i = 0; i < numOwn; ++i) {
        auto own = valueNode->getMemberUnchecked(i);

        for (size_t j = 0; j < numOther; ++j) {
          auto theirs = other.valueNode->getMemberUnchecked(j);
          auto const order = CompareAstNodes(own, theirs, true);
          ConditionPartCompareResult const outcome =
              ConditionPart::ResultsTable[order + 1][0][0];

          bool const covered =
              (outcome == CompareResult::OTHER_CONTAINED_IN_SELF ||
               outcome == CompareResult::CONVERT_EQUAL ||
               outcome == CompareResult::IMPOSSIBLE);

          if (!covered) {
            return false;
          }
        }
      }
      return true;
    }

    // too many combinations: collect the other part's values in a set and
    // require every own value to be present in it
    std::unordered_set<AstNode const*, AstNodeValueHash, AstNodeValueEqual>
        lookup(512, AstNodeValueHash(), AstNodeValueEqual());

    for (size_t i = 0; i < numOther; ++i) {
      lookup.emplace(other.valueNode->getMemberUnchecked(i));
    }

    for (size_t i = 0; i < numOwn; ++i) {
      auto candidate = valueNode->getMemberUnchecked(i);

      if (lookup.find(candidate) == lookup.end()) {
        return false;
      }
    }
    return true;
  }

  // special case: both parts are expanded IN comparisons; they only cover
  // each other if the values compare equal
  if (isExpanded && other.isExpanded &&
      operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
      other.operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
      other.valueNode->isConstant()) {
    return CompareAstNodes(other.valueNode, valueNode, false) == 0;
  }

  // general case. comparison results are -1, 0, 1 and are shifted to
  // 0, 1, 2 for the table lookup
  auto const order = CompareAstNodes(other.valueNode, valueNode, true);
  ConditionPartCompareResult const outcome =
      ConditionPart::ResultsTable[order + 1][other.whichCompareOperation()]
                                 [whichCompareOperation()];

  return (outcome == CompareResult::OTHER_CONTAINED_IN_SELF ||
          outcome == CompareResult::CONVERT_EQUAL ||
          outcome == CompareResult::IMPOSSIBLE);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief create the condition
/// the new condition is empty (no root node) and is neither normalized nor
/// sorted
////////////////////////////////////////////////////////////////////////////////
Condition::Condition(Ast* ast)
    : _ast(ast),
      _root(nullptr),
      _isNormalized(false),
      _isSorted(false) {}
////////////////////////////////////////////////////////////////////////////////
/// @brief destroy the condition
/// nothing is freed here: the condition does not own the memory of its
/// nodes, all nodes belong to the AST
////////////////////////////////////////////////////////////////////////////////
Condition::~Condition() = default;
//////////////////////////////////////////////////////////////////////////////
/// @brief export the condition as VelocyPack
/// a condition with a root node delegates to the root; an empty condition
/// only creates and destroys an object-builder guard on the builder
/// (presumably producing an empty object)
//////////////////////////////////////////////////////////////////////////////
void Condition::toVelocyPack(arangodb::velocypack::Builder& builder,
                             bool verbose) const {
  if (_root != nullptr) {
    _root->toVelocyPack(builder, verbose);
    return;
  }

  // no condition at all
  VPackObjectBuilder guard(&builder);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief create a condition from JSON
/// a non-empty JSON object is turned into an AST node and AND-combined into
/// the new condition. the result is always flagged as normalized and as not
/// sorted. ownership of the returned pointer passes to the caller
////////////////////////////////////////////////////////////////////////////////
Condition* Condition::fromJson(ExecutionPlan* plan,
                               arangodb::basics::Json const& json) {
  auto result = std::make_unique<Condition>(plan->getAst());

  bool const hasContent = (json.isObject() && json.members() != 0);

  if (hasContent) {
    // note: the AST is responsible for freeing the AstNode later!
    AstNode* node = new AstNode(plan->getAst(), json);
    result->andCombine(node);
  }

  result->_isNormalized = true;
  result->_isSorted = false;

  return result.release();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief clone the condition
/// the copy uses the same AST, gets a clone of the root node (if any) and
/// inherits the normalized flag. the sorted flag is not copied, so the copy
/// keeps the constructor default for it. ownership of the returned pointer
/// passes to the caller
////////////////////////////////////////////////////////////////////////////////
Condition* Condition::clone() const {
  auto duplicate = std::make_unique<Condition>(_ast);

  duplicate->_isNormalized = _isNormalized;

  if (_root != nullptr) {
    duplicate->_root = _root->clone(_ast);
  }

  return duplicate.release();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief add a sub-condition to the condition
/// the sub-condition will be AND-combined with the existing condition(s).
/// the passed node is cloned via the AST, it is not stored itself.
/// throws TRI_ERROR_INTERNAL if the condition is already normalized
////////////////////////////////////////////////////////////////////////////////
void Condition::andCombine(AstNode const* node) {
  if (_isNormalized) {
    // a normalized condition must not be modified anymore
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
                                   "cannot and-combine normalized condition");
  }

  auto copy = _ast->clone(node);

  if (_root != nullptr) {
    // there is a condition already: wrap both into a binary AND
    _root = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND,
                                           _root, copy);
  } else {
    // first sub-condition: it becomes the root
    _root = copy;
  }

  TRI_ASSERT(_root != nullptr);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief locate indexes for each condition
/// return value is a pair indicating whether the index can be used for
/// filtering(first) and sorting(second)
/// usedIndexes must be empty on entry and receives the selected indexes.
/// without a condition, at most one non-sparse index is picked purely for
/// sorting. with a condition, one index is looked up per member of the
/// top-level OR node
////////////////////////////////////////////////////////////////////////////////
std::pair<bool, bool> Condition::findIndexes(
EnumerateCollectionNode const* node, std::vector<Index const*>& usedIndexes,
SortCondition const* sortCondition) {
TRI_ASSERT(usedIndexes.empty());
Variable const* reference = node->outVariable();
if (_root == nullptr) {
// We do not have a condition, so an index can only help with sorting.
// This requires a non-empty, unidirectional sort condition that consists
// of attribute accesses only
if (!sortCondition->isEmpty() && sortCondition->isOnlyAttributeAccess() &&
sortCondition->isUnidirectional()) {
size_t const itemsInIndex = node->collection()->count();
double bestCost = 0.0;
Index const* bestIndex = nullptr;
std::vector<Index const*> indexes = node->collection()->getIndexes();
// pick the index with the lowest estimated sort cost
for (auto const& idx : indexes) {
if (idx->sparse) {
// a sparse index may exclude some documents, so it can't be used to
// get a sorted view of the ENTIRE collection
continue;
}
double sortCost = 0.0;
if (indexSupportsSort(idx, reference, sortCondition, itemsInIndex,
sortCost)) {
if (bestIndex == nullptr || sortCost < bestCost) {
bestCost = sortCost;
bestIndex = idx;
}
}
}
if (bestIndex != nullptr) {
usedIndexes.emplace_back(bestIndex);
}
// never usable for filtering here, usable for sorting if an index was found
return std::make_pair(false, bestIndex != nullptr);
}
// No Index and no sort condition that
// can be supported by an index.
// Nothing to do here.
return std::make_pair(false, false);
}
// We can only start after DNF transformation
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
// filtering is only possible if every OR member finds a filter index
bool canUseForFilter = (_root->numMembers() > 0);
bool canUseForSort = false;
for (size_t i = 0; i < _root->numMembers(); ++i) {
auto canUseIndex =
findIndexForAndNode(i, reference, node, usedIndexes, sortCondition);
if (canUseIndex.second && !canUseIndex.first) {
// index can be used for sorting only
// we need to abort further searching and only return one index
TRI_ASSERT(!usedIndexes.empty());
if (usedIndexes.size() > 1) {
// keep only the index that was added last (the sort-only one)
auto sortIndex = usedIndexes.back();
usedIndexes.clear();
usedIndexes.emplace_back(sortIndex);
}
TRI_ASSERT(usedIndexes.size() == 1);
if (usedIndexes.back()->sparse) {
// cannot use a sparse index for sorting alone
usedIndexes.clear();
}
return std::make_pair(false, !usedIndexes.empty());
}
canUseForFilter &= canUseIndex.first;
canUseForSort |= canUseIndex.second;
}
if (canUseForFilter) {
// remember whether the OR branches could be brought into sorted order
_isSorted = sortOrs(reference, usedIndexes);
}
// should always be true here. maybe not in the future in case a collection
// has absolutely no indexes
return std::make_pair(canUseForFilter, canUseForSort);
}
/// @brief checks whether a sorted index supports the given sort condition.
/// returns true if it does; estimatedCost is then whatever the index
/// reported. otherwise returns false and sets estimatedCost to
/// itemsInIndex * log2(itemsInIndex), or to 0.0 for an empty index
bool Condition::indexSupportsSort(Index const* idx, Variable const* reference,
                                  SortCondition const* sortCondition,
                                  size_t itemsInIndex,
                                  double& estimatedCost) const {
  bool const supported =
      idx->isSorted() &&
      idx->supportsSortCondition(sortCondition, reference, itemsInIndex,
                                 estimatedCost);

  if (!supported) {
    // index does not support the sort condition: use a fallback cost estimate
    if (itemsInIndex > 0) {
      estimatedCost =
          itemsInIndex * std::log2(static_cast<double>(itemsInIndex));
    } else {
      estimatedCost = 0.0;
    }
  }

  return supported;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get the attributes for a sub-condition that are const
/// (i.e. compared with equality)
/// only conditions whose root has exactly one member are inspected, all
/// others produce an empty result. unless includeNull is set, the compared
/// value must be a constant other than null
////////////////////////////////////////////////////////////////////////////////
std::vector<std::vector<arangodb::basics::AttributeName>>
Condition::getConstAttributes(Variable const* reference, bool includeNull) {
  std::vector<std::vector<arangodb::basics::AttributeName>> result;

  if (_root == nullptr || _root->numMembers() != 1) {
    return result;
  }

  AstNode const* node = _root->getMember(0);
  size_t const numOperands = node->numMembers();

  for (size_t i = 0; i < numOperands; ++i) {
    auto member = node->getMember(i);

    if (member->type != NODE_TYPE_OPERATOR_BINARY_EQ) {
      // only equality comparisons are of interest
      continue;
    }

    std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
        parts;
    auto lhs = member->getMember(0);
    auto rhs = member->getMember(1);

    // find the operand opposite the attribute access on `reference`
    AstNode const* valueSide = nullptr;

    if (lhs->isAttributeAccessForVariable(parts) &&
        parts.first == reference) {
      valueSide = rhs;
    } else if (rhs->isAttributeAccessForVariable(parts) &&
               parts.first == reference) {
      valueSide = lhs;
    }

    if (valueSide == nullptr) {
      continue;
    }

    if (includeNull ||
        (valueSide->isConstant() && !valueSide->isNullValue())) {
      result.emplace_back(std::move(parts.second));
    }
  }

  return result;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief finds the best index that can match this single node
/// `position` is the index of the AND node inside the top-level OR node.
/// every index of the collection gets a total cost (filter cost plus sort
/// cost) and the cheapest one that supports filtering or sorting wins. the
/// winner is appended to usedIndexes and the AND node is replaced by the
/// index-specialized condition.
/// return value is a pair indicating whether the chosen index supports the
/// filter (first) and the sort (second); (false, false) if no index matched
////////////////////////////////////////////////////////////////////////////////
std::pair<bool, bool> Condition::findIndexForAndNode(
size_t position, Variable const* reference,
EnumerateCollectionNode const* colNode,
std::vector<Index const*>& usedIndexes,
SortCondition const* sortCondition) {
// We can only iterate through a proper DNF
auto node = _root->getMember(position);
TRI_ASSERT(node->type == NODE_TYPE_OPERATOR_NARY_AND);
// number of items in collection
size_t const itemsInCollection = colNode->collection()->count();
Index const* bestIndex = nullptr;
double bestCost = 0.0;
bool bestSupportsFilter = false;
bool bestSupportsSort = false;
std::vector<Index const*> indexes = colNode->collection()->getIndexes();
for (auto const& idx : indexes) {
double filterCost = 0.0;
double sortCost = 0.0;
size_t itemsInIndex = itemsInCollection;
bool supportsFilter = false;
bool supportsSort = false;
// check if the index supports the filter expression
double estimatedCost;
size_t estimatedItems;
if (idx->supportsFilterCondition(node, reference, itemsInIndex,
estimatedItems, estimatedCost)) {
// index supports the filter condition
filterCost = estimatedCost;
// this reduces the number of items left
itemsInIndex = estimatedItems;
supportsFilter = true;
} else {
// index does not support the filter condition
filterCost = itemsInIndex * 1.5;
}
bool const isOnlyAttributeAccess =
(!sortCondition->isEmpty() && sortCondition->isOnlyAttributeAccess());
// NOTE(review): the comment below lists three preconditions (non-empty,
// attribute access only, unidirectional), but only isUnidirectional() is
// checked here; isOnlyAttributeAccess is not part of this guard - confirm
// that this is intended
if (sortCondition->isUnidirectional()) {
// only go in here if we actually have a sort condition and it can in
// general be supported by an index. for this, a sort condition must not
// be empty, must consist only of attribute access, and all attributes
// must be sorted in the direction
if (indexSupportsSort(idx, reference, sortCondition, itemsInIndex,
sortCost)) {
supportsSort = true;
}
}
if (!supportsSort && isOnlyAttributeAccess && node->isOnlyEqualityMatch()) {
// index cannot be used for sorting, but the filter condition consists
// only of equality lookups (==)
// now check if the index fields are the same as the sort condition fields
// e.g. FILTER c.value1 == 1 && c.value2 == 42 SORT c.value1, c.value2
size_t coveredFields =
sortCondition->coveredAttributes(reference, idx->fields);
if (coveredFields == sortCondition->numAttributes() &&
(idx->isSorted() ||
idx->fields.size() == sortCondition->numAttributes())) {
// no sorting needed
// (only the cost is reset, supportsSort itself stays false)
sortCost = 0.0;
}
}
// std::cout << "INDEX: " << idx << ", SUPPORTS FILTER: " << supportsFilter
// << ", SUPPORTS SORT: " << supportsSort << ", FILTER COST: " << filterCost
// << ", SORT COST: " << sortCost << "\n";
if (!supportsFilter && !supportsSort) {
// index is of no use for this AND node
continue;
}
double const totalCost = filterCost + sortCost;
// keep the cheapest candidate; on equal cost the earlier index wins
if (bestIndex == nullptr || totalCost < bestCost) {
bestIndex = idx;
bestCost = totalCost;
bestSupportsFilter = supportsFilter;
bestSupportsSort = supportsSort;
}
}
if (bestIndex == nullptr) {
return std::make_pair(false, false);
}
// replace the AND node with the condition specialized for the chosen index
_root->changeMember(position,
bestIndex->specializeCondition(node, reference));
usedIndexes.emplace_back(bestIndex);
return std::make_pair(bestSupportsFilter, bestSupportsSort);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief normalize the condition
/// this will convert the condition into its disjunctive normal form and
/// then run the condition optimizer on the result.
/// afterwards the condition is flagged as normalized, so that repeated calls
/// return early and andCombine() refuses to modify the condition
////////////////////////////////////////////////////////////////////////////////
void Condition::normalize(ExecutionPlan* plan) {
  if (_isNormalized) {
    // already normalized
    return;
  }

  _root = transformNode(_root);
  _root = fixRoot(_root, 0);

  optimize(plan);

#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  if (_root != nullptr) {
    // _root->dump(0);
    validateAst(_root, 0);
  }
#endif

  // record the new state. without this the guard at the top of this
  // function and the one in andCombine() could never trigger
  _isNormalized = true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief normalize the condition
/// this will convert the condition into its disjunctive normal form.
/// in this case we don't re-run the optimizer. It is expected that you
/// don't want to remove possibly unnecessary filters.
/// afterwards the condition is flagged as normalized, so that repeated calls
/// return early and andCombine() refuses to modify the condition
////////////////////////////////////////////////////////////////////////////////
void Condition::normalize() {
  if (_isNormalized) {
    // already normalized
    return;
  }

  _root = transformNode(_root);
  _root = fixRoot(_root, 0);

#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  if (_root != nullptr) {
    // _root->dump(0);
    validateAst(_root, 0);
  }
#endif

  // record the new state. without this the guard at the top of this
  // function and the one in andCombine() could never trigger
  _isNormalized = true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief removes condition parts from another
/// inspects the first AND node of this condition and drops every comparison
/// on `variable` for which canRemove() reports true with respect to `other`.
/// returns the unchanged root if nothing can be removed, otherwise a new
/// node that AND-combines the remaining operands (nullptr if none remain)
////////////////////////////////////////////////////////////////////////////////
AstNode* Condition::removeIndexCondition(Variable const* variable,
AstNode* other) {
if (_root == nullptr || other == nullptr) {
return _root;
}
TRI_ASSERT(_root != nullptr);
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
TRI_ASSERT(other != nullptr);
TRI_ASSERT(other->type == NODE_TYPE_OPERATOR_NARY_OR);
// NOTE(review): with "&&" this only bails out if BOTH conditions have a
// member count other than 1. if just one of them has exactly one member the
// code below still runs and only looks at member 0 of _root - confirm that
// "&&" (and not "||") is intended
if (other->numMembers() != 1 && _root->numMembers() != 1) {
return _root;
}
auto andNode = _root->getMemberUnchecked(0);
TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND);
size_t const n = andNode->numMembers();
// positions of the AND operands that can be dropped
std::unordered_set<size_t> toRemove;
for (size_t i = 0; i < n; ++i) {
auto operand = andNode->getMemberUnchecked(i);
// only comparisons other than != and NOT IN are candidates for removal
if (operand->isComparisonOperator() &&
operand->type != NODE_TYPE_OPERATOR_BINARY_NE &&
operand->type != NODE_TYPE_OPERATOR_BINARY_NIN) {
auto lhs = operand->getMember(0);
auto rhs = operand->getMember(1);
// attribute access on the left-hand side
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
result;
if (lhs->isAttributeAccessForVariable(result) &&
result.first == variable) {
ConditionPart current(variable, result.second, operand,
ATTRIBUTE_LEFT, nullptr);
if (canRemove(current, other)) {
toRemove.emplace(i);
}
}
}
// attribute access or expansion on the right-hand side
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
rhs->type == NODE_TYPE_EXPANSION) {
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
result;
if (rhs->isAttributeAccessForVariable(result) &&
result.first == variable) {
ConditionPart current(variable, result.second, operand,
ATTRIBUTE_RIGHT, nullptr);
if (canRemove(current, other)) {
toRemove.emplace(i);
}
}
}
}
}
if (toRemove.empty()) {
return _root;
}
// build a new AST condition
// it stays nullptr if every operand was removed
AstNode* newNode = nullptr;
for (size_t i = 0; i < n; ++i) {
if (toRemove.find(i) == toRemove.end()) {
auto what = andNode->getMemberUnchecked(i);
if (newNode == nullptr) {
// the only node so far
newNode = what;
} else {
// AND-combine with existing node
newNode = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND,
newNode, what);
}
}
}
return newNode;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief remove (now) invalid variables from the condition
/// every operand of every AND node that references a variable not contained
/// in validVars is removed. returns true if at least one AND node lost all
/// of its operands this way, false otherwise (also for an empty condition)
////////////////////////////////////////////////////////////////////////////////
bool Condition::removeInvalidVariables(
    std::unordered_set<Variable const*> const& validVars) {
  if (_root == nullptr) {
    return false;
  }

  TRI_ASSERT(_root != nullptr);
  TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);

  bool isEmpty = false;

  // scratch set, re-used for every operand
  std::unordered_set<Variable const*> varsUsed;

  // handle sub nodes of top-level OR node
  size_t const numOrMembers = _root->numMembers();

  for (size_t i = 0; i < numOrMembers; ++i) {
    auto andNode = _root->getMemberUnchecked(i);
    TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND);

    size_t remaining = andNode->numMembers();
    size_t pos = 0;

    while (pos < remaining) {
      // collect the variables used by the current operand
      varsUsed.clear();
      Ast::getReferencedVariables(andNode->getMemberUnchecked(pos), varsUsed);

      bool invalid = false;
      for (auto& it : varsUsed) {
        if (validVars.find(it) == validVars.end()) {
          // found an invalid variable here...
          invalid = true;
          break;
        }
      }

      if (!invalid) {
        // operand is fine, move on to the next one
        ++pos;
        continue;
      }

      // drop the operand and repeat with the same member index
      andNode->removeMemberUnchecked(pos);
      TRI_ASSERT(remaining > 0);
      --remaining;

      if (remaining == 0) {
        isEmpty = true;
      }
    }
  }

  return isEmpty;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief sort ORs for the same attribute so they are in ascending value
/// order. this will only work if the condition is for a single attribute
/// the usedIndexes vector may also be re-sorted
/// returns true if the OR branches are in sorted order afterwards (trivially
/// so for an empty condition or fewer than two branches), and false if the
/// condition has a shape that cannot be handled. note that IN arrays may
/// already have been merged when false is returned later on
////////////////////////////////////////////////////////////////////////////////
bool Condition::sortOrs(Variable const* variable,
std::vector<Index const*>& usedIndexes) {
if (_root == nullptr) {
return true;
}
size_t const n = _root->numMembers();
if (n < 2) {
// zero or one branch is always sorted
return true;
}
if (n != usedIndexes.size()) {
// sorting will break if the number of ORs is unequal to the number of
// indexes
// but we shouldn't have got here then
TRI_ASSERT(false);
return false;
}
// pairs an AND node with the index that was selected for it
typedef std::pair<AstNode*, Index const*> ConditionData;
// owning container for the heap-allocated ConditionData objects
std::vector<ConditionData*> conditionData;
auto cleanup = [&conditionData]() -> void {
for (auto& it : conditionData) {
delete it;
}
};
// presumably runs cleanup() when this scope is left - TODO confirm against
// the TRI_DEFER definition
TRI_DEFER(cleanup());
std::vector<ConditionPart> parts;
parts.reserve(n);
for (size_t i = 0; i < n; ++i) {
// sort the conditions of each AND
auto sub = _root->getMemberUnchecked(i);
TRI_ASSERT(sub != nullptr && sub->type == NODE_TYPE_OPERATOR_NARY_AND);
size_t const nAnd = sub->numMembers();
if (nAnd != 1) {
// we can't handle this one
return false;
}
auto operand = sub->getMemberUnchecked(0);
if (!operand->isComparisonOperator()) {
return false;
}
// != and NOT IN cannot be handled here
if (operand->type == NODE_TYPE_OPERATOR_BINARY_NE ||
operand->type == NODE_TYPE_OPERATOR_BINARY_NIN) {
return false;
}
auto lhs = operand->getMember(0);
auto rhs = operand->getMember(1);
// attribute on the left, constant on the right. an IN is only accepted if
// its right-hand side is an array
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
result;
if (rhs->isConstant() && lhs->isAttributeAccessForVariable(result) &&
result.first == variable &&
(operand->type != NODE_TYPE_OPERATOR_BINARY_IN || rhs->isArray())) {
// create the condition data struct on the heap
auto data = std::make_unique<ConditionData>(sub, usedIndexes[i]);
// push it into an owning vector
conditionData.emplace_back(data.get());
// vector is now responsible for data
auto p = data.release();
// also add the pointer to the (non-owning) parts vector
parts.emplace_back(ConditionPart(result.first, result.second, operand,
ATTRIBUTE_LEFT, p));
}
}
// constant on the left, attribute (or expansion) on the right
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
rhs->type == NODE_TYPE_EXPANSION) {
std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
result;
if (lhs->isConstant() && rhs->isAttributeAccessForVariable(result) &&
result.first == variable) {
// create the condition data struct on the heap
auto data = std::make_unique<ConditionData>(sub, usedIndexes[i]);
// push it into an owning vector
conditionData.emplace_back(data.get());
// vector is now responsible for data
auto p = data.release();
// also add the pointer to the (non-owning) parts vector
parts.emplace_back(ConditionPart(result.first, result.second, operand,
ATTRIBUTE_RIGHT, p));
}
}
}
// every OR branch must have produced exactly one part
if (parts.size() != _root->numMembers()) {
return false;
}
// check if all parts use the same variable and attribute
for (size_t i = 1; i < n; ++i) {
auto& lhs = parts[i - 1];
auto& rhs = parts[i];
if (lhs.variable != rhs.variable ||
lhs.attributeName != rhs.attributeName) {
// oops, the different OR parts are on different variables or attributes
return false;
}
}
// merge all IN arrays into the first IN that was found; the later ones get
// an empty array and are skipped when the condition is rebuilt below
size_t previousIn = SIZE_MAX;
for (size_t i = 0; i < n; ++i) {
auto& p = parts[i];
if (p.operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
p.valueNode->isArray()) {
TRI_ASSERT(p.valueNode->isConstant());
if (previousIn != SIZE_MAX) {
// merge IN with IN
TRI_ASSERT(previousIn < i);
auto emptyArray = _ast->createNodeArray();
auto mergedIn = _ast->createNodeUnionizedArray(
parts[previousIn].valueNode, p.valueNode);
parts[previousIn].valueNode = mergedIn;
parts[i].valueNode = emptyArray;
// update member 1 of the comparison nodes in the AST as well
_root->getMember(previousIn)->getMember(0)->changeMember(1, mergedIn);
_root->getMember(i)->getMember(0)->changeMember(1, emptyArray);
} else {
// note first IN
previousIn = i;
}
}
}
// now sort all conditions by variable name, attribute name, attribute value
std::sort(parts.begin(), parts.end(),
[](ConditionPart const& lhs, ConditionPart const& rhs) -> bool {
// compare variable names first
auto res = lhs.variable->name.compare(rhs.variable->name);
if (res != 0) {
return res < 0;
}
// compare attribute names next
res = lhs.attributeName.compare(rhs.attributeName);
if (res != 0) {
return res < 0;
}
// compare attribute values next
auto ll = lhs.lowerBound();
auto lr = rhs.lowerBound();
if (ll == nullptr && lr != nullptr) {
// left lower bound is not set but right
return true;
} else if (ll != nullptr && lr == nullptr) {
// left lower bound is set but not right
return false;
}
if (ll != nullptr && lr != nullptr) {
// both lower bounds are set
res = CompareAstNodes(ll, lr, true);
if (res != 0) {
return res < 0;
}
}
// equal lower bounds: an inclusive bound sorts before an exclusive one
if (lhs.isLowerInclusive() && !rhs.isLowerInclusive()) {
return true;
}
if (rhs.isLowerInclusive() && !lhs.isLowerInclusive()) {
return false;
}
// all things equal
return false;
});
/*
auto l = 0;
for (size_t r = 1; r < n; ++r) {
auto& l = parts[l].data;
auto& r = parts[r].data;
if (l.higher > r.higher ||
(l.higher == r.higher && (l.inclusive || ! r.inclusive)) {
// r is contained in l => remove r (i.e. do nothing)
r.data = nullptr;
}
else if (r.lower < l.higher || (r.lower == l.higher && (r.inclusive ||
l.inclusive))) {
// r extends l => fuse l.lower & r.higher
r.data = nullptr;
newOrNode->getMember(newor
}
else {
// disjoint ranges. simply add the node
newOrNode->addMember(r);
}
}
*/
TRI_ASSERT(parts.size() == conditionData.size());
// clean up
usedIndexes.clear();
while (_root->numMembers()) {
_root->removeMemberUnchecked(0);
}
// and rebuild
// the OR node and usedIndexes are refilled in the sorted order of parts
for (size_t i = 0; i < n; ++i) {
if (parts[i].operatorType == NODE_TYPE_OPERATOR_BINARY_IN &&
parts[i].valueNode->isArray() &&
parts[i].valueNode->numMembers() == 0) {
// can optimize away empty IN array
continue;
}
// note: this local shadows the owning vector of the same name
auto conditionData = static_cast<ConditionData*>(parts[i].data);
_root->addMember(conditionData->first);
usedIndexes.emplace_back(conditionData->second);
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief optimize the condition expression tree
////////////////////////////////////////////////////////////////////////////////
void Condition::optimize(ExecutionPlan* plan) {
// Simplifies the condition tree in place. Each AND node below the top-level
// OR node is inspected on its own: comparisons that refer to the same
// attribute of the same variable are compared pairwise and then merged,
// dropped, or - if they contradict each other - cause the whole AND node to
// be removed from the root.
// `plan` is only used to reach the Ast when a replacement == node is created
// (CONVERT_EQUAL case below); all other nodes are created via `_ast`.
if (_root == nullptr) {
return;
}
TRI_ASSERT(_root != nullptr);
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR);
// handle sub nodes of top-level OR node
// `r` is the index of the AND node currently processed, `n` the number of
// AND nodes in the root. `n` is re-read whenever the root may have changed.
size_t n = _root->numMembers();
size_t r = 0;
while (r < n) { // foreach OR-Node
// `retry` is set when the AND node at position `r` was removed from the
// root. In that case `r` already refers to the next AND node and must not
// be advanced.
bool retry = false;
auto andNode = _root->getMemberUnchecked(r);
TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND);
// Re-entry point after any modification of `andNode`: member positions
// recorded earlier are stale then, so all analysis for this AND node is
// redone from scratch.
restartThisOrItem:
size_t andNumMembers = andNode->numMembers();
// deduplicate and sort all IN arrays
size_t inComparisons = 0;
for (size_t j = 0; j < andNumMembers; ++j) {
auto op = andNode->getMemberUnchecked(j);
if (op->type == NODE_TYPE_OPERATOR_BINARY_IN) {
++inComparisons;
}
// does nothing for operators other than IN
deduplicateInOperation(op);
}
andNumMembers = andNode->numMembers();
if (andNumMembers <= 1) {
// simple AND item with 0 or 1 members. nothing to do
++r;
n = _root->numMembers();
continue;
}
TRI_ASSERT(andNumMembers > 1);
if (inComparisons > 0) {
// move IN operations to the front to make comparison code below simpler
// The members are walked from back to front: every non-IN operator is
// compacted towards the end (write position `p`), every IN operator is
// parked on `stack`. Because the stack is filled back-to-front and emptied
// from its back, both groups keep their original relative order.
std::vector<AstNode*> stack;
size_t p = andNumMembers - 1;
// no loop condition: `j` is unsigned, so termination is handled by the
// explicit `j == 0` check at the end of the body
for (size_t j = p;; --j) {
auto op = andNode->getMemberUnchecked(j);
if (op->type == NODE_TYPE_OPERATOR_BINARY_IN) {
stack.push_back(op);
} else {
if (p != j) {
andNode->changeMember(p, op);
}
--p;
}
if (j == 0) {
break;
}
}
// now write the parked IN operators into the freed slots at the front
p = 0;
while (!stack.empty()) {
auto it = stack.back();
andNode->changeMember(p++, it);
stack.pop_back();
}
}
// optimization is only necessary if an AND node has multiple members
// Collect, per variable and attribute, the member positions at which the
// attribute takes part in a comparison, together with the side (left or
// right operand) on which it appears.
VariableUsageType variableUsage;
for (size_t j = 0; j < andNumMembers; ++j) {
auto operand = andNode->getMemberUnchecked(j);
if (operand->isComparisonOperator()) {
auto lhs = operand->getMember(0);
auto rhs = operand->getMember(1);
if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
storeAttributeAccess(variableUsage, lhs, j, ATTRIBUTE_LEFT);
}
if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
rhs->type == NODE_TYPE_EXPANSION) {
storeAttributeAccess(variableUsage, rhs, j, ATTRIBUTE_RIGHT);
}
}
}
// now find the variables and attributes for which there are multiple
// conditions
for (auto const& it : variableUsage) { // foreach sub-and-node
auto variable = it.first;
for (auto const& it2 : it.second) { // cross compare sub-and-nodes
auto const& attributeName = it2.first;
auto const& positions = it2.second;
if (positions.size() <= 1) {
// none or only one occurrence of the attribute
continue;
}
// multiple occurrences of the same attribute
// The first occurrence (`current`) is compared against every later
// occurrence (`other`). Only comparisons against constant values are
// considered.
auto leftNode = andNode->getMemberUnchecked(positions[0].first);
ConditionPart current(variable, attributeName, leftNode,
positions[0].second, nullptr);
if (!current.valueNode->isConstant()) {
continue;
}
size_t j = 1;
while (j < positions.size()) {
TRI_ASSERT(j != 0);
auto rightNode = andNode->getMemberUnchecked(positions[j].first);
ConditionPart other(variable, attributeName, rightNode,
positions[j].second, nullptr);
if (!other.valueNode->isConstant()) {
++j;
continue;
}
// IN-merging
if (leftNode->type == NODE_TYPE_OPERATOR_BINARY_IN &&
leftNode->getMemberUnchecked(1)->isConstant()) {
TRI_ASSERT(leftNode->numMembers() == 2);
if (rightNode->type == NODE_TYPE_OPERATOR_BINARY_IN &&
rightNode->getMemberUnchecked(1)->isConstant()) {
// merge IN with IN on same attribute
// the merged node looks up the attribute in the intersection of both
// value lists; it replaces the first IN, the second one is removed
TRI_ASSERT(rightNode->numMembers() == 2);
auto merged = _ast->createNodeBinaryOperator(
NODE_TYPE_OPERATOR_BINARY_IN, leftNode->getMemberUnchecked(0),
mergeInOperations(leftNode, rightNode));
andNode->removeMemberUnchecked(positions[j].first);
andNode->changeMember(positions[0].first, merged);
goto restartThisOrItem;
} else if (rightNode->isSimpleComparisonOperator()) {
// merge other comparison operator with IN
// Every value of the IN list is treated like `attribute == value`
// (table column 0) and looked up against the other comparison. Only
// values for which the lookup yields OTHER_CONTAINED_IN_SELF or
// CONVERT_EQUAL are kept in the new list.
TRI_ASSERT(rightNode->numMembers() == 2);
auto inNode = _ast->createNodeArray();
auto values = leftNode->getMemberUnchecked(1);
// enumerate over IN list
for (size_t k = 0; k < values->numMembers(); ++k) {
auto value = values->getMemberUnchecked(k);
ConditionPartCompareResult res = ConditionPart::ResultsTable
[CompareAstNodes(value, other.valueNode, true) +
1][0 /*NODE_TYPE_OPERATOR_BINARY_EQ*/]
[other.whichCompareOperation()];
bool const keep =
(res == CompareResult::OTHER_CONTAINED_IN_SELF ||
res == CompareResult::CONVERT_EQUAL);
if (keep) {
inNode->addMember(value);
}
}
if (inNode->numMembers() == 0) {
// no values left after merging -> IMPOSSIBLE
// the whole AND node can never match, so drop it from the root
_root->removeMemberUnchecked(r);
retry = true;
goto fastForwardToNextOrItem;
}
// use the new array of values
leftNode->changeMember(1, inNode);
// remove the other operator
andNode->removeMemberUnchecked(positions[j].first);
goto restartThisOrItem;
}
}
// end of IN-merging
// Results are -1, 0, 1, move to 0, 1, 2 for the lookup:
ConditionPartCompareResult res = ConditionPart::ResultsTable
[CompareAstNodes(current.valueNode, other.valueNode, true) + 1]
[current.whichCompareOperation()][other.whichCompareOperation()];
switch (res) {
case CompareResult::IMPOSSIBLE: {
// impossible condition
// j = positions.size();
// we remove this one, so fast forward the loops to their end:
_root->removeMemberUnchecked(r);
retry = true;
goto fastForwardToNextOrItem;
}
case CompareResult::SELF_CONTAINED_IN_OTHER: {
// drop the first occurrence and re-analyze the AND node
TRI_ASSERT(!positions.empty());
andNode->removeMemberUnchecked(positions.at(0).first);
goto restartThisOrItem;
}
case CompareResult::OTHER_CONTAINED_IN_SELF: {
// drop the later occurrence and re-analyze the AND node
TRI_ASSERT(j < positions.size());
andNode->removeMemberUnchecked(positions.at(j).first);
goto restartThisOrItem;
}
case CompareResult::CONVERT_EQUAL: { // both ok, now transform to a
// == x (== y)
// the later occurrence is removed; the first one is replaced by a new
// == node that takes over the first occurrence's members unchanged
TRI_ASSERT(!positions.empty());
TRI_ASSERT(j < positions.size());
andNode->removeMemberUnchecked(positions.at(j).first);
auto origNode =
andNode->getMemberUnchecked(positions.at(0).first);
auto newNode =
plan->getAst()->createNode(NODE_TYPE_OPERATOR_BINARY_EQ);
for (size_t iMemb = 0; iMemb < origNode->numMembers(); iMemb++) {
newNode->addMember(origNode->getMemberUnchecked(iMemb));
}
andNode->changeMember(positions.at(0).first, newNode);
goto restartThisOrItem;
}
case CompareResult::DISJOINT: {
// nothing to simplify for this pair
break;
}
case CompareResult::UNKNOWN: {
// nothing to simplify for this pair
break;
}
}
++j;
}
} // cross compare sub-and-nodes
} // foreach sub-and-node
// Reached either after the loops above completed normally or via goto once
// the current AND node has been removed from the root.
fastForwardToNextOrItem:
if (!retry) {
// root node hasn't changed. go to next sub-node!
++r;
}
// number of root sub-nodes has probably changed.
// now recalculate the number and don't modify r!
n = _root->numMembers();
}
}
////////////////////////////////////////////////////////////////////////////////
/// @brief registers an attribute access for a particular (collection) variable
////////////////////////////////////////////////////////////////////////////////
void Condition::storeAttributeAccess(VariableUsageType& variableUsage,
                                     AstNode const* node, size_t position,
                                     AttributeSideType side) {
  // Notes in `variableUsage` that the attribute accessed by `node` occurs at
  // member index `position` of the surrounding AND node, on the given `side`
  // of the comparison. Nodes that are not an attribute access on a variable
  // are ignored.
  std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
      access;

  if (!node->isAttributeAccessForVariable(access) ||
      access.first == nullptr) {
    return;
  }

  // emplace() leaves an already existing entry untouched and hands it back,
  // so it doubles as "find or create" for the per-variable map ...
  auto& attributeUsage =
      variableUsage.emplace(access.first, AttributeUsageType()).first->second;

  std::string attributeName;
  TRI_AttributeNamesToString(access.second, attributeName, false);

  // ... and for the per-attribute list of positions
  auto& recorded =
      attributeUsage.emplace(attributeName, UsagePositionType()).first->second;

  if (recorded.empty() || recorded.back().first != position) {
    recorded.emplace_back(position, side);
    return;
  }

  // The same attribute was already registered for this very position. This
  // happens when a condition refers to itself (e.g. a.x == a.x). Such a
  // condition is not optimized, so the previous entry is taken back.
  recorded.pop_back();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief validate the condition's AST
////////////////////////////////////////////////////////////////////////////////
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
void Condition::validateAst(AstNode const* node, int level) {
  // Recursively asserts the expected shape of the condition: an n-ary OR at
  // level 0, n-ary ANDs directly below it, and no n-ary AND/OR any deeper.
  bool const isRootLevel = (level == 0);

  if (isRootLevel) {
    TRI_ASSERT(node->type == NODE_TYPE_OPERATOR_NARY_OR);
  }

  size_t const numMembers = node->numMembers();

  for (size_t pos = 0; pos < numMembers; ++pos) {
    auto child = node->getMemberUnchecked(pos);

    if (isRootLevel) {
      // direct children of the root must be AND nodes
      TRI_ASSERT(child->type == NODE_TYPE_OPERATOR_NARY_AND);
    } else {
      // everything further down must be free of n-ary logical operators
      TRI_ASSERT(child->type != NODE_TYPE_OPERATOR_NARY_OR &&
                 child->type != NODE_TYPE_OPERATOR_NARY_AND);
    }

    validateAst(child, level + 1);
  }
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief checks if the current condition is covered by the other
////////////////////////////////////////////////////////////////////////////////
bool Condition::canRemove(ConditionPart const& me,
                          arangodb::aql::AstNode const* otherCondition) const {
  // Returns true if one of the comparisons in the first AND branch of
  // `otherCondition` covers the condition part `me`.
  TRI_ASSERT(otherCondition != nullptr);
  TRI_ASSERT(otherCondition->type == NODE_TYPE_OPERATOR_NARY_OR);

  // only the first AND branch of the other condition is looked at
  auto firstBranch = otherCondition->getMemberUnchecked(0);
  TRI_ASSERT(firstBranch->type == NODE_TYPE_OPERATOR_NARY_AND);

  size_t const numOperands = firstBranch->numMembers();

  for (size_t pos = 0; pos < numOperands; ++pos) {
    auto comparison = firstBranch->getMemberUnchecked(pos);

    if (!comparison->isComparisonOperator()) {
      continue;
    }

    auto left = comparison->getMember(0);
    auto right = comparison->getMember(1);

    // attribute access on the left-hand side, value on the right
    if (left->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
      std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
          access;

      if (left->isAttributeAccessForVariable(access)) {
        if (!right->isConstant()) {
          // non-constant condition: same operator and textually equal value
          if (me.operatorType == comparison->type &&
              me.valueNode->toString() == right->toString()) {
            return true;
          }
        } else {
          ConditionPart indexCondition(access.first, access.second, comparison,
                                       ATTRIBUTE_LEFT, nullptr);

          if (me.isCoveredBy(indexCondition)) {
            return true;
          }
        }
      }
    }

    // attribute access (or expansion) on the right-hand side, value on the
    // left
    if (right->type == NODE_TYPE_ATTRIBUTE_ACCESS ||
        right->type == NODE_TYPE_EXPANSION) {
      std::pair<Variable const*, std::vector<arangodb::basics::AttributeName>>
          access;

      if (right->isAttributeAccessForVariable(access)) {
        if (!left->isConstant()) {
          // non-constant condition: same operator and textually equal value
          if (me.operatorType == comparison->type &&
              me.valueNode->toString() == left->toString()) {
            return true;
          }
        } else {
          ConditionPart indexCondition(access.first, access.second, comparison,
                                       ATTRIBUTE_RIGHT, nullptr);

          if (me.isCoveredBy(indexCondition)) {
            return true;
          }
        }
      }
    }
  }

  return false;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief deduplicate IN condition values (and sort them)
/// this may modify the node in place
////////////////////////////////////////////////////////////////////////////////
void Condition::deduplicateInOperation(AstNode* operation) {
  // Replaces the value list of a constant-array IN operation by its
  // deduplicated counterpart. All other operations are left untouched.
  if (operation->type == NODE_TYPE_OPERATOR_BINARY_IN) {
    // an IN always has exactly two operands: the needle and the haystack
    TRI_ASSERT(operation->numMembers() == 2);

    auto values = operation->getMemberUnchecked(1);

    if (values->isArray() && values->isConstant()) {
      auto uniqueValues = _ast->deduplicateArray(values);

      if (uniqueValues != values) {
        // a different node came back, i.e. the list contained duplicates
        operation->changeMember(1, const_cast<AstNode*>(uniqueValues));
      }
    }
  }
}
////////////////////////////////////////////////////////////////////////////////
/// @brief merge the values from two IN operations
////////////////////////////////////////////////////////////////////////////////
AstNode* Condition::mergeInOperations(AstNode const* lhs, AstNode const* rhs) {
  // Builds the array node holding the intersection of the value lists of two
  // IN operations. Both operands must be INs over constant arrays.
  TRI_ASSERT(lhs->type == NODE_TYPE_OPERATOR_BINARY_IN);
  TRI_ASSERT(rhs->type == NODE_TYPE_OPERATOR_BINARY_IN);

  // member 1 of an IN operation is its value list
  auto leftValues = lhs->getMemberUnchecked(1);
  auto rightValues = rhs->getMemberUnchecked(1);

  TRI_ASSERT(leftValues->isArray() && leftValues->isConstant());
  TRI_ASSERT(rightValues->isArray() && rightValues->isConstant());

  auto intersection = _ast->createNodeIntersectedArray(leftValues, rightValues);
  return intersection;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief merges the current node with the sub nodes of same type
////////////////////////////////////////////////////////////////////////////////
AstNode* Condition::collapse(AstNode const* node) {
  // Creates a new operator of the same type as `node` in which direct
  // children of the same logical kind (n-ary or binary AND below an AND,
  // n-ary or binary OR below an OR) are replaced by their own members.
  TRI_ASSERT(node->type == NODE_TYPE_OPERATOR_NARY_OR ||
             node->type == NODE_TYPE_OPERATOR_NARY_AND);

  auto flattened = _ast->createNode(node->type);
  size_t const numMembers = node->numMembers();

  for (size_t pos = 0; pos < numMembers; ++pos) {
    auto child = node->getMemberUnchecked(pos);

    // a child can be merged if it has the parent's type or is the binary
    // flavor of the parent's n-ary type
    bool mergeable = (child->type == node->type);
    if (!mergeable) {
      if (node->type == NODE_TYPE_OPERATOR_NARY_OR) {
        mergeable = (child->type == NODE_TYPE_OPERATOR_BINARY_OR);
      } else if (node->type == NODE_TYPE_OPERATOR_NARY_AND) {
        mergeable = (child->type == NODE_TYPE_OPERATOR_BINARY_AND);
      }
    }

    if (!mergeable) {
      // different kind of node: take it over as is
      flattened->addMember(child);
      continue;
    }

    // same kind of node: pull its members up by one level
    size_t const numGrandChildren = child->numMembers();
    for (size_t k = 0; k < numGrandChildren; ++k) {
      flattened->addMember(child->getMemberUnchecked(k));
    }
  }

  return flattened;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief converts binary logical operators into n-ary operators
////////////////////////////////////////////////////////////////////////////////
AstNode* Condition::transformNode(AstNode* node) {
  // Recursively rewrites a condition (sub)tree:
  // - binary AND/OR nodes become n-ary AND/OR nodes
  // - an AND with OR children is multiplied out so that the OR ends up on top
  // - nested operators of the same kind are collapsed into one
  // - a NOT on top of an AND/OR is pushed down into the operands
  // Returns the node to use in place of `node` (may be `node` itself).
  if (node == nullptr) {
    return nullptr;
  }

  bool const isBinaryLogical = (node->type == NODE_TYPE_OPERATOR_BINARY_AND ||
                                node->type == NODE_TYPE_OPERATOR_BINARY_OR);

  if (isBinaryLogical) {
    // wrap both operands of the binary operator in a fresh n-ary operator
    TRI_ASSERT(node->numMembers() == 2);

    AstNode* binary = node;
    node = _ast->createNode(Ast::NaryOperatorType(binary->type));
    node->addMember(binary->getMember(0));
    node->addMember(binary->getMember(1));
  }

  TRI_ASSERT(node->type != NODE_TYPE_OPERATOR_BINARY_AND &&
             node->type != NODE_TYPE_OPERATOR_BINARY_OR);

  switch (node->type) {
    case NODE_TYPE_OPERATOR_NARY_AND: {
      bool hasOrChild = false;
      bool hasAndChild = false;
      size_t const numMembers = node->numMembers();

      // transform the operands first and note what kind of children we have
      for (size_t pos = 0; pos < numMembers; ++pos) {
        auto child = transformNode(node->getMemberUnchecked(pos));
        node->changeMember(pos, child);

        if (child->type == NODE_TYPE_OPERATOR_NARY_OR ||
            child->type == NODE_TYPE_OPERATOR_BINARY_OR) {
          hasOrChild = true;
        } else if (child->type == NODE_TYPE_OPERATOR_NARY_AND ||
                   child->type == NODE_TYPE_OPERATOR_BINARY_AND) {
          hasAndChild = true;
        }
      }

      if (hasOrChild) {
        // An AND with at least one OR child, e.g.
        //        AND
        //     OR      c
        //   a    b
        //
        // is multiplied out so that the OR moves to the top:
        //          OR
        //    AND       AND
        //   a   c     b   c
        //
        auto disjunction = _ast->createNode(NODE_TYPE_OPERATOR_NARY_OR);

        // one state per operand: an OR operand offers all of its members as
        // alternatives, any other operand just itself
        std::vector<PermutationState> states;
        for (size_t pos = 0; pos < numMembers; ++pos) {
          auto child = node->getMemberUnchecked(pos);
          size_t const alternatives =
              (child->type == NODE_TYPE_OPERATOR_NARY_OR) ? child->numMembers()
                                                          : 1;
          states.emplace_back(PermutationState(child, alternatives));
        }

        size_t digit = 0;
        bool exhausted = false;

        while (!exhausted) {
          // emit one AND for the currently selected combination
          auto conjunction = _ast->createNode(NODE_TYPE_OPERATOR_NARY_AND);
          for (auto const& state : states) {
            conjunction->addMember(state.getValue()->clone(_ast));
          }
          disjunction->addMember(conjunction);

          // step to the next combination: advance the first state, and carry
          // over to the following state whenever one wraps around
          for (;;) {
            PermutationState& slot = states[digit];

            if (++slot.current < slot.n) {
              // found a state that could be advanced; start over at the first
              digit = 0;
              break;
            }

            slot.current = 0;
            if (++digit >= numMembers) {
              // the last state wrapped around: all combinations were emitted
              exhausted = true;
              break;
            }
          }
        }

        node = transformNode(disjunction);
      }

      if (hasAndChild) {
        node = collapse(node);
      }

      return node;
    }

    case NODE_TYPE_OPERATOR_NARY_OR: {
      size_t const numMembers = node->numMembers();
      bool hasOrChild = false;

      for (size_t pos = 0; pos < numMembers; ++pos) {
        auto child = transformNode(node->getMemberUnchecked(pos));
        node->changeMember(pos, child);

        if (child->type == NODE_TYPE_OPERATOR_NARY_OR) {
          hasOrChild = true;
        }
      }

      if (hasOrChild) {
        node = collapse(node);
      }

      return node;
    }

    case NODE_TYPE_OPERATOR_UNARY_NOT: {
      // push down logical negations
      auto operand = node->getMemberUnchecked(0);

      bool const negatesAnd = (operand->type == NODE_TYPE_OPERATOR_NARY_AND ||
                               operand->type == NODE_TYPE_OPERATOR_BINARY_AND);
      bool const negatesOr = (operand->type == NODE_TYPE_OPERATOR_NARY_OR ||
                              operand->type == NODE_TYPE_OPERATOR_BINARY_OR);

      if (!negatesAnd && !negatesOr) {
        // nothing to push down, just transform the operand
        node->changeMember(0, transformNode(operand));
        break;
      }

      size_t const numMembers = operand->numMembers();

      // ! (a && b)  =>  (! a) || (! b)
      // ! (a || b)  =>  (! a) && (! b)
      AstNode* replacement =
          _ast->createNode(negatesAnd ? NODE_TYPE_OPERATOR_NARY_OR
                                      : NODE_TYPE_OPERATOR_NARY_AND);

      for (size_t pos = 0; pos < numMembers; ++pos) {
        auto negated = transformNode(_ast->createNodeUnaryOperator(
            NODE_TYPE_OPERATOR_UNARY_NOT, operand->getMemberUnchecked(pos)));
        replacement->addMember(_ast->optimizeNotExpression(negated));
      }

      return replacement;
    }

    default: {
      // any other node type is returned unchanged
      break;
    }
  }

  return node;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief Creates a top-level OR node if it does not already exist, and makes
/// sure that all second level nodes are AND nodes. Additionally, this step will
/// remove all NOP nodes.
////////////////////////////////////////////////////////////////////////////////
AstNode* Condition::fixRoot(AstNode* node, int level) {
  // Makes sure that `node` is an n-ary OR (level 0) or an n-ary AND (any other
  // level), wrapping it in a new operator of that type if necessary, and
  // strips NOP members. At level 0 the remaining members are fixed up
  // recursively. Returns the (possibly new) node, or nullptr for nullptr.
  if (node == nullptr) {
    return nullptr;
  }

  bool const isTopLevel = (level == 0);
  AstNodeType const expectedType =
      isTopLevel ? NODE_TYPE_OPERATOR_NARY_OR : NODE_TYPE_OPERATOR_NARY_AND;

  if (node->type != expectedType) {
    // wrap the node in an operator of the expected type
    node = _ast->createNodeNaryOperator(expectedType, node);
  }

  size_t const numMembers = node->numMembers();
  size_t kept = 0;  // write position == number of members kept so far

  for (size_t pos = 0; pos < numMembers; ++pos) {
    auto member = node->getMemberUnchecked(pos);

    if (member->type != NODE_TYPE_NOP) {
      if (isTopLevel) {
        // recurse into next level
        node->changeMember(kept, fixRoot(member, level + 1));
      } else if (pos != kept) {
        // close the gap left behind by previously skipped NOP nodes
        node->changeMember(kept, member);
      }
      ++kept;
    }
    // NOP nodes are simply skipped
  }

  if (kept != numMembers) {
    // adjust the number of members (because of the removed NOP nodes)
    node->reduceMembers(kept);
  }

  return node;
}