//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Jan Steemann //////////////////////////////////////////////////////////////////////////////// #include "Condition.h" #include "Aql/Ast.h" #include "Aql/AstNode.h" #include "Aql/Collection.h" #include "Aql/ExecutionPlan.h" #include "Aql/Quantifier.h" #include "Aql/Query.h" #include "Aql/SortCondition.h" #include "Aql/Variable.h" #include "Basics/AttributeNameParser.h" #include "Basics/Exceptions.h" #include "Logger/Logger.h" #include "Transaction/Methods.h" #ifdef _WIN32 // turn off warnings about too long type name for debug symbols blabla in MSVC // only... #pragma warning(disable : 4503) #endif using namespace arangodb; using namespace arangodb::aql; using CompareResult = ConditionPartCompareResult; namespace { // sort comparisons so that > and >= come before < and <=, and that // != and > come before == // we use this to some advantage when we check the conditions for a sparse // index later. // if a sparse index is asked whether it can supported a condition such as // `attr < value1`, this range would include `null`, which the sparse index // cannot provide. 
// however, if we first check other conditions we may find a condition on // the same attribute, e.g. `attr > value2`. // this other condition may exclude `null` so we then use the full range // `value2 < attr < value1` and do not have to discard sub-conditions anymore // we can also benefit from sorting != before == for hash indexes, if there // is a condition that excludes null (e.g. != null). if this is tracked first, // we are sure the index attribute value cannot be null and we can still use // the sparse index std::function const operationWeight = [](AstNode const* node) { switch (node->type) { case NODE_TYPE_OPERATOR_BINARY_NE: // != before ==, e.g. attr != null && attr == FUNC(abc) for hash // indexes return 1; case NODE_TYPE_OPERATOR_BINARY_GT: // > before others <, e.g. attr > null && attr < abc return 2; case NODE_TYPE_OPERATOR_BINARY_GE: // >= before others <, e.g. attr >= null && attr < abc return 3; case NODE_TYPE_OPERATOR_BINARY_EQ: // != before ==, e.g. attr != null && attr == FUNC(abc) for hash // indexes return 4; case NODE_TYPE_OPERATOR_BINARY_IN: return 5; case NODE_TYPE_OPERATOR_BINARY_NIN: return 6; case NODE_TYPE_OPERATOR_BINARY_LT: // < after others, e.g. attr > null && attr < abc return 7; case NODE_TYPE_OPERATOR_BINARY_LE: // <= after others, e.g. 
attr >= null && attr <= abc return 8; default: // non-comparison types can come after comparisons return 9; } }; struct PermutationState { PermutationState(arangodb::aql::AstNode const* value, size_t n) : value(value), current(0), n(n) {} arangodb::aql::AstNode const* getValue() const { if (value->type == arangodb::aql::NODE_TYPE_OPERATOR_BINARY_OR || value->type == arangodb::aql::NODE_TYPE_OPERATOR_NARY_OR) { TRI_ASSERT(current < n); return value->getMember(current); } TRI_ASSERT(current == 0); return value; } arangodb::aql::AstNode const* value; size_t current; size_t const n; }; //------------------------------------------------------------------------ // Rules for single-valued variables //------------------------------------------------------------------------ // | | a == y | a != y | a < y | a <= y | a >= y | a > y // -------|------------------|--------|--------|--------|--------|-------- // x < y | | IMP | OIS | OIS | OIS | IMP | IMP // x == y | a == x | OIS | IMP | IMP | OIS | OIS | IMP // x > y | | IMP | OIS | IMP | IMP | OIS | OIS // -------|------------------|--------|--------|--------|--------|-------- // x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO // x == y | a != x | IMP | OIS | SIO | DIJ | DIJ | SIO // x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ // -------|------------------|--------|--------|--------|--------|-------- // x < y | | IMP | OIS | OIS | OIS | IMP | IMP // x == y | a < x | IMP | OIS | OIS | OIS | IMP | IMP // x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ // -------|------------------|--------|--------|--------|--------|-------- // x < y | | IMP | OIS | OIS | OIS | IMP | IMP // x == y | a <= x | SIO | DIJ | SIO | OIS | CEQ | IMP // x > y | | SIO | DIJ | SIO | SIO | DIJ | DIJ // -------|------------------|--------|--------|--------|--------|-------- // x < y | | SIO | DIJ | DIJ | DIJ | SIO | SIO // x == y | a >= x | SIO | DIJ | IMP | CEQ | OIS | SIO // x > y | | IMP | OIS | IMP | IMP | OIS | OIS // 
// -------|------------------|--------|--------|--------|--------|--------|--------
// x  < y |                  |  SIO   |  DIJ   |  DIJ   |  DIJ   |  SIO   |  SIO
// x == y |  a >  x          |  IMP   |  OIS   |  IMP   |  IMP   |  OIS   |  OIS
// x  > y |                  |  IMP   |  OIS   |  IMP   |  IMP   |  OIS   |  OIS
//------------------------------------------------------------------------
// the 7th row and the 7th column of the array are a fallback for operations
// that are not in the table above (they always yield DISJOINT).
// IMP -> IMPOSSIBLE -> empty result -> the complete AND set of conditions can
// be dropped.
// CEQ -> CONVERT_EQUAL -> both conditions can be combined to `a == x`.
// DIJ -> DISJOINT -> neither condition is a consequence of the other -> both
// have to stay in place.
// SIO -> SELF_CONTAINED_IN_OTHER -> the left condition is a consequence of the
// right condition
// OIS -> OTHER_CONTAINED_IN_SELF -> the right condition is a consequence of the
// left condition
// If a condition (A) is a consequence of another (B), the solution set of A is
// larger than that of B
// -> A can be dropped.
//
// array layout: the first index selects the comparison result of the two
// values (x < y, x == y, x > y), the second index selects the operator of the
// left condition (`a <op> x`, table rows) and the third index the operator of
// the top condition (`a <op> y`, table columns), both in the order
// ==, !=, <, <=, >=, >, fallback
ConditionPartCompareResult const ResultsTable[3][7][7] = {
    {// X < Y
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
      OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
     {SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
      SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
      OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
      OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
     {SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
      SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
     {SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT,
      SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT},
     {DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}},
    {// X == Y
     {OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, OTHER_CONTAINED_IN_SELF,
      OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, DISJOINT},
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, SELF_CONTAINED_IN_OTHER, DISJOINT,
      DISJOINT, SELF_CONTAINED_IN_OTHER, DISJOINT},
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF,
      OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE, DISJOINT},
     {SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
      OTHER_CONTAINED_IN_SELF, CONVERT_EQUAL, IMPOSSIBLE, DISJOINT},
     {SELF_CONTAINED_IN_OTHER, DISJOINT, IMPOSSIBLE, CONVERT_EQUAL,
      OTHER_CONTAINED_IN_SELF, SELF_CONTAINED_IN_OTHER, DISJOINT},
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
      OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
     {DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}},
    {// X > Y
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
      OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
     {SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
      SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
     {SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
      SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
     {SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER,
      SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT},
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
      OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
     {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, IMPOSSIBLE,
      OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT},
     {DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}}};

//------------------------------------------------------------------------
// Rules for multi-valued variables
//------------------------------------------------------------------------
//        |                  | a == y | a != y | a <  y | a <= y | a >= y | a >  y
// -------|------------------|--------|--------|--------|--------|--------|--------
// x  < y |                  |  DIJ   |  DIJ   |  OIS   |  OIS   |  DIJ   |  DIJ
// x == y |  a == x          |  OIS   |  IMP   |  DIJ   |  OIS   |  OIS   |  DIJ
// x  > y |                  |  DIJ   |  DIJ   |  DIJ   |  DIJ   |  OIS   |  OIS
// -------|------------------|--------|--------|--------|--------|--------|--------
// x  < y |                  |  DIJ   |  DIJ   |  DIJ   |  DIJ   |  DIJ   |  DIJ
// x == y |  a != x          |  IMP   |  OIS   |  DIJ   |  DIJ   |  DIJ   |  DIJ
// x  > y |                  |  DIJ   |  DIJ   |  DIJ   |  DIJ   |  DIJ   |  DIJ
// -------|------------------|--------|--------|--------|--------|--------|--------
// x  < y |                  |  DIJ   |  DIJ   |  OIS   |  OIS   |  DIJ   |  DIJ
// x == y |  a <  x          |  DIJ   |  DIJ   |  OIS   |  OIS   |  DIJ   |  DIJ
// x  > y |                  |  SIO   |  DIJ   |  SIO   |  SIO   |  DIJ   |  DIJ
// -------|------------------|--------|--------|--------|--------|--------|--------
// x  < y |                  |  DIJ   |  DIJ   |  OIS   |  OIS   |  DIJ   |  DIJ
// x == y |  a <= x          |  SIO   |  DIJ   |  SIO   |  OIS   |  DIJ   |  DIJ
// x  > y |                  |  SIO   |  DIJ   |  SIO   |  SIO   |  DIJ   |  DIJ
// -------|------------------|--------|--------|--------|--------|--------|--------
// x  < y |                  |  SIO   |  DIJ   |  DIJ   |  DIJ   |  SIO   |  SIO
// x == y |  a >= x          |  SIO   |  DIJ   |  DIJ   |  DIJ   |  OIS   |  SIO
// x  > y |                  |  DIJ   |  DIJ   |  DIJ   |  DIJ   |  OIS   |  OIS
// -------|------------------|--------|--------|--------|--------|--------|--------
// x  < y |                  |  SIO   |  DIJ   |  DIJ   |  DIJ   |  SIO   |  SIO
// x == y |  a >  x          |  DIJ   |  DIJ   |  DIJ   |  DIJ   |  OIS   |  OIS
// x  > y |                  |  DIJ   |  DIJ   |  DIJ   |  DIJ   |  OIS   |  OIS
//------------------------------------------------------------------------
// the 7th column is here as a fallback if the operation is not in the table
// above.
// IMP -> IMPOSSIBLE -> empty result -> the complete AND set of conditions can
// be dropped.
// CEQ -> CONVERT_EQUAL -> both conditions can be combined to `a == x`.
// DIJ -> DISJOINT -> neither condition is a consequence of the other -> both
// have to stay in place.
// SIO -> SELF_CONTAINED_IN_OTHER -> the left condition is a consequence of the
// right condition
// OIS -> OTHER_CONTAINED_IN_SELF -> the right condition is a consequence of the
// left condition
// If a condition (A) is a consequence of another (B), the solution set of A is
// larger than that of B
// -> A can be dropped.
ConditionPartCompareResult const ResultsTableMultiValued[3][7][7] = { {// X < Y {DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT}, {DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}, {DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT}, {DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT}, {SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT, SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT}, {SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT, SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT}, {DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}}, {// X == Y {OTHER_CONTAINED_IN_SELF, IMPOSSIBLE, DISJOINT, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT}, {IMPOSSIBLE, OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}, {DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT}, {SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER, OTHER_CONTAINED_IN_SELF, DISJOINT, DISJOINT, DISJOINT}, {SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, SELF_CONTAINED_IN_OTHER, DISJOINT}, {DISJOINT, DISJOINT, DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT}, {DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}}, {// X > Y {DISJOINT, DISJOINT, DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT}, {DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}, {SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT}, {SELF_CONTAINED_IN_OTHER, DISJOINT, SELF_CONTAINED_IN_OTHER, SELF_CONTAINED_IN_OTHER, DISJOINT, DISJOINT, DISJOINT}, {DISJOINT, DISJOINT, DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, 
OTHER_CONTAINED_IN_SELF, DISJOINT}, {DISJOINT, DISJOINT, DISJOINT, DISJOINT, OTHER_CONTAINED_IN_SELF, OTHER_CONTAINED_IN_SELF, DISJOINT}, {DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT, DISJOINT}}}; } // namespace ConditionPart::ConditionPart(Variable const* variable, std::string const& attributeName, AstNode const* operatorNode, AttributeSideType side, void* data) : variable(variable), attributeName(attributeName), operatorType(operatorNode->type), operatorNode(operatorNode), valueNode(nullptr), data(data), isExpanded(false) { if (side == ATTRIBUTE_LEFT) { valueNode = operatorNode->getMember(1); } else { valueNode = operatorNode->getMember(0); if (Ast::IsReversibleOperator(operatorType)) { operatorType = Ast::ReverseOperator(operatorType); } } isExpanded = (attributeName.find("[*]") != std::string::npos); } ConditionPart::ConditionPart(Variable const* variable, std::vector const& attributeNames, AstNode const* operatorNode, AttributeSideType side, void* data) : ConditionPart(variable, "", operatorNode, side, data) { TRI_AttributeNamesToString(attributeNames, attributeName, false); isExpanded = (attributeName.find("[*]") != std::string::npos); } ConditionPart::~ConditionPart() {} /// @brief true if the condition is completely covered by the other condition bool ConditionPart::isCoveredBy(ConditionPart const& other, bool isReversed) const { if (variable != other.variable || attributeName != other.attributeName) { return false; } if (!isExpanded && !other.isExpanded && other.operatorType == NODE_TYPE_OPERATOR_BINARY_IN && other.valueNode->isConstant() && isReversed) { if (CompareAstNodes(other.valueNode, valueNode, false) == 0) { return true; } } TRI_ASSERT(valueNode != nullptr); TRI_ASSERT(other.valueNode != nullptr); if (!valueNode->isConstant() || !other.valueNode->isConstant()) { return false; } // special cases for IN... 
if (!isExpanded && !other.isExpanded && other.operatorType == NODE_TYPE_OPERATOR_BINARY_IN && other.valueNode->isConstant() && other.valueNode->isArray()) { if (operatorType == NODE_TYPE_OPERATOR_BINARY_IN && valueNode->isConstant() && valueNode->isArray()) { // compare IN with an IN // this has quadratic complexity size_t const n1 = valueNode->numMembers(); size_t const n2 = other.valueNode->numMembers(); // maximum number of comparisons that we will accept // otherwise the optimization will be aborted static size_t const MaxComparisons = 2048; if (n1 * n2 < MaxComparisons) { for (size_t i = 0; i < n1; ++i) { auto v = valueNode->getMemberUnchecked(i); for (size_t j = 0; j < n2; ++j) { auto w = other.valueNode->getMemberUnchecked(j); ConditionPartCompareResult res = ResultsTable[CompareAstNodes(v, w, true) + 1][0][0]; if (res != CompareResult::OTHER_CONTAINED_IN_SELF && res != CompareResult::CONVERT_EQUAL && res != CompareResult::IMPOSSIBLE) { return false; } } } } else { std::unordered_set values( 512, AstNodeValueHash(), AstNodeValueEqual()); for (size_t i = 0; i < n2; ++i) { values.emplace(other.valueNode->getMemberUnchecked(i)); } for (size_t i = 0; i < n1; ++i) { auto node = valueNode->getMemberUnchecked(i); if (values.find(node) == values.end()) { return false; } } } return true; } return false; } if (isExpanded && other.isExpanded && operatorType == NODE_TYPE_OPERATOR_BINARY_IN && other.operatorType == NODE_TYPE_OPERATOR_BINARY_IN && other.valueNode->isConstant()) { return CompareAstNodes(other.valueNode, valueNode, false) == 0; } bool a = operatorNode->isArrayComparisonOperator(); bool b = other.operatorNode->isArrayComparisonOperator(); if (a || b) { if (a != b) { return false; } TRI_ASSERT(operatorNode->numMembers() == 3 && other.operatorNode->numMembers() == 3); AstNode* q1 = operatorNode->getMemberUnchecked(2); TRI_ASSERT(q1->type == NODE_TYPE_QUANTIFIER); AstNode* q2 = other.operatorNode->getMemberUnchecked(2); TRI_ASSERT(q2->type == 
NODE_TYPE_QUANTIFIER); // do only cover ALL and NONE when both sides have same quantifier if (q1->getIntValue() != q2->getIntValue() || q1->getIntValue() == Quantifier::ANY) { return false; } if (isExpanded && other.isExpanded && operatorType == NODE_TYPE_OPERATOR_BINARY_ARRAY_IN && other.operatorType == NODE_TYPE_OPERATOR_BINARY_ARRAY_IN && other.valueNode->isConstant()) { return CompareAstNodes(other.valueNode, valueNode, false) == 0; } } // Results are -1, 0, 1, move to 0, 1, 2 for the lookup: ConditionPartCompareResult res = ResultsTable[CompareAstNodes(other.valueNode, valueNode, true) + 1] [other.whichCompareOperation()][whichCompareOperation()]; if (res == CompareResult::OTHER_CONTAINED_IN_SELF || res == CompareResult::CONVERT_EQUAL || res == CompareResult::IMPOSSIBLE) { return true; } return false; } /// @brief clears the attribute access data static inline void clearAttributeAccess( std::pair>& parts) { parts.first = nullptr; parts.second.clear(); } /// @brief create the condition Condition::Condition(Ast* ast) : _ast(ast), _root(nullptr), _isNormalized(false), _isSorted(false) {} /*namespace { size_t countNodes(AstNode* node) { if (node == nullptr) { return 0; } size_t n = node->numMembers(); size_t sum = 1; for (size_t i = 0; i < n; i++) { sum += countNodes(node->getMember(i)); } return sum; } }*/ /// @brief destroy the condition Condition::~Condition() { // memory for nodes is not owned and thus not freed by the condition // all nodes belong to the AST // LOG_TOPIC("12fb9", ERR, Logger::FIXME) << "nodes in tree: " << ::countNodes(_root); } /// @brief export the condition as VelocyPack void Condition::toVelocyPack(arangodb::velocypack::Builder& builder, bool verbose) const { if (_root == nullptr) { VPackObjectBuilder guard(&builder); } else { _root->toVelocyPack(builder, verbose); } } /// @brief create a condition from VPack Condition* Condition::fromVPack(ExecutionPlan* plan, arangodb::velocypack::Slice const& slice) { auto condition = 
std::make_unique(plan->getAst()); if (slice.isObject() && slice.length() != 0) { // note: the AST is responsible for freeing the AstNode later! AstNode* node = new AstNode(plan->getAst(), slice); condition->andCombine(node); } condition->_isNormalized = true; condition->_isSorted = false; return condition.release(); } /// @brief clone the condition Condition* Condition::clone() const { auto copy = std::make_unique(_ast); if (_root != nullptr) { copy->_root = _root->clone(_ast); } copy->_isNormalized = _isNormalized; return copy.release(); } /// @brief add a sub-condition to the condition /// the sub-condition will be AND-combined with the existing condition(s) void Condition::andCombine(AstNode const* node) { if (_isNormalized) { // already normalized THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "cannot and-combine normalized condition"); } if (_root == nullptr) { // condition was empty before _root = _ast->clone(node); } else { // condition was not empty before, now AND-merge _root = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, _root, _ast->clone(node)); } TRI_ASSERT(_root != nullptr); } /// @brief locate indexes for each condition /// return value is a pair indicating whether the index can be used for /// filtering(first) and sorting(second) std::pair Condition::findIndexes(EnumerateCollectionNode const* node, std::vector& usedIndexes, SortCondition const* sortCondition) { TRI_ASSERT(usedIndexes.empty()); Variable const* reference = node->outVariable(); std::string collectionName = node->collection()->name(); transaction::Methods* trx = _ast->query()->trx(); size_t itemsInIndex; if (!collectionName.empty() && collectionName[0] == '_' && collectionName.substr(0, 11) == "_statistics") { // use hard-coded number of items in index, because we are dealing with // the statistics collection here. 
this saves a roundtrip to the DB servers // for statistics queries that do not need a fully accurate collection count itemsInIndex = 1024; } else { // estimate for the number of documents in the index. may be outdated... itemsInIndex = node->collection()->count(trx); } if (_root == nullptr) { size_t dummy; return std::make_pair( false, trx->getIndexForSortCondition(collectionName, sortCondition, reference, itemsInIndex, node->hint(), usedIndexes, dummy)); } return trx->getBestIndexHandlesForFilterCondition(collectionName, _ast, _root, reference, sortCondition, itemsInIndex, node->hint(), usedIndexes, _isSorted); } /// @brief get the attributes for a sub-condition that are const /// (i.e. compared with equality) std::vector> Condition::getConstAttributes( Variable const* reference, bool includeNull) const { std::vector> result; if (_root == nullptr) { return result; } TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR); size_t n = _root->numMembers(); if (n != 1) { // multiple ORs return result; } std::pair> parts; AstNode const* node = _root->getMember(0); n = node->numMembers(); for (size_t i = 0; i < n; ++i) { auto member = node->getMember(i); if (member->type == NODE_TYPE_OPERATOR_BINARY_EQ) { clearAttributeAccess(parts); auto lhs = member->getMember(0); auto rhs = member->getMember(1); if (lhs->isAttributeAccessForVariable(parts) && parts.first == reference) { if (includeNull || ((rhs->isConstant() || rhs->type == NODE_TYPE_REFERENCE) && !rhs->isNullValue())) { result.emplace_back(std::move(parts.second)); } } else if (rhs->isAttributeAccessForVariable(parts) && parts.first == reference) { if (includeNull || ((lhs->isConstant() || lhs->type == NODE_TYPE_REFERENCE) && !lhs->isNullValue())) { result.emplace_back(std::move(parts.second)); } } } } return result; } /// @brief get the attributes for a sub-condition that are not-null arangodb::HashSet> Condition::getNonNullAttributes( Variable const* reference) const { arangodb::HashSet> result; if (_root == 
nullptr) { return result; } TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR); size_t n = _root->numMembers(); if (n != 1) { // multiple ORs return result; } std::pair> parts; AstNode const* node = _root->getMember(0); n = node->numMembers(); for (size_t i = 0; i < n; ++i) { auto member = node->getMember(i); if (member->type == NODE_TYPE_OPERATOR_BINARY_NE || member->type == NODE_TYPE_OPERATOR_BINARY_GT || member->type == NODE_TYPE_OPERATOR_BINARY_LT) { clearAttributeAccess(parts); AstNode const* lhs = member->getMember(0); AstNode const* rhs = member->getMember(1); AstNode const* check = nullptr; if (lhs->isConstant() && lhs->isNullValue() && rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && member->type != NODE_TYPE_OPERATOR_BINARY_GT) { // null != doc.value // null < doc.value check = rhs; } else if (rhs->isConstant() && rhs->isNullValue() && lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && node->type != NODE_TYPE_OPERATOR_BINARY_LT) { // doc.value != null // doc.value > null check = lhs; } if (check != nullptr && check->isAttributeAccessForVariable(parts, false) && parts.first == reference) { result.emplace(std::move(parts.second)); } } } return result; } /// @brief normalize the condition /// this will convert the condition into its disjunctive normal form void Condition::normalize(ExecutionPlan* plan, bool multivalued /*= false*/) { if (_isNormalized) { // already normalized return; } _root = transformNodePreorder(_root); _root = transformNodePostorder(_root); _root = fixRoot(_root, 0); optimize(plan, multivalued); #ifdef ARANGODB_ENABLE_MAINTAINER_MODE if (_root != nullptr) { // _root->dump(0); validateAst(_root, 0); } #endif } /// @brief normalize the condition /// this will convert the condition into its disjunctive normal form /// in this case we don't re-run the optimizer. Its expected that you /// don't want to remove eventually unneccessary filters. 
void Condition::normalize() { if (_isNormalized) { // already normalized return; } _root = transformNodePreorder(_root); _root = transformNodePostorder(_root); _root = fixRoot(_root, 0); #ifdef ARANGODB_ENABLE_MAINTAINER_MODE if (_root != nullptr) { validateAst(_root, 0); } #endif } void Condition::collectOverlappingMembers(ExecutionPlan const* plan, Variable const* variable, AstNode const* andNode, AstNode const* otherAndNode, arangodb::HashSet& toRemove, Index const* index, /* may be nullptr */ bool isFromTraverser) { bool const isSparse = (index != nullptr && index->sparse()); std::pair> result; size_t const n = andNode->numMembers(); for (size_t i = 0; i < n; ++i) { auto operand = andNode->getMemberUnchecked(i); bool allowOps = operand->isComparisonOperator(); if (isSparse && allowOps && !isFromTraverser && (operand->type == NODE_TYPE_OPERATOR_BINARY_NE || operand->type == NODE_TYPE_OPERATOR_BINARY_GT)) { // look for != null and > null // these can be removed if we are working with a sparse index! auto lhs = operand->getMember(0); auto rhs = operand->getMember(1); clearAttributeAccess(result); // only remove the condition if the index is exactly on the same attribute // as the condition if (rhs->isNullValue() && lhs->isAttributeAccessForVariable(result, isFromTraverser) && result.first == variable && index->fields().size() == 1 && arangodb::basics::AttributeName::isIdentical(result.second, index->fields()[0], false)) { toRemove.emplace(i); // removed, no need to go on below... 
continue; } } if (isFromTraverser) { allowOps = allowOps || operand->isArrayComparisonOperator(); } else { allowOps = allowOps && operand->type != NODE_TYPE_OPERATOR_BINARY_NE && operand->type != NODE_TYPE_OPERATOR_BINARY_NIN; } if (allowOps) { auto lhs = operand->getMember(0); auto rhs = operand->getMember(1); if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || (isFromTraverser && lhs->type == NODE_TYPE_EXPANSION)) { clearAttributeAccess(result); if (lhs->isAttributeAccessForVariable(result, isFromTraverser) && result.first == variable) { ConditionPart current(variable, result.second, operand, ATTRIBUTE_LEFT, nullptr); if (canRemove(plan, current, otherAndNode, isFromTraverser)) { toRemove.emplace(i); } } } if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || rhs->type == NODE_TYPE_EXPANSION) { clearAttributeAccess(result); if (rhs->isAttributeAccessForVariable(result, isFromTraverser) && result.first == variable) { ConditionPart current(variable, result.second, operand, ATTRIBUTE_RIGHT, nullptr); if (canRemove(plan, current, otherAndNode, isFromTraverser)) { toRemove.emplace(i); } } } } } } /// @brief removes condition parts from another AstNode* Condition::removeIndexCondition(ExecutionPlan const* plan, Variable const* variable, AstNode const* condition, Index const* index) { TRI_ASSERT(index != nullptr); if (_root == nullptr || condition == nullptr) { return _root; } TRI_ASSERT(_root != nullptr); TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR); TRI_ASSERT(condition != nullptr); TRI_ASSERT(condition->type == NODE_TYPE_OPERATOR_NARY_OR); if (condition->numMembers() != 1 && _root->numMembers() != 1) { return _root; } auto andNode = _root->getMemberUnchecked(0); TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND); size_t const n = andNode->numMembers(); auto conditionAndNode = condition->getMemberUnchecked(0); TRI_ASSERT(conditionAndNode->type == NODE_TYPE_OPERATOR_NARY_AND); arangodb::HashSet toRemove; collectOverlappingMembers(plan, variable, andNode, 
conditionAndNode, toRemove, index, false); if (toRemove.empty()) { return _root; } // build a new AST condition AstNode* newNode = nullptr; for (size_t i = 0; i < n; ++i) { if (toRemove.find(i) == toRemove.end()) { auto what = andNode->getMemberUnchecked(i); if (newNode == nullptr) { // the only node so far newNode = what; } else { // AND-combine with existing node newNode = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, newNode, what); } } } return newNode; } /// @brief remove filter conditions already covered by the traversal AstNode* Condition::removeTraversalCondition(ExecutionPlan const* plan, Variable const* variable, AstNode* other) { if (_root == nullptr || other == nullptr) { return _root; } TRI_ASSERT(_root != nullptr); TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR); TRI_ASSERT(other != nullptr); TRI_ASSERT(other->type == NODE_TYPE_OPERATOR_NARY_OR); if (other->numMembers() != 1 && _root->numMembers() != 1) { return _root; } auto andNode = _root->getMemberUnchecked(0); TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND); auto otherAndNode = other->getMemberUnchecked(0); TRI_ASSERT(otherAndNode->type == NODE_TYPE_OPERATOR_NARY_AND); size_t const n = andNode->numMembers(); arangodb::HashSet toRemove; collectOverlappingMembers(plan, variable, andNode, otherAndNode, toRemove, nullptr, true); if (toRemove.empty()) { return _root; } // build a new AST condition AstNode* newNode = nullptr; for (size_t i = 0; i < n; ++i) { if (toRemove.find(i) == toRemove.end()) { auto what = andNode->getMemberUnchecked(i); if (newNode == nullptr) { // the only node so far newNode = what; } else { // AND-combine with existing node newNode = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_AND, newNode, what); } } } return newNode; } /// @brief remove (now) invalid variables from the condition bool Condition::removeInvalidVariables(arangodb::HashSet const& validVars) { if (_root == nullptr) { return false; } TRI_ASSERT(_root != nullptr); 
TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR); auto oldRoot = _root; _root = _ast->shallowCopyForModify(oldRoot); TRI_DEFER(FINALIZE_SUBTREE(_root)); bool isEmpty = false; // handle sub nodes of top-level OR node size_t const n = _root->numMembers(); arangodb::HashSet varsUsed; for (size_t i = 0; i < n; ++i) { auto oldAndNode = _root->getMemberUnchecked(i); auto andNode = _ast->shallowCopyForModify(oldAndNode); TRI_DEFER(FINALIZE_SUBTREE(andNode)); _root->changeMember(i, andNode); TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND); size_t nAnd = andNode->numMembers(); for (size_t j = 0; j < nAnd; /* no hoisting */) { // check which variables are used in each AND varsUsed.clear(); Ast::getReferencedVariables(andNode->getMemberUnchecked(j), varsUsed); bool invalid = false; for (auto& it : varsUsed) { if (validVars.find(it) == validVars.end()) { // found an invalid variable here... invalid = true; break; } } if (invalid) { andNode->removeMemberUnchecked(j); // repeat with some member index TRI_ASSERT(nAnd > 0); --nAnd; if (nAnd == 0) { isEmpty = true; } } else { ++j; } } } return isEmpty; } /// @brief optimize the condition expression tree void Condition::optimize(ExecutionPlan* plan, bool multivalued) { if (_root == nullptr) { return; } transaction::Methods* trx = plan->getAst()->query()->trx(); TRI_ASSERT(_root != nullptr); TRI_ASSERT(_root->type == NODE_TYPE_OPERATOR_NARY_OR); auto oldRoot = _root; _root = _ast->shallowCopyForModify(oldRoot); TRI_DEFER(FINALIZE_SUBTREE(_root)); std::pair> varAccess; // handle sub nodes of top-level OR node size_t n = _root->numMembers(); size_t r = 0; const auto* resultsTable = multivalued ? 
ResultsTableMultiValued : ResultsTable; while (r < n) { // foreach OR-Node bool retry = false; auto oldAnd = _root->getMemberUnchecked(r); TRI_ASSERT(oldAnd->type == NODE_TYPE_OPERATOR_NARY_AND); auto andNode = _ast->shallowCopyForModify(oldAnd); _root->changeMember(r, andNode); TRI_DEFER(FINALIZE_SUBTREE(andNode)); restartThisOrItem: size_t andNumMembers = andNode->numMembers(); // deduplicate and sort all IN arrays size_t inComparisons = 0; for (size_t j = 0; j < andNumMembers; ++j) { auto op = andNode->getMemberUnchecked(j); if (op->type == NODE_TYPE_OPERATOR_BINARY_IN) { ++inComparisons; auto deduplicated = deduplicateInOperation(op); andNode->changeMember(j, deduplicated); } } andNumMembers = andNode->numMembers(); if (andNumMembers <= 1) { // simple AND item with 0 or 1 members. nothing to do ++r; n = _root->numMembers(); continue; } TRI_ASSERT(andNumMembers > 1); // sort AND parts of each sub-condition so > and >= come before < and <= // we use this to some advantage when we check the conditions for a sparse // index later. // if a sparse index is asked whether it can supported a condition such as // `attr < value1`, this range would include `null`, which the sparse index // cannot provide. // however, if we first check other conditions we may find a condition on // the same attribute, e.g. `attr > value2`. 
// this other condition may exclude `null` so we then use the full range // `value2 < attr < value1` // and do not have to discard sub-conditions anymore andNode->sortMembers([](AstNode const* lhs, AstNode const* rhs) { // try to re-order comparison operators int l = ::operationWeight(lhs); int r = ::operationWeight(rhs); if (l != r) { return l < r; } // all equal, now check if original types are different if (lhs->type != rhs->type) { return lhs->type < rhs->type; } // still all equal return false; }); if (inComparisons > 0) { // move IN operations to the front to make comparison code below simpler std::vector stack; size_t p = andNumMembers - 1; for (size_t j = p;; --j) { auto op = andNode->getMemberUnchecked(j); if (op->type == NODE_TYPE_OPERATOR_BINARY_IN) { stack.push_back(op); } else { if (p != j) { andNode->changeMember(p, op); } --p; } if (j == 0) { break; } } p = 0; while (!stack.empty()) { auto it = stack.back(); andNode->changeMember(p++, it); stack.pop_back(); } } // optimization is only necessary if an AND node has multiple members VariableUsageType variableUsage; for (size_t j = 0; j < andNumMembers; ++j) { auto operand = andNode->getMemberUnchecked(j); if (operand->isComparisonOperator()) { AstNode const* lhs = operand->getMember(0); AstNode const* rhs = operand->getMember(1); if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS) { if (lhs->isConstant()) { lhs = Ast::resolveConstAttributeAccess(lhs); } storeAttributeAccess(varAccess, variableUsage, lhs, j, ATTRIBUTE_LEFT); } if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || rhs->type == NODE_TYPE_EXPANSION) { if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS && rhs->isConstant()) { rhs = Ast::resolveConstAttributeAccess(rhs); } storeAttributeAccess(varAccess, variableUsage, rhs, j, ATTRIBUTE_RIGHT); } } } // now find the variables and attributes for which there are multiple // conditions for (auto const& it : variableUsage) { // foreach sub-and-node auto variable = it.first; for (auto const& it2 : it.second) { // 
cross compare sub-and-nodes auto const& attributeName = it2.first; auto const& positions = it2.second; if (positions.size() <= 1) { // none or only one occurence of the attribute continue; } // multiple occurrences of the same attribute size_t leftPos = positions[0].first; // copy & modify leftNode auto oldLeft = andNode->getMemberUnchecked(leftPos); auto leftNode = _ast->shallowCopyForModify(oldLeft); TRI_DEFER(FINALIZE_SUBTREE(leftNode)); andNode->changeMember(leftPos, leftNode); ConditionPart current(variable, attributeName, leftNode, positions[0].second, nullptr); if (!current.valueNode->isConstant()) { continue; } size_t j = 1; while (j < positions.size()) { TRI_ASSERT(j != 0); auto rightPos = positions[j].first; auto rightNode = andNode->getMemberUnchecked(rightPos); ConditionPart other(variable, attributeName, rightNode, positions[j].second, nullptr); if (!other.valueNode->isConstant()) { ++j; continue; } // IN-merging if (leftNode->type == NODE_TYPE_OPERATOR_BINARY_IN && leftNode->getMemberUnchecked(1)->isConstant() && !multivalued) { TRI_ASSERT(leftNode->numMembers() == 2); if (rightNode->type == NODE_TYPE_OPERATOR_BINARY_IN && rightNode->getMemberUnchecked(1)->isConstant()) { // merge IN with IN on same attribute TRI_ASSERT(rightNode->numMembers() == 2); auto merged = _ast->createNodeBinaryOperator(NODE_TYPE_OPERATOR_BINARY_IN, leftNode->getMemberUnchecked(0), mergeInOperations(trx, leftNode, rightNode)); andNode->removeMemberUnchecked(rightPos); andNode->changeMember(leftPos, merged); goto restartThisOrItem; } else if (rightNode->isSimpleComparisonOperator()) { // merge other comparison operator with IN TRI_ASSERT(rightNode->numMembers() == 2); auto values = leftNode->getMemberUnchecked(1); auto inNode = _ast->createNodeArray(values->numMembers()); // enumerate over IN list for (size_t k = 0; k < values->numMembers(); ++k) { auto value = values->getMemberUnchecked(k); ConditionPartCompareResult res = ResultsTable[CompareAstNodes(value, other.valueNode, 
true) + 1][0 /*NODE_TYPE_OPERATOR_BINARY_EQ*/] [other.whichCompareOperation()]; bool const keep = (res == CompareResult::OTHER_CONTAINED_IN_SELF || res == CompareResult::CONVERT_EQUAL); if (keep) { inNode->addMember(value); } } if (inNode->numMembers() == 0) { // no values left after merging -> IMPOSSIBLE _root->removeMemberUnchecked(r); retry = true; goto fastForwardToNextOrItem; } // use the new array of values leftNode->changeMember(1, inNode); // remove the other operator andNode->removeMemberUnchecked(rightPos); goto restartThisOrItem; } } // end of IN-merging // Results are -1, 0, 1, move to 0, 1, 2 for the lookup: ConditionPartCompareResult res = resultsTable [CompareAstNodes(current.valueNode, other.valueNode, true) + 1] [current.whichCompareOperation()][other.whichCompareOperation()]; switch (res) { case CompareResult::IMPOSSIBLE: { // impossible condition // j = positions.size(); // we remove this one, so fast forward the loops to their end: _root->removeMemberUnchecked(r); retry = true; goto fastForwardToNextOrItem; } case CompareResult::SELF_CONTAINED_IN_OTHER: { TRI_ASSERT(!positions.empty()); andNode->removeMemberUnchecked(positions.at(0).first); goto restartThisOrItem; } case CompareResult::OTHER_CONTAINED_IN_SELF: { TRI_ASSERT(j < positions.size()); andNode->removeMemberUnchecked(positions.at(j).first); goto restartThisOrItem; } case CompareResult::CONVERT_EQUAL: { // both ok, now transform to a // == x (== y) TRI_ASSERT(!positions.empty()); TRI_ASSERT(j < positions.size()); andNode->removeMemberUnchecked(positions.at(j).first); auto origNode = andNode->getMemberUnchecked(positions.at(0).first); auto newNode = plan->getAst()->createNode(NODE_TYPE_OPERATOR_BINARY_EQ); for (size_t iMemb = 0; iMemb < origNode->numMembers(); iMemb++) { newNode->addMember(origNode->getMemberUnchecked(iMemb)); } TRI_DEFER(FINALIZE_SUBTREE(newNode)); andNode->changeMember(positions.at(0).first, newNode); goto restartThisOrItem; } case CompareResult::DISJOINT: { break; } 
case CompareResult::UNKNOWN: { break; } } ++j; } } // cross compare sub-and-nodes } // foreach sub-and-node fastForwardToNextOrItem: if (!retry) { // root nodes hasn't changed. go to next sub-node! ++r; } // number of root sub-nodes has probably changed. // now recalculate the number and don't modify r! n = _root->numMembers(); } } /// @brief registers an attribute access for a particular (collection) variable void Condition::storeAttributeAccess( std::pair>& varAccess, VariableUsageType& variableUsage, AstNode const* node, size_t position, AttributeSideType side) { if (!node->isAttributeAccessForVariable(varAccess)) { return; } auto variable = varAccess.first; if (variable != nullptr) { std::string attributeName; TRI_AttributeNamesToString(varAccess.second, attributeName, false); auto& dst = variableUsage[variable][attributeName]; if (!dst.empty() && dst.back().first == position) { // already have this attribute for this variable. can happen in case a // condition refers to itself (e.g. 
a.x == a.x) // in this case, we won't optimize it dst.erase(dst.begin() + dst.size() - 1); } else { dst.emplace_back(position, side); } } } /// @brief validate the condition's AST #ifdef ARANGODB_ENABLE_MAINTAINER_MODE void Condition::validateAst(AstNode const* node, int level) { if (level == 0) { TRI_ASSERT(node->type == NODE_TYPE_OPERATOR_NARY_OR); } size_t const n = node->numMembers(); for (size_t i = 0; i < n; ++i) { auto sub = node->getMemberUnchecked(i); if (level == 0) { TRI_ASSERT(sub->type == NODE_TYPE_OPERATOR_NARY_AND); } else { TRI_ASSERT(sub->type != NODE_TYPE_OPERATOR_NARY_OR && sub->type != NODE_TYPE_OPERATOR_NARY_AND); } validateAst(sub, level + 1); } } #endif /// @brief checks if the current condition is covered by the other bool Condition::canRemove(ExecutionPlan const* plan, ConditionPart const& me, arangodb::aql::AstNode const* andNode, bool isFromTraverser) { TRI_ASSERT(andNode != nullptr); TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND); std::pair> result; size_t const n = andNode->numMembers(); auto normalize = [&plan](AstNode const* node) -> std::string { if (node->type == NODE_TYPE_REFERENCE) { auto setter = plan->getVarSetBy(static_cast(node->getData())->id); if (setter != nullptr && setter->getType() == ExecutionNode::CALCULATION) { auto cn = ExecutionNode::castTo(setter); // use expression node instead node = cn->expression()->node(); } } // return string representation return node->toString(); }; std::string temp; try { for (size_t i = 0; i < n; ++i) { auto operand = andNode->getMemberUnchecked(i); if (operand->isComparisonOperator() || (isFromTraverser && operand->isArrayComparisonOperator())) { auto lhs = operand->getMember(0); auto rhs = operand->getMember(1); if (lhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || (isFromTraverser && lhs->type == NODE_TYPE_EXPANSION)) { clearAttributeAccess(result); if (lhs->isAttributeAccessForVariable(result, isFromTraverser)) { temp.clear(); TRI_AttributeNamesToString(result.second, temp); if 
(temp == me.attributeName) { if (rhs->isConstant()) { ConditionPart indexCondition(result.first, result.second, operand, ATTRIBUTE_LEFT, nullptr); if (me.isCoveredBy(indexCondition, false)) { return true; } } // non-constant condition else if (me.operatorType == operand->type && normalize(me.valueNode) == normalize(rhs)) { return true; } } } } if (rhs->type == NODE_TYPE_ATTRIBUTE_ACCESS || rhs->type == NODE_TYPE_EXPANSION) { clearAttributeAccess(result); if (rhs->isAttributeAccessForVariable(result, isFromTraverser)) { temp.clear(); TRI_AttributeNamesToString(result.second, temp); if (temp == me.attributeName) { if (lhs->isConstant()) { ConditionPart indexCondition(result.first, result.second, operand, ATTRIBUTE_RIGHT, nullptr); if (me.isCoveredBy(indexCondition, true)) { return true; } } // non-constant condition else if (me.operatorType == operand->type && normalize(me.valueNode) == normalize(lhs)) { return true; } } } } } } } catch (...) { // simply ignore any errors and return false } return false; } /// @brief deduplicate IN condition values (and sort them) /// this may modify the node in place AstNode* Condition::deduplicateInOperation(AstNode* operation) { TRI_ASSERT(operation->numMembers() == 2); auto rhs = operation->getMemberUnchecked(1); if (!rhs->isArray() || !rhs->isConstant()) { return operation; } auto deduplicated = _ast->deduplicateArray(rhs); if (deduplicated != rhs) { // there were duplicates auto newOperation = _ast->shallowCopyForModify(operation); TRI_DEFER(FINALIZE_SUBTREE(newOperation)); newOperation->changeMember(1, const_cast(deduplicated)); return newOperation; } return operation; } /// @brief merge the values from two IN operations AstNode* Condition::mergeInOperations(transaction::Methods* trx, AstNode const* lhs, AstNode const* rhs) { TRI_ASSERT(lhs->type == NODE_TYPE_OPERATOR_BINARY_IN); TRI_ASSERT(rhs->type == NODE_TYPE_OPERATOR_BINARY_IN); auto lValue = lhs->getMemberUnchecked(1); auto rValue = rhs->getMemberUnchecked(1); 
TRI_ASSERT(lValue->isArray() && lValue->isConstant()); TRI_ASSERT(rValue->isArray() && rValue->isConstant()); return _ast->createNodeIntersectedArray(lValue, rValue); } /// @brief merges the current node with the sub nodes of same type AstNode* Condition::collapse(AstNode const* node) { TRI_ASSERT(node->type == NODE_TYPE_OPERATOR_NARY_OR || node->type == NODE_TYPE_OPERATOR_NARY_AND); auto newOperator = _ast->createNode(node->type); size_t const n = node->numMembers(); for (size_t i = 0; i < n; ++i) { auto sub = node->getMemberUnchecked(i); bool const isSame = (node->type == sub->type) || (node->type == NODE_TYPE_OPERATOR_NARY_OR && sub->type == NODE_TYPE_OPERATOR_BINARY_OR) || (node->type == NODE_TYPE_OPERATOR_NARY_AND && sub->type == NODE_TYPE_OPERATOR_BINARY_AND); if (isSame) { // merge children one level up for (size_t j = 0; j < sub->numMembers(); ++j) { newOperator->addMember(sub->getMemberUnchecked(j)); } } else { newOperator->addMember(sub); } } return newOperator; } // this may modify the node in place AstNode* switchSidesInCompare(Ast* ast, AstNode* node) { // switch members of BINARY_LT/GT/LE/GE_NODES // and change operator accordingly auto first = node->getMemberUnchecked(0); auto second = node->getMemberUnchecked(1); auto newOperator = ast->shallowCopyForModify(node); TRI_DEFER(FINALIZE_SUBTREE(newOperator)); newOperator->changeMember(0, second); newOperator->changeMember(1, first); switch (node->type) { case NODE_TYPE_OPERATOR_BINARY_LT: newOperator->type = NODE_TYPE_OPERATOR_BINARY_GT; break; case NODE_TYPE_OPERATOR_BINARY_GT: newOperator->type = NODE_TYPE_OPERATOR_BINARY_LT; break; case NODE_TYPE_OPERATOR_BINARY_LE: newOperator->type = NODE_TYPE_OPERATOR_BINARY_GE; break; case NODE_TYPE_OPERATOR_BINARY_GE: newOperator->type = NODE_TYPE_OPERATOR_BINARY_LE; break; default: LOG_TOPIC("14324", ERR, Logger::QUERIES) << "normalize condition tries to swap children" << "of wrong node type - this needs to be fixed"; TRI_ASSERT(false); } return newOperator; } 
/// @brief brings a relational comparison (<, <=, >, >=) into a canonical
/// operand order
///
/// An attribute access on the right-hand side is moved to the left-hand side
/// when the left-hand side is not an attribute access. If both sides are
/// attribute accesses, the one whose string representation compares smaller
/// ends up on the left. All other nodes are returned unchanged.
AstNode* normalizeCompare(Ast* ast, AstNode* node) {
  switch (node->type) {
    case NODE_TYPE_OPERATOR_BINARY_LE:
    case NODE_TYPE_OPERATOR_BINARY_LT:
    case NODE_TYPE_OPERATOR_BINARY_GE:
    case NODE_TYPE_OPERATOR_BINARY_GT:
      break;
    default:
      // not a relational comparison, nothing to normalize
      return node;
  }

  auto lhs = node->getMemberUnchecked(0);
  auto rhs = node->getMemberUnchecked(1);

  if (rhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) {
    // no attribute access on the right, operand order is already fine
    return node;
  }

  // the string comparison is only evaluated if both sides are attribute
  // accesses
  bool const mustSwap = (lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) ||
                        (lhs->toString() > rhs->toString());

  return mustSwap ? switchSidesInCompare(ast, node) : node;
}

/// @brief converts binary to n-ary, comparison normal and negation normal form
///
/// - binary AND/OR nodes are replaced by n-ary nodes with transformed members
/// - a NOT over an AND/OR is pushed down into the members (De Morgan)
/// - a NOT over a NOT is eliminated
/// - any other NOT is copied with its operand transformed
/// - every remaining node is passed to normalizeCompare()
AstNode* Condition::transformNodePreorder(AstNode* node) {
  if (node == nullptr) {
    return nullptr;
  }

  AstNodeType const nodeType = node->type;

  if (nodeType == NODE_TYPE_OPERATOR_BINARY_AND || nodeType == NODE_TYPE_OPERATOR_BINARY_OR) {
    // replace the binary operator by its n-ary counterpart
    TRI_ASSERT(node->numMembers() == 2);

    AstNode* nary = _ast->createNode(Ast::NaryOperatorType(nodeType));
    nary->reserve(2);
    nary->addMember(transformNodePreorder(node->getMember(0)));
    nary->addMember(transformNodePreorder(node->getMember(1)));
    return nary;
  }

  if (nodeType != NODE_TYPE_OPERATOR_UNARY_NOT) {
    // neither AND/OR nor NOT: only comparisons may need normalizing
    return normalizeCompare(_ast, node);
  }

  // from here on: push the logical negation down
  auto operand = node->getMemberUnchecked(0);

  bool const negatesAnd = (operand->type == NODE_TYPE_OPERATOR_NARY_AND ||
                           operand->type == NODE_TYPE_OPERATOR_BINARY_AND);
  bool const negatesOr = (operand->type == NODE_TYPE_OPERATOR_NARY_OR ||
                          operand->type == NODE_TYPE_OPERATOR_BINARY_OR);

  if (negatesAnd || negatesOr) {
    size_t const numOperands = operand->numMembers();

    // ! (a && b) => (! a) || (! b)
    // ! (a || b) => (! a) && (! b)
    AstNode* flipped = _ast->createNode(negatesAnd ? NODE_TYPE_OPERATOR_NARY_OR
                                                   : NODE_TYPE_OPERATOR_NARY_AND);

    for (size_t idx = 0; idx < numOperands; ++idx) {
      auto notMember = _ast->createNodeUnaryOperator(NODE_TYPE_OPERATOR_UNARY_NOT,
                                                     operand->getMemberUnchecked(idx));
      auto transformed = transformNodePreorder(notMember);
      flipped->addMember(_ast->optimizeNotExpression(transformed));
    }

    return flipped;
  }

  if (operand->type == NODE_TYPE_OPERATOR_UNARY_NOT) {
    // !!x => x
    return transformNodePreorder(operand->getMemberUnchecked(0));
  }

  // keep the negation, but transform what it negates
  auto replacement = _ast->shallowCopyForModify(node);
  replacement->changeMember(0, transformNodePreorder(operand));
  return replacement;
}

/// @brief converts from negation normal to disjunctive normal form
///
/// Only n-ary AND and n-ary OR nodes are touched; every other node is
/// returned as is. Members are transformed first. Nested nodes of the same
/// type are then merged via collapse(), and an AND with at least one OR
/// member is expanded into an OR of ANDs.
AstNode* Condition::transformNodePostorder(AstNode* node) {
  if (node == nullptr) {
    return node;
  }

  if (node->type == NODE_TYPE_OPERATOR_NARY_OR) {
    node = _ast->shallowCopyForModify(node);
    TRI_DEFER(FINALIZE_SUBTREE(node));

    size_t const numMembers = node->numMembers();
    bool hasOrChild = false;

    for (size_t idx = 0; idx < numMembers; ++idx) {
      auto child = transformNodePostorder(node->getMemberUnchecked(idx));
      node->changeMember(idx, child);
      hasOrChild = hasOrChild || (child->type == NODE_TYPE_OPERATOR_NARY_OR);
    }

    if (hasOrChild) {
      // OR inside OR: pull the inner members up
      node = collapse(node);
    }

    return node;
  }

  if (node->type != NODE_TYPE_OPERATOR_NARY_AND) {
    // we only need to handle nary and/or, the rest was handled in preorder
    return node;
  }

  node = _ast->shallowCopyForModify(node);
  TRI_DEFER(FINALIZE_SUBTREE(node));

  bool hasOrChild = false;
  bool hasAndChild = false;
  size_t n = node->numMembers();

  // children first
  for (size_t idx = 0; idx < n; ++idx) {
    auto child = transformNodePostorder(node->getMemberUnchecked(idx));
    node->changeMember(idx, child);

    if (child->type == NODE_TYPE_OPERATOR_NARY_OR) {
      hasOrChild = true;
    } else if (child->type == NODE_TYPE_OPERATOR_NARY_AND) {
      hasAndChild = true;
    }
  }

  if (hasAndChild) {
    // AND inside AND: pull the inner members up, which may change the
    // number of members
    node = collapse(node);
    n = node->numMembers();
  }

  if (hasOrChild) {
    // distribute the AND over its OR members, e.g.
    //
    //   (a OR b) AND c   =>   (a AND c) OR (b AND c)
    //
    // one AND node is produced for every combination that picks one member
    // from each OR member (non-OR members contribute themselves)
    auto disjunction = _ast->createNode(NODE_TYPE_OPERATOR_NARY_OR);

    std::vector<::PermutationState> clauses;
    clauses.reserve(n);

    for (size_t idx = 0; idx < n; ++idx) {
      auto member = node->getMemberUnchecked(idx);
      size_t const choices =
          (member->type == NODE_TYPE_OPERATOR_NARY_OR) ? member->numMembers() : 1;
      clauses.emplace_back(member, choices);
    }

    size_t const numClauses = clauses.size();
    size_t cursor = 0;

    for (bool exhausted = false; !exhausted;) {
      // build the AND node for the current combination
      auto conjunction = _ast->createNode(NODE_TYPE_OPERATOR_NARY_AND);
      conjunction->reserve(numClauses);

      for (auto const& clause : clauses) {
        auto picked = clause.getValue();

        // make sure the subtree is finalized so we can avoid cloning it
        FINALIZE_SUBTREE(picked);

        if (picked->type != NODE_TYPE_OPERATOR_NARY_AND) {
          conjunction->addMember(picked);
          continue;
        }

        // collapse, add children directly
        for (size_t k = 0; k < picked->numMembers(); k++) {
          conjunction->addMember(picked->getMember(k));
        }
      }

      disjunction->addMember(conjunction);

      // advance to the next combination, odometer-style
      for (;;) {
        auto& state = clauses[cursor];

        if (++state.current < state.n) {
          // this clause still has another choice; restart from the first
          // clause for the next round
          cursor = 0;
          break;
        }

        // this clause wrapped around, carry over into the next one
        state.current = 0;

        if (++cursor >= n) {
          // all clauses wrapped around: every combination was produced
          exhausted = true;
          break;
        }
      }
    }

    node = disjunction;
  }

  return node;
}

/// @brief Makes sure the tree has an n-ary OR at the top and n-ary AND nodes
/// on the second level, wrapping nodes of a different type where needed.
/// NOP members are dropped on both levels.
///
/// Called with level 0 for the root; it calls itself with level 1 for each
/// remaining member of the root.
AstNode* Condition::fixRoot(AstNode* node, int level) {
  if (node == nullptr) {
    return nullptr;
  }

  AstNodeType const expected =
      (level == 0) ? NODE_TYPE_OPERATOR_NARY_OR : NODE_TYPE_OPERATOR_NARY_AND;

  if (node->type != expected) {
    // wrap the node into an operator of the expected type
    node = _ast->createNodeNaryOperator(expected, node);
  }

  size_t const total = node->numMembers();

  node = _ast->shallowCopyForModify(node);
  TRI_DEFER(FINALIZE_SUBTREE(node));

  // `kept` is the write position for the next member that survives
  size_t kept = 0;

  for (size_t idx = 0; idx < total; ++idx) {
    auto child = node->getMemberUnchecked(idx);

    if (child->type != NODE_TYPE_NOP) {
      if (level == 0) {
        // second level: fix up the member as well
        node->changeMember(kept, fixRoot(child, 1));
      } else if (idx != kept) {
        // close the gap left by NOP members dropped earlier
        node->changeMember(kept, child);
      }
      ++kept;
    }
  }

  if (kept != total) {
    // cut off the slots that became free because NOP members were dropped
    node->reduceMembers(kept);
  }

  return node;
}