//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Michael Hackstein //////////////////////////////////////////////////////////////////////////////// #include "Aql/Ast.h" #include "Aql/ExecutionPlan.h" #include "Aql/TraversalConditionFinder.h" #include "Aql/TraversalNode.h" using namespace arangodb::aql; using EN = arangodb::aql::ExecutionNode; static bool checkPathVariableAccessFeasible(CalculationNode const* cn, TraversalNode* tn, Variable const* var, bool& conditionIsImpossible) { auto node = cn->expression()->node(); if (node->containsNodeType(NODE_TYPE_OPERATOR_BINARY_OR)) { return false; } std::vector currentPath; std::vector> paths; node->findVariableAccess(currentPath, paths, var); for (auto const& onePath : paths) { size_t len = onePath.size(); bool isEdgeAccess = false; for (auto const & node : onePath) { if (node->type == NODE_TYPE_FCALL) { // // we currently don't know how to execute functions in the // traversal (-> TraverserExpression::recursiveCheck return false; } if (node->type == NODE_TYPE_OPERATOR_BINARY_IN || node->type == NODE_TYPE_OPERATOR_BINARY_NIN) { if (!node->getMember(0)->isAttributeAccessForVariable(var, true)) { return false; } } } if (onePath[len - 2]->type == NODE_TYPE_ATTRIBUTE_ACCESS) { isEdgeAccess = onePath[len - 2]->stringEquals("edges", false); if (!isEdgeAccess && !onePath[len - 2]->stringEquals("vertices", false)) { /* We can't catch all cases in which this error would occur, so we don't throw here. std::string message("TRAVERSAL: path only knows 'edges' and 'vertices', not "); message += onePath[len - 2]->getString(); THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE, message); */ return false; } } // we now need to check for p.edges[n] whether n is >= 0 if (onePath[len - 3]->type == NODE_TYPE_INDEXED_ACCESS) { auto indexAccessNode = onePath[len - 3]->getMember(1); if ((indexAccessNode->type != NODE_TYPE_VALUE) || (indexAccessNode->value.type != VALUE_TYPE_INT) || (indexAccessNode->value.value._int < 0)) { return false; } conditionIsImpossible = !tn->isInRange(indexAccessNode->value.value._int, isEdgeAccess); } else if ((onePath[len - 3]->type == NODE_TYPE_ITERATOR) && (onePath[len - 4]->type == NODE_TYPE_EXPANSION)) { // we now need to check for p.edges[*] which becomes a fancy structure return false; } else { return false; } } return true; } static bool extractSimplePathAccesses(AstNode const* node, TraversalNode* tn, Ast* ast) { std::vector currentPath; std::vector> paths; std::vector> clonePath; node->findVariableAccess(currentPath, paths, tn->pathOutVariable()); for (auto const& onePath : paths) { size_t len = onePath.size(); bool isEdgeAccess = false; size_t attrAccessTo = 0; TRI_ASSERT(len >= 3); if (onePath[len - 2]->type == NODE_TYPE_ATTRIBUTE_ACCESS) { isEdgeAccess = onePath[len - 2]->stringEquals("edges", false); } // we now need to check for p.edges[n] whether n is >= 0 if (onePath[len - 3]->type == NODE_TYPE_INDEXED_ACCESS) { auto indexAccessNode = onePath[len - 3]->getMember(1); attrAccessTo = indexAccessNode->value.value._int; } AstNode const* compareNode = nullptr; AstNode const* accessNodeBranch = nullptr; for (auto const& oneNode : onePath) { if (compareNode != nullptr && accessNodeBranch == nullptr) { accessNodeBranch = oneNode; } if ((oneNode->type == NODE_TYPE_OPERATOR_BINARY_EQ) || (oneNode->type == NODE_TYPE_OPERATOR_BINARY_NE) || (oneNode->type == NODE_TYPE_OPERATOR_BINARY_LT) || (oneNode->type == NODE_TYPE_OPERATOR_BINARY_LE) || (oneNode->type == NODE_TYPE_OPERATOR_BINARY_GT) || (oneNode->type == NODE_TYPE_OPERATOR_BINARY_GE) || (oneNode->type == NODE_TYPE_OPERATOR_BINARY_IN ) || (oneNode->type == NODE_TYPE_OPERATOR_BINARY_NIN)) { compareNode = oneNode; } } if (compareNode != nullptr) { AstNode const* pathAccessNode; AstNode* filterByNode; bool flipOperator = false; if (compareNode->getMember(0) == accessNodeBranch) { pathAccessNode = accessNodeBranch; filterByNode = compareNode->getMember(1); } else { flipOperator = (compareNode->type == NODE_TYPE_OPERATOR_BINARY_LT) || (compareNode->type == NODE_TYPE_OPERATOR_BINARY_LE) || (compareNode->type == NODE_TYPE_OPERATOR_BINARY_GT) || (compareNode->type == NODE_TYPE_OPERATOR_BINARY_GE); pathAccessNode = accessNodeBranch; filterByNode = compareNode->getMember(0); } // Hacki: I do not think that the nullptr check can ever fail because of // the structure of onePath if (accessNodeBranch != nullptr && accessNodeBranch->isSimple() && filterByNode->isDeterministic()) { currentPath.clear(); clonePath.clear(); filterByNode->findVariableAccess(currentPath, clonePath, tn->pathOutVariable()); if (!clonePath.empty()) { // Path variable access on the RHS? can't do that. continue; } AstNode* newNode = pathAccessNode->clone(ast); // since we just copied one path, we should only find one. currentPath.clear(); clonePath.clear(); newNode->findVariableAccess(currentPath, clonePath, tn->pathOutVariable()); if (clonePath.size() != 1) { continue; } auto len = clonePath[0].size(); if (len < 4) { continue; } AstNode* firstRefNode = (AstNode*)clonePath[0][len - 4]; TRI_ASSERT(firstRefNode->type == NODE_TYPE_ATTRIBUTE_ACCESS); // replace the path variable access by a variable access to edge/vertex // (then current to the iteration) auto varRefNode = new AstNode(NODE_TYPE_REFERENCE); try { ast->query()->addNode(varRefNode); } catch (...) { // prevent leak delete varRefNode; throw; } // We fake the variable at this point. // The reason is that we need a variable access (instead of indexed access) // for serialisation. However the content of this variable is never used. varRefNode->setData(tn->vertexOutVariable()); firstRefNode->changeMember(0, varRefNode); auto expressionOperator = compareNode->type; if (flipOperator) { if (expressionOperator == NODE_TYPE_OPERATOR_BINARY_LT) { expressionOperator = NODE_TYPE_OPERATOR_BINARY_GT; } else if (expressionOperator == NODE_TYPE_OPERATOR_BINARY_LE) { expressionOperator = NODE_TYPE_OPERATOR_BINARY_GE; } else if (expressionOperator == NODE_TYPE_OPERATOR_BINARY_GT) { expressionOperator = NODE_TYPE_OPERATOR_BINARY_LT; } else if (expressionOperator == NODE_TYPE_OPERATOR_BINARY_GE) { expressionOperator = NODE_TYPE_OPERATOR_BINARY_LE; } } tn->storeSimpleExpression(isEdgeAccess, attrAccessTo, expressionOperator, newNode, filterByNode); } } } return true; } bool TraversalConditionFinder::before(ExecutionNode* en) { if (!_variableDefinitions.empty() && en->canThrow()) { // we already found a FILTER and // something that can throw is not safe to optimize _filters.clear(); return true; } switch (en->getType()) { case EN::ENUMERATE_LIST: case EN::COLLECT: case EN::SCATTER: case EN::DISTRIBUTE: case EN::GATHER: case EN::REMOTE: case EN::SUBQUERY: case EN::INDEX: case EN::INSERT: case EN::REMOVE: case EN::REPLACE: case EN::UPDATE: case EN::UPSERT: case EN::RETURN: case EN::SORT: case EN::ENUMERATE_COLLECTION: case EN::LIMIT: // in these cases we simply ignore the intermediate nodes, note // that we have taken care of nodes that could throw exceptions // above. break; case EN::SINGLETON: case EN::NORESULTS: case EN::ILLEGAL: // in all these cases we better abort return true; case EN::FILTER: { std::vector&& invars = en->getVariablesUsedHere(); TRI_ASSERT(invars.size() == 1); // register which variable is used in a FILTER _filters.emplace(invars[0]->id, en); break; } case EN::CALCULATION: { auto outvars = en->getVariablesSetHere(); TRI_ASSERT(outvars.size() == 1); _variableDefinitions.emplace(outvars[0]->id, static_cast(en)); TRI_IF_FAILURE("ConditionFinder::variableDefinition") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } break; } case EN::TRAVERSAL: { auto node = static_cast(en); auto condition = std::make_unique(_plan->getAst()); bool foundCondition = false; auto const& varsValidInTraversal = node->getVarsValid(); std::unordered_set varsUsedByCondition; bool conditionIsImpossible = false; for (auto& it : _variableDefinitions) { auto f = _filters.find(it.first); if (f != _filters.end()) { // a variable used in a FILTER auto outVar = node->getVariablesSetHere(); if (outVar.size() != 1 || outVar[0]->id == f->first) { // now we know, this filter is used for our traversal node. auto cn = it.second; // check whether variables that are not in scope of the condition // are used: varsUsedByCondition.clear(); Ast::getReferencedVariables(cn->expression()->node(), varsUsedByCondition); bool unknownVariableFound = false; for (auto const& conditionVar : varsUsedByCondition) { bool found = false; for (auto const& traversalKnownVar : varsValidInTraversal) { if (conditionVar->id == traversalKnownVar->id) { found = true; break; } } if (!found) { unknownVariableFound = true; break; } } if (unknownVariableFound) { continue; } for (auto const& conditionVar : varsUsedByCondition) { // check whether conditionVar is one of those we emit int variableType = node->checkIsOutVariable(conditionVar->id); if (variableType >= 0) { if ((variableType == 2) && checkPathVariableAccessFeasible(cn, node, conditionVar, conditionIsImpossible)) { condition->andCombine( it.second->expression()->node()->clone(_plan->getAst())); foundCondition = true; } if (conditionIsImpossible) { break; } } } } } if (conditionIsImpossible) { break; } } if (!conditionIsImpossible) { conditionIsImpossible = !node->isRangeValid(); } // TODO: we can't execute if we condition->normalize(_plan); in // generateCodeNode if (!conditionIsImpossible) { // right now we're not clever enough to find impossible conditions... conditionIsImpossible = (foundCondition && condition->isEmpty()); } if (conditionIsImpossible) { // condition is always false for (auto const& x : node->getParents()) { auto noRes = new NoResultsNode(_plan, _plan->nextId()); _plan->registerNode(noRes); _plan->insertDependency(x, noRes); *_planAltered = true; } break; } if (foundCondition) { condition->normalize(); TRI_IF_FAILURE("ConditionFinder::normalizePlan") { THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); } extractSimplePathAccesses(condition->root(), node, _plan->getAst()); node->setCondition(condition.release()); *_planAltered = true; } break; } } return false; } bool TraversalConditionFinder::enterSubquery(ExecutionNode*, ExecutionNode*) { return false; }