// Source: arangodb/arangod/Aql/TraversalConditionFinder.cpp (C++, 399 lines, 14 KiB)

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Michael Hackstein
////////////////////////////////////////////////////////////////////////////////
#include "Aql/Ast.h"
#include "Aql/ExecutionPlan.h"
#include "Aql/TraversalConditionFinder.h"
#include "Aql/TraversalNode.h"
using namespace arangodb::aql;
using EN = arangodb::aql::ExecutionNode;
/// @brief checks whether all accesses to the path variable `var` inside the
/// expression of `cn` have a shape the traversal `tn` can evaluate itself.
///
/// Only accesses of the form `<var>.edges[n]...` / `<var>.vertices[n]...` with
/// a constant, non-negative integer `n` are accepted. Expressions containing
/// a binary OR, a function call on the access path, or an IN / NOT IN whose
/// left-hand side is not an attribute access on `var` are rejected.
///
/// @param cn  calculation node whose expression is inspected
/// @param tn  traversal node, asked whether the accessed index is in range
/// @param var the path variable whose accesses are searched for
/// @param conditionIsImpossible  out parameter; set to the negated result of
///        `tn->isInRange(...)` for every accepted indexed access
/// @return true if every access found is acceptable, false otherwise
static bool checkPathVariableAccessFeasible(CalculationNode const* cn,
                                            TraversalNode* tn,
                                            Variable const* var,
                                            bool& conditionIsImpossible) {
  auto node = cn->expression()->node();

  if (node->containsNodeType(NODE_TYPE_OPERATOR_BINARY_OR)) {
    return false;
  }

  std::vector<AstNode const*> currentPath;
  std::vector<std::vector<AstNode const*>> paths;

  node->findVariableAccess(currentPath, paths, var);

  for (auto const& onePath : paths) {
    size_t const len = onePath.size();

    // The checks below look at the elements at len - 2 and len - 3. A shorter
    // path would make these unsigned index computations wrap around and read
    // out of bounds, so such an access is simply treated as not optimizable.
    if (len < 3) {
      return false;
    }

    for (auto const& pathNode : onePath) {
      if (pathNode->type == NODE_TYPE_FCALL) {
        // we currently don't know how to execute functions in the
        // traversal (-> TraverserExpression::recursiveCheck)
        return false;
      }
      if (pathNode->type == NODE_TYPE_OPERATOR_BINARY_IN ||
          pathNode->type == NODE_TYPE_OPERATOR_BINARY_NIN) {
        if (!pathNode->getMember(0)->isAttributeAccessForVariable(var, true)) {
          return false;
        }
      }
    }

    bool isEdgeAccess = false;
    AstNode const* containerNode = onePath[len - 2];

    if (containerNode->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
      isEdgeAccess = containerNode->stringEquals("edges", false);
      if (!isEdgeAccess && !containerNode->stringEquals("vertices", false)) {
        // The path only knows 'edges' and 'vertices'. We can't catch all
        // cases in which this error would occur, so we don't throw a parse
        // error here but just refuse to optimize.
        return false;
      }
    }

    // we now need to check for p.edges[n] whether n is >= 0
    AstNode const* accessNode = onePath[len - 3];

    if (accessNode->type != NODE_TYPE_INDEXED_ACCESS) {
      // anything else, including p.edges[*] (which becomes an
      // iterator/expansion structure), is not supported
      return false;
    }

    auto indexAccessNode = accessNode->getMember(1);

    if ((indexAccessNode->type != NODE_TYPE_VALUE) ||
        (indexAccessNode->value.type != VALUE_TYPE_INT) ||
        (indexAccessNode->value.value._int < 0)) {
      return false;
    }

    // NOTE(review): this assignment overwrites the flag for every path, so
    // only the result for the last inspected path survives - confirm that
    // this is intended when a condition accesses several path indexes.
    conditionIsImpossible =
        !tn->isInRange(indexAccessNode->value.value._int, isEdgeAccess);
  }

  return true;
}
/// @brief walks all accesses to the path variable of `tn` inside `node` and
/// registers every simple comparison `<path access> <op> <other operand>` at
/// the traversal node via `storeSimpleExpression`.
///
/// For each registered comparison the path access is cloned and its leading
/// `p.edges[n]` / `p.vertices[n]` part is replaced by a reference node, so
/// the stored expression is a plain attribute access on a variable.
///
/// @param node root of the condition to inspect
/// @param tn   traversal node that receives the extracted expressions
/// @param ast  AST used for cloning nodes and registering the new node
/// @return always true
static bool extractSimplePathAccesses(AstNode const* node, TraversalNode* tn,
                                      Ast* ast) {
  std::vector<AstNode const*> currentPath;
  std::vector<std::vector<AstNode const*>> paths;
  std::vector<std::vector<AstNode const*>> clonePath;

  // true for the binary comparison operators that can be stored
  auto const isComparison = [](AstNode const* candidate) -> bool {
    switch (candidate->type) {
      case NODE_TYPE_OPERATOR_BINARY_EQ:
      case NODE_TYPE_OPERATOR_BINARY_NE:
      case NODE_TYPE_OPERATOR_BINARY_LT:
      case NODE_TYPE_OPERATOR_BINARY_LE:
      case NODE_TYPE_OPERATOR_BINARY_GT:
      case NODE_TYPE_OPERATOR_BINARY_GE:
      case NODE_TYPE_OPERATOR_BINARY_IN:
      case NODE_TYPE_OPERATOR_BINARY_NIN:
        return true;
      default:
        return false;
    }
  };

  node->findVariableAccess(currentPath, paths, tn->pathOutVariable());

  for (auto const& onePath : paths) {
    size_t const len = onePath.size();

    TRI_ASSERT(len >= 3);
    if (len < 3) {
      // keep builds without assertions from indexing out of bounds below
      continue;
    }

    bool isEdgeAccess = false;
    size_t attrAccessTo = 0;

    if (onePath[len - 2]->type == NODE_TYPE_ATTRIBUTE_ACCESS) {
      isEdgeAccess = onePath[len - 2]->stringEquals("edges", false);
    }

    // for p.edges[n] remember n; the value is read as an integer without
    // further checks here
    if (onePath[len - 3]->type == NODE_TYPE_INDEXED_ACCESS) {
      auto indexAccessNode = onePath[len - 3]->getMember(1);
      attrAccessTo = indexAccessNode->value.value._int;
    }

    // Find the comparison on the way to the path variable and the child of
    // that comparison through which the path continues.
    AstNode const* compareNode = nullptr;
    AstNode const* accessNodeBranch = nullptr;
    bool nestedComparison = false;

    for (auto const& oneNode : onePath) {
      if (compareNode != nullptr && accessNodeBranch == nullptr) {
        accessNodeBranch = oneNode;
      }
      if (isComparison(oneNode)) {
        if (compareNode != nullptr) {
          // A second comparison on the same path: the operator of one
          // comparison would be combined with the operand branch of the
          // other one. Do not extract anything for such a path.
          nestedComparison = true;
          break;
        }
        compareNode = oneNode;
      }
    }

    if (compareNode == nullptr || accessNodeBranch == nullptr ||
        nestedComparison) {
      continue;
    }

    // The operand that is not the path access is the value to filter by.
    bool const accessIsFirstOperand =
        (compareNode->getMember(0) == accessNodeBranch);
    AstNode* filterByNode = compareNode->getMember(accessIsFirstOperand ? 1 : 0);

    if (!accessNodeBranch->isSimple() || !filterByNode->isDeterministic()) {
      continue;
    }

    currentPath.clear();
    clonePath.clear();
    filterByNode->findVariableAccess(currentPath, clonePath,
                                     tn->pathOutVariable());
    if (!clonePath.empty()) {
      // Path variable access on the other side as well? can't do that.
      continue;
    }

    AstNode* newNode = accessNodeBranch->clone(ast);

    // since we just copied one path, we should only find one.
    currentPath.clear();
    clonePath.clear();
    newNode->findVariableAccess(currentPath, clonePath, tn->pathOutVariable());
    if (clonePath.size() != 1) {
      continue;
    }

    size_t const cloneLen = clonePath[0].size();
    if (cloneLen < 4) {
      continue;
    }

    // The node belongs to the clone created above, so dropping the const
    // added by findVariableAccess is fine.
    AstNode* firstRefNode = const_cast<AstNode*>(clonePath[0][cloneLen - 4]);
    TRI_ASSERT(firstRefNode->type == NODE_TYPE_ATTRIBUTE_ACCESS);

    // replace the path variable access by a variable access to edge/vertex
    // (then current to the iteration)
    auto varRefHolder = std::make_unique<AstNode>(NODE_TYPE_REFERENCE);
    ast->query()->addNode(varRefHolder.get());
    // addNode succeeded, the node is now tracked there; give up our ownership
    AstNode* varRefNode = varRefHolder.release();

    // We fake the variable at this point.
    // The reason is that we need a variable access (instead of indexed access)
    // for serialisation. However the content of this variable is never used.
    varRefNode->setData(tn->vertexOutVariable());
    firstRefNode->changeMember(0, varRefNode);

    // The expression is stored as "<path access> <op> <filterByNode>", so the
    // ordering operators are mirrored if the path access was the second
    // operand of the original comparison.
    auto expressionOperator = compareNode->type;
    if (!accessIsFirstOperand) {
      switch (expressionOperator) {
        case NODE_TYPE_OPERATOR_BINARY_LT:
          expressionOperator = NODE_TYPE_OPERATOR_BINARY_GT;
          break;
        case NODE_TYPE_OPERATOR_BINARY_LE:
          expressionOperator = NODE_TYPE_OPERATOR_BINARY_GE;
          break;
        case NODE_TYPE_OPERATOR_BINARY_GT:
          expressionOperator = NODE_TYPE_OPERATOR_BINARY_LT;
          break;
        case NODE_TYPE_OPERATOR_BINARY_GE:
          expressionOperator = NODE_TYPE_OPERATOR_BINARY_LE;
          break;
        default:
          break;
      }
    }

    tn->storeSimpleExpression(isEdgeAccess, attrAccessTo, expressionOperator,
                              newNode, filterByNode);
  }

  return true;
}
/// @brief callback invoked for an execution node `en` of the plan.
///
/// Records FILTER nodes (keyed by the id of the variable they use) and
/// CALCULATION nodes (keyed by the id of the variable they set). When a
/// TRAVERSAL node is reached, the recorded calculations that feed a recorded
/// filter and only use variables valid in the traversal are combined into a
/// condition that is attached to the traversal node. If the condition is
/// found to be impossible, a NoResultsNode is inserted for every parent of
/// the traversal node instead.
///
/// @return true if the walk should be aborted, false otherwise
bool TraversalConditionFinder::before(ExecutionNode* en) {
  if (!_variableDefinitions.empty() && en->canThrow()) {
    // a calculation has already been recorded, and moving conditions across
    // something that can throw is not safe: forget the filters and stop
    _filters.clear();
    return true;
  }

  switch (en->getType()) {
    case EN::ENUMERATE_LIST:
    case EN::COLLECT:
    case EN::SCATTER:
    case EN::DISTRIBUTE:
    case EN::GATHER:
    case EN::REMOTE:
    case EN::SUBQUERY:
    case EN::INDEX:
    case EN::INSERT:
    case EN::REMOVE:
    case EN::REPLACE:
    case EN::UPDATE:
    case EN::UPSERT:
    case EN::RETURN:
    case EN::SORT:
    case EN::ENUMERATE_COLLECTION:
    case EN::LIMIT:
      // intermediate nodes that are simply skipped; nodes that could throw
      // have already been handled above
      break;

    case EN::SINGLETON:
    case EN::NORESULTS:
    case EN::ILLEGAL:
      // nothing sensible can be done beyond these, abort
      return true;

    case EN::FILTER: {
      auto usedHere = en->getVariablesUsedHere();
      TRI_ASSERT(usedHere.size() == 1);
      // remember which variable this FILTER depends on
      _filters.emplace(usedHere[0]->id, en);
      break;
    }

    case EN::CALCULATION: {
      auto setHere = en->getVariablesSetHere();
      TRI_ASSERT(setHere.size() == 1);
      _variableDefinitions.emplace(setHere[0]->id,
                                   static_cast<CalculationNode const*>(en));
      TRI_IF_FAILURE("ConditionFinder::variableDefinition") {
        THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
      }
      break;
    }

    case EN::TRAVERSAL: {
      auto traversal = static_cast<TraversalNode*>(en);
      auto condition = std::make_unique<Condition>(_plan->getAst());
      bool haveCondition = false;
      auto const& validVars = traversal->getVarsValid();
      std::unordered_set<Variable const*> usedVars;
      bool impossible = false;

      // true if a variable with the same id is valid in the traversal
      auto const isValidInTraversal = [&validVars](Variable const* candidate) {
        for (auto const& valid : validVars) {
          if (candidate->id == valid->id) {
            return true;
          }
        }
        return false;
      };

      for (auto& definition : _variableDefinitions) {
        if (impossible) {
          // an earlier calculation already made the condition impossible
          break;
        }

        auto filterIt = _filters.find(definition.first);
        if (filterIt == _filters.end()) {
          // this calculation is not consumed by any recorded FILTER
          continue;
        }

        auto setByTraversal = traversal->getVariablesSetHere();
        if (setByTraversal.size() == 1 &&
            setByTraversal[0]->id != filterIt->first) {
          continue;
        }

        auto calculation = definition.second;

        // the condition may only reference variables that are in scope at
        // the traversal
        usedVars.clear();
        Ast::getReferencedVariables(calculation->expression()->node(),
                                    usedVars);

        bool allKnown = true;
        for (auto const& used : usedVars) {
          if (!isValidInTraversal(used)) {
            allKnown = false;
            break;
          }
        }
        if (!allKnown) {
          continue;
        }

        for (auto const& used : usedVars) {
          // is this one of the variables the traversal emits?
          int const kind = traversal->checkIsOutVariable(used->id);
          if (kind < 0) {
            continue;
          }
          if (kind == 2 && checkPathVariableAccessFeasible(
                               calculation, traversal, used, impossible)) {
            condition->andCombine(
                definition.second->expression()->node()->clone(
                    _plan->getAst()));
            haveCondition = true;
          }
          if (impossible) {
            break;
          }
        }
      }

      // TODO: we can't execute if we condition->normalize(_plan); in
      // generateCodeNode
      // right now we're not clever enough to find impossible conditions...
      impossible = impossible || !traversal->isRangeValid() ||
                   (haveCondition && condition->isEmpty());

      if (impossible) {
        // condition is always false
        for (auto const& parent : traversal->getParents()) {
          auto noResults = new NoResultsNode(_plan, _plan->nextId());
          _plan->registerNode(noResults);
          _plan->insertDependency(parent, noResults);
          *_planAltered = true;
        }
        break;
      }

      if (!haveCondition) {
        break;
      }

      condition->normalize();
      TRI_IF_FAILURE("ConditionFinder::normalizePlan") {
        THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
      }
      extractSimplePathAccesses(condition->root(), traversal, _plan->getAst());
      // hand the condition over to the traversal node
      traversal->setCondition(condition.release());
      *_planAltered = true;
      break;
    }
  }

  return false;
}
/// @brief subquery callback of the finder; ignores both nodes and always
/// returns false.
/// NOTE(review): presumably false tells the plan walker not to descend into
/// the subquery - confirm against the walker interface.
bool TraversalConditionFinder::enterSubquery(ExecutionNode*, ExecutionNode*) {
return false;
}