1
0
Fork 0
arangodb/arangod/Aql/TraversalNode.cpp

682 lines
22 KiB
C++

/// @brief Implementation of Traversal Execution Node
///
/// @file arangod/Aql/TraversalNode.cpp
///
/// DISCLAIMER
///
/// Copyright 2010-2014 triagens GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Michael Hackstein
/// @author Copyright 2015, ArangoDB GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
#include "TraversalNode.h"
#include "Aql/ExecutionPlan.h"
#include "Aql/Ast.h"
#include "Aql/TraversalOptions.h"
#include "Indexes/Index.h"
#include <velocypack/Iterator.h>
#include <velocypack/velocypack-aliases.h>
using namespace arangodb::basics;
using namespace arangodb::aql;
static uint64_t checkTraversalDepthValue(AstNode const* node) {
if (!node->isNumericValue()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE,
"invalid traversal depth");
}
double v = node->getDoubleValue();
double intpart;
if (modf(v, &intpart) != 0.0 || v < 0.0) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE,
"invalid traversal depth");
}
return static_cast<uint64_t>(v);
}
SimpleTraverserExpression::SimpleTraverserExpression(arangodb::aql::Ast* ast,
arangodb::velocypack::Slice const& slice)
: TraverserExpression(), expression(nullptr) {
isEdgeAccess = slice.get("isEdgeAccess").getBoolean();
comparisonType = static_cast<aql::AstNodeType>(slice.get("comparisonType").getNumericValue<uint32_t>());
varAccess = new AstNode(ast, slice.get("varAccess"));
compareToNode = new AstNode(ast, slice.get("compareTo"));
}
SimpleTraverserExpression::~SimpleTraverserExpression() {
delete expression;
}
void SimpleTraverserExpression::toVelocyPack(VPackBuilder& builder) const {
TRI_ASSERT(builder.isOpenObject());
auto op = arangodb::aql::AstNode::Operators.find(comparisonType);
if (op == arangodb::aql::AstNode::Operators.end()) {
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_QUERY_PARSE,
"invalid operator for simpleTraverserExpression");
}
std::string const operatorStr = op->second;
builder.add("isEdgeAccess", VPackValue(isEdgeAccess));
builder.add("comparisonTypeStr", VPackValue(operatorStr));
builder.add("comparisonType", VPackValue(comparisonType));
builder.add(VPackValue("varAccess"));
varAccess->toVelocyPack(builder, true);
builder.add(VPackValue("compareTo"));
compareToNode->toVelocyPack(builder, true);
}
static TRI_edge_direction_e parseDirection (AstNode const* node) {
TRI_ASSERT(node->isIntValue());
auto dirNum = node->getIntValue();
switch (dirNum) {
case 0:
return TRI_EDGE_ANY;
case 1:
return TRI_EDGE_IN;
case 2:
return TRI_EDGE_OUT;
default:
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_QUERY_PARSE,
"direction can only be INBOUND, OUTBOUND or ANY");
}
}
TraversalNode::TraversalNode(ExecutionPlan* plan, size_t id,
TRI_vocbase_t* vocbase, AstNode const* direction,
AstNode const* start, AstNode const* graph,
TraversalOptions const& options)
: ExecutionNode(plan, id),
_vocbase(vocbase),
_vertexOutVariable(nullptr),
_edgeOutVariable(nullptr),
_pathOutVariable(nullptr),
_inVariable(nullptr),
_graphObj(nullptr),
_condition(nullptr),
_options(options),
_specializedNeighborsSearch(false) {
TRI_ASSERT(_vocbase != nullptr);
TRI_ASSERT(direction != nullptr);
TRI_ASSERT(start != nullptr);
TRI_ASSERT(graph != nullptr);
auto resolver = std::make_unique<CollectionNameResolver>(vocbase);
// Parse Steps and direction
TRI_ASSERT(direction->type == NODE_TYPE_DIRECTION);
TRI_ASSERT(direction->numMembers() == 2);
// Member 0 is the direction. Already the correct Integer.
// Is not inserted by user but by enum.
TRI_edge_direction_e baseDirection = parseDirection(direction->getMember(0));
auto steps = direction->getMember(1);
if (steps->isNumericValue()) {
// Check if a double value is integer
_minDepth = checkTraversalDepthValue(steps);
_maxDepth = _minDepth;
} else if (steps->type == NODE_TYPE_RANGE) {
// Range depth
_minDepth = checkTraversalDepthValue(steps->getMember(0));
_maxDepth = checkTraversalDepthValue(steps->getMember(1));
if (_maxDepth < _minDepth) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE,
"invalid traversal depth");
}
} else {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE,
"invalid traversal depth");
}
std::unordered_map<std::string, TRI_edge_direction_e> seenCollections;
if (graph->type == NODE_TYPE_COLLECTION_LIST) {
size_t edgeCollectionCount = graph->numMembers();
_graphInfo.openArray();
_edgeColls.reserve(edgeCollectionCount);
_directions.reserve(edgeCollectionCount);
// List of edge collection names
for (size_t i = 0; i < edgeCollectionCount; ++i) {
auto col = graph->getMember(i);
TRI_edge_direction_e dir = TRI_EDGE_ANY;
if (col->type == NODE_TYPE_DIRECTION) {
// We have a collection with special direction.
dir = parseDirection(col->getMember(0));
col = col->getMember(1);
} else {
dir = baseDirection;
}
std::string eColName = col->getString();
// now do some uniqueness checks for the specified collections
auto it = seenCollections.find(eColName);
if (it != seenCollections.end()) {
if ((*it).second != dir) {
std::string msg("conflicting directions specified for collection '" +
std::string(eColName));
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_COLLECTION_TYPE_INVALID,
msg);
}
// do not re-add the same collection!
continue;
}
seenCollections.emplace(eColName, dir);
auto eColType = resolver->getCollectionTypeCluster(eColName);
if (eColType != TRI_COL_TYPE_EDGE) {
std::string msg("collection type invalid for collection '" +
std::string(eColName) +
": expecting collection type 'edge'");
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_COLLECTION_TYPE_INVALID,
msg);
}
_directions.emplace_back(dir);
_graphInfo.add(VPackValue(eColName));
_edgeColls.emplace_back(eColName);
}
_graphInfo.close();
} else {
if (_edgeColls.empty()) {
if (graph->isStringValue()) {
std::string graphName = graph->getString();
_graphInfo.add(VPackValue(graphName));
_graphObj = plan->getAst()->query()->lookupGraphByName(graphName);
if (_graphObj == nullptr) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_GRAPH_NOT_FOUND);
}
auto eColls = _graphObj->edgeCollections();
size_t length = eColls.size();
if (length == 0) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_GRAPH_EMPTY);
}
_edgeColls.reserve(length);
_directions.reserve(length);
for (const auto& n : eColls) {
_edgeColls.push_back(n);
_directions.emplace_back(baseDirection);
}
}
}
}
// Parse start node
switch (start->type) {
case NODE_TYPE_REFERENCE:
_inVariable = static_cast<Variable*>(start->getData());
_vertexId = "";
break;
case NODE_TYPE_VALUE:
if (start->value.type != VALUE_TYPE_STRING) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE,
"invalid start vertex. Must either be "
"an _id string or an object with _id.");
}
_inVariable = nullptr;
_vertexId = start->getString();
break;
default:
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE,
"invalid start vertex. Must either be an "
"_id string or an object with _id.");
}
// Parse options node
}
/// @brief Internal constructor to clone the node.
TraversalNode::TraversalNode(
ExecutionPlan* plan, size_t id, TRI_vocbase_t* vocbase,
std::vector<std::string> const& edgeColls, Variable const* inVariable,
std::string const& vertexId, std::vector<TRI_edge_direction_e> const& directions,
uint64_t minDepth, uint64_t maxDepth, TraversalOptions const& options)
: ExecutionNode(plan, id),
_vocbase(vocbase),
_vertexOutVariable(nullptr),
_edgeOutVariable(nullptr),
_pathOutVariable(nullptr),
_inVariable(inVariable),
_vertexId(vertexId),
_minDepth(minDepth),
_maxDepth(maxDepth),
_directions(directions),
_graphObj(nullptr),
_condition(nullptr),
_options(options),
_specializedNeighborsSearch(false) {
_graphInfo.openArray();
for (auto& it : edgeColls) {
_edgeColls.emplace_back(it);
_graphInfo.add(VPackValue(it));
}
_graphInfo.close();
}
TraversalNode::TraversalNode(ExecutionPlan* plan,
arangodb::velocypack::Slice const& base)
: ExecutionNode(plan, base),
_vocbase(plan->getAst()->query()->vocbase()),
_vertexOutVariable(nullptr),
_edgeOutVariable(nullptr),
_pathOutVariable(nullptr),
_inVariable(nullptr),
_graphObj(nullptr),
_condition(nullptr),
_specializedNeighborsSearch(false) {
_minDepth = arangodb::basics::VelocyPackHelper::stringUInt64(base.get("minDepth"));
_maxDepth = arangodb::basics::VelocyPackHelper::stringUInt64(base.get("maxDepth"));
VPackSlice dirList = base.get("directions");
for (auto const& it : VPackArrayIterator(dirList)) {
uint64_t dir = arangodb::basics::VelocyPackHelper::stringUInt64(it);
TRI_edge_direction_e d;
switch (dir) {
case 0:
d = TRI_EDGE_ANY;
break;
case 1:
d = TRI_EDGE_IN;
break;
case 2:
d = TRI_EDGE_OUT;
break;
default:
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
"Invalid direction value");
break;
}
_directions.emplace_back(d);
}
// In Vertex
if (base.hasKey("inVariable")) {
_inVariable = varFromVPack(plan->getAst(), base, "inVariable");
} else {
VPackSlice v = base.get("vertexId");
if (!v.isString()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_BAD_JSON_PLAN,
"start vertex must be a string");
}
_vertexId = v.copyString();
if (_vertexId.empty()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_BAD_JSON_PLAN,
"start vertex mustn't be empty");
}
}
if (base.hasKey("condition")) {
VPackSlice condition = base.get("condition");
if (!condition.isObject()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_BAD_JSON_PLAN,
"condition must be an object");
}
_condition = Condition::fromVPack(plan, condition);
}
std::string graphName;
if (base.hasKey("graph") && (base.get("graph").isString())) {
graphName = base.get("graph").copyString();
if (base.hasKey("graphDefinition")) {
_graphObj = plan->getAst()->query()->lookupGraphByName(graphName);
if (_graphObj == nullptr) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_GRAPH_NOT_FOUND);
}
auto eColls = _graphObj->edgeCollections();
for (auto const& n : eColls) {
_edgeColls.push_back(n);
}
} else {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_BAD_JSON_PLAN,
"missing graphDefinition.");
}
} else {
_graphInfo.add(base.get("graph"));
if (!_graphInfo.slice().isArray()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_BAD_JSON_PLAN,
"graph has to be an array.");
}
for (auto const& it : VPackArrayIterator(_graphInfo.slice())) {
// List of edge collection names
if (!it.isString()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_BAD_JSON_PLAN,
"graph has to be an array of strings.");
}
_edgeColls.emplace_back(it.copyString());
}
if (_edgeColls.empty()) {
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_QUERY_BAD_JSON_PLAN,
"graph has to be a non empty array of strings.");
}
}
if (base.hasKey("simpleExpressions")) {
VPackSlice simpleExpSet = base.get("simpleExpressions");
if (!simpleExpSet.isObject()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_BAD_JSON_PLAN,
"simpleExpressions has to be an array.");
}
// List of edge collection names
for (auto const& it : VPackObjectIterator(simpleExpSet)) {
if (it.value.length() == 0) {
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_QUERY_BAD_JSON_PLAN,
"simpleExpressions one expression set has to be an array.");
}
std::vector<arangodb::traverser::TraverserExpression*> oneExpressionSet;
oneExpressionSet.reserve(it.value.length());
size_t n = static_cast<size_t>(std::stoull(it.key.copyString()));
_expressions.emplace(n, oneExpressionSet);
auto it2 = _expressions.find(n);
for (auto const& it3 : VPackArrayIterator(it.value)) {
std::unique_ptr<SimpleTraverserExpression> oneX(
new SimpleTraverserExpression(plan->getAst(), it3));
it2->second.emplace_back(oneX.get());
oneX.release();
}
}
if (_expressions.empty()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_BAD_JSON_PLAN,
"simpleExpressions has to be non empty.");
}
}
// Out variables
if (base.hasKey("vertexOutVariable")) {
_vertexOutVariable = varFromVPack(plan->getAst(), base, "vertexOutVariable");
}
if (base.hasKey("edgeOutVariable")) {
_edgeOutVariable = varFromVPack(plan->getAst(), base, "edgeOutVariable");
}
if (base.hasKey("pathOutVariable")) {
_pathOutVariable = varFromVPack(plan->getAst(), base, "pathOutVariable");
}
// Flags
if (base.hasKey("traversalFlags")) {
_options = TraversalOptions(base);
}
_specializedNeighborsSearch = false;
if (base.hasKey("specializedNeighborsSearch")) {
_specializedNeighborsSearch = base.get("specializedNeighborsSearch").getBoolean();
}
}
int TraversalNode::checkIsOutVariable(size_t variableId) const {
if (_vertexOutVariable != nullptr && _vertexOutVariable->id == variableId) {
return 0;
}
if (_edgeOutVariable != nullptr && _edgeOutVariable->id == variableId) {
return 1;
}
if (_pathOutVariable != nullptr && _pathOutVariable->id == variableId) {
return 2;
}
return -1;
}
/// @brief check if all directions are equal
bool TraversalNode::allDirectionsEqual() const {
if (_directions.empty()) {
// no directions!
return false;
}
size_t const n = _directions.size();
TRI_edge_direction_e const expected = _directions[0];
for (size_t i = 1; i < n; ++i) {
if (_directions[i] != expected) {
return false;
}
}
return true;
}
void TraversalNode::specializeToNeighborsSearch() {
TRI_ASSERT(allDirectionsEqual());
TRI_ASSERT(!_directions.empty());
_specializedNeighborsSearch = true;
}
/// @brief toVelocyPack, for TraversalNode
void TraversalNode::toVelocyPackHelper(arangodb::velocypack::Builder& nodes,
bool verbose) const {
ExecutionNode::toVelocyPackHelperGeneric(nodes,
verbose); // call base class method
nodes.add("database", VPackValue(_vocbase->name()));
nodes.add("minDepth", VPackValue(_minDepth));
nodes.add("maxDepth", VPackValue(_maxDepth));
nodes.add("graph", _graphInfo.slice());
nodes.add(VPackValue("directions"));
{
VPackArrayBuilder guard(&nodes);
for (auto const& d : _directions) {
nodes.add(VPackValue(d));
}
}
// In variable
if (usesInVariable()) {
nodes.add(VPackValue("inVariable"));
inVariable()->toVelocyPack(nodes);
} else {
nodes.add("vertexId", VPackValue(_vertexId));
}
if (_condition != nullptr) {
nodes.add(VPackValue("condition"));
_condition->toVelocyPack(nodes, verbose);
}
if (_graphObj != nullptr) {
nodes.add(VPackValue("graphDefinition"));
_graphObj->toVelocyPack(nodes, verbose);
}
// Out variables
if (usesVertexOutVariable()) {
nodes.add(VPackValue("vertexOutVariable"));
vertexOutVariable()->toVelocyPack(nodes);
}
if (usesEdgeOutVariable()) {
nodes.add(VPackValue("edgeOutVariable"));
edgeOutVariable()->toVelocyPack(nodes);
}
if (usesPathOutVariable()) {
nodes.add(VPackValue("pathOutVariable"));
pathOutVariable()->toVelocyPack(nodes);
}
if (!_expressions.empty()) {
nodes.add(VPackValue("simpleExpressions"));
VPackObjectBuilder guard(&nodes);
for (auto const& map : _expressions) {
nodes.add(VPackValue(std::to_string(map.first)));
VPackArrayBuilder guard2(&nodes);
for (auto const& x : map.second) {
VPackObjectBuilder guard3(&nodes);
auto tmp = dynamic_cast<SimpleTraverserExpression*>(x);
if (tmp != nullptr) {
tmp->toVelocyPack(nodes);
}
}
}
}
nodes.add("specializedNeighborsSearch", VPackValue(_specializedNeighborsSearch));
nodes.add(VPackValue("traversalFlags"));
_options.toVelocyPack(nodes);
// And close it:
nodes.close();
}
/// @brief clone ExecutionNode recursively
ExecutionNode* TraversalNode::clone(ExecutionPlan* plan, bool withDependencies,
bool withProperties) const {
auto c =
new TraversalNode(plan, _id, _vocbase, _edgeColls, _inVariable, _vertexId,
_directions, _minDepth, _maxDepth, _options);
if (usesVertexOutVariable()) {
auto vertexOutVariable = _vertexOutVariable;
if (withProperties) {
vertexOutVariable =
plan->getAst()->variables()->createVariable(vertexOutVariable);
}
TRI_ASSERT(vertexOutVariable != nullptr);
c->setVertexOutput(vertexOutVariable);
}
if (usesEdgeOutVariable()) {
auto edgeOutVariable = _edgeOutVariable;
if (withProperties) {
edgeOutVariable =
plan->getAst()->variables()->createVariable(edgeOutVariable);
}
TRI_ASSERT(edgeOutVariable != nullptr);
c->setEdgeOutput(edgeOutVariable);
}
if (usesPathOutVariable()) {
auto pathOutVariable = _pathOutVariable;
if (withProperties) {
pathOutVariable =
plan->getAst()->variables()->createVariable(pathOutVariable);
}
TRI_ASSERT(pathOutVariable != nullptr);
c->setPathOutput(pathOutVariable);
}
if (_specializedNeighborsSearch) {
c->specializeToNeighborsSearch();
}
cloneHelper(c, plan, withDependencies, withProperties);
return static_cast<ExecutionNode*>(c);
}
/// @brief the cost of a traversal node
double TraversalNode::estimateCost(size_t& nrItems) const {
size_t incoming = 0;
double depCost = _dependencies.at(0)->getCost(incoming);
double expectedEdgesPerDepth = 0.0;
auto trx = _plan->getAst()->query()->trx();
auto collections = _plan->getAst()->query()->collections();
TRI_ASSERT(collections != nullptr);
for (auto const& it : _edgeColls) {
auto collection = collections->get(it);
if (collection == nullptr) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
"unexpected pointer for collection");
}
TRI_ASSERT(collection != nullptr);
auto indexes = trx->indexesForCollection(collection->name);
for (auto const& index : indexes) {
if (index->type() == arangodb::Index::IndexType::TRI_IDX_TYPE_EDGE_INDEX) {
// We can only use Edge Index
if (index->hasSelectivityEstimate()) {
expectedEdgesPerDepth += 1 / index->selectivityEstimate();
} else {
expectedEdgesPerDepth += 1000; // Hard-coded
}
break;
}
}
}
nrItems =
static_cast<size_t>(incoming * std::pow(expectedEdgesPerDepth, static_cast<double>(_maxDepth)));
if (nrItems == 0 && incoming > 0) {
nrItems = 1; // min value
}
return depCost + nrItems;
}
void TraversalNode::fillTraversalOptions(
arangodb::traverser::TraverserOptions& opts) const {
opts.minDepth = _minDepth;
opts.maxDepth = _maxDepth;
opts.setCollections(_edgeColls, _directions);
opts.useBreadthFirst = _options.useBreadthFirst;
opts.uniqueVertices = _options.uniqueVertices;
opts.uniqueEdges = _options.uniqueEdges;
}
/// @brief remember the condition to execute for early traversal abortion.
void TraversalNode::setCondition(arangodb::aql::Condition* condition) {
std::unordered_set<Variable const*> varsUsedByCondition;
Ast::getReferencedVariables(condition->root(), varsUsedByCondition);
for (auto const& oneVar : varsUsedByCondition) {
if ((_vertexOutVariable == nullptr || oneVar->id != _vertexOutVariable->id) &&
(_edgeOutVariable == nullptr || oneVar->id != _edgeOutVariable->id) &&
(_pathOutVariable == nullptr || oneVar->id != _pathOutVariable->id) &&
(_inVariable == nullptr || oneVar->id != _inVariable->id)) {
_conditionVariables.emplace_back(oneVar);
}
}
_condition = condition;
}
void TraversalNode::storeSimpleExpression(bool isEdgeAccess, size_t indexAccess,
AstNodeType comparisonType,
AstNode const* varAccess,
AstNode* compareToNode) {
auto it = _expressions.find(indexAccess);
if (it == _expressions.end()) {
std::vector<arangodb::traverser::TraverserExpression*> sec;
_expressions.emplace(indexAccess, sec);
it = _expressions.find(indexAccess);
}
auto e = std::make_unique<SimpleTraverserExpression>(
isEdgeAccess, comparisonType, varAccess, compareToNode);
it->second.push_back(e.get());
e.release();
}