1
0
Fork 0

issue #271: simple paths() function optimisations

When an "outbound" query is made with the paths() function, the optimiser will now use an index if a FILTER condition can exploit an index on .source.
Example query:

FOR p IN PATHS(users, relations, "outbound")
  FILTER p.source._id == "some-id"
  RETURN p

The index on users._id was not exploited before, because the optimiser did not know that variable "p" related to collection "users" and that the condition actually would filter on users._id (the variable name in the FILTER statement is "p.source._id").
Now, AQL functions can define callbacks which are called by the optimiser to try some function-specific optimisations.
The optimise callback for PATHS() tries to exploit indexes for filters that filter on the vertex collection (1st parameter to PATHS() call). Indexes can be exploited if the query is an "outbound" query and there are filters on "source", or if the query is an "inbound" query and there are filters on "destination". "anybound" queries will not be optimised.
This commit is contained in:
Jan Steemann 2012-10-26 12:01:46 +02:00
parent 7af1337607
commit 792f57b760
5 changed files with 153 additions and 65 deletions

View File

@ -401,6 +401,8 @@ TRI_aql_node_t* TRI_CreateNodeCollectionAql (TRI_aql_context_t* const context,
// init collection hint
hint = TRI_CreateCollectionHintAql();
// attach the hint to the collection
node->_value._value._data = hint;
if (hint == NULL) {
@ -950,14 +952,14 @@ TRI_aql_node_t* TRI_CreateNodeFcallAql (TRI_aql_context_t* const context,
function = TRI_GetByExternalNameFunctionAql(functions, name);
if (!function) {
if (! function) {
// function name is unknown
TRI_SetErrorContextAql(context, TRI_ERROR_QUERY_FUNCTION_NAME_UNKNOWN, name);
return NULL;
}
// validate function call arguments
if (!TRI_ValidateArgsFunctionAql(context, function, parameters)) {
if (! TRI_ValidateArgsFunctionAql(context, function, parameters)) {
return NULL;
}

View File

@ -27,6 +27,8 @@
#include "Ahuacatl/ahuacatl-functions.h"
#include "Ahuacatl/ahuacatl-collections.h"
// -----------------------------------------------------------------------------
// --SECTION-- private macros
// -----------------------------------------------------------------------------
@ -40,15 +42,15 @@
/// @brief shorthand to register a query function and process the result
////////////////////////////////////////////////////////////////////////////////
#define REGISTER_FUNCTION(internalName, externalName, deterministic, group, argPattern) \
result &= TRI_RegisterFunctionAql(functions, internalName, "AHUACATL_" externalName, deterministic, group, argPattern);
#define REGISTER_FUNCTION(internalName, externalName, deterministic, group, argPattern, optimiseCallback) \
result &= TRI_RegisterFunctionAql(functions, internalName, "AHUACATL_" externalName, deterministic, group, argPattern, optimiseCallback)
////////////////////////////////////////////////////////////////////////////////
/// @brief shorthand to check an argument and return an error if it is invalid
////////////////////////////////////////////////////////////////////////////////
#define ARG_CHECK \
if (!CheckArgumentType(parameter, &allowed)) { \
if (! CheckArgumentType(parameter, &allowed)) { \
TRI_SetErrorContextAql(context, TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, function->_externalName); \
return false; \
}
@ -112,7 +114,6 @@ static param_t InitParam (void) {
return param;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief check the type of an argument for a function call
////////////////////////////////////////////////////////////////////////////////
@ -224,7 +225,6 @@ static bool CheckArgumentType (TRI_aql_node_t* parameter,
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief determine minimum and maximum argument number for argument pattern
////////////////////////////////////////////////////////////////////////////////
@ -312,6 +312,75 @@ static bool EqualName (TRI_associative_pointer_t* array,
/// @}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/// @brief optimise callback function for PATHS() AQL function
///
/// Inspects a field access made on the variable that holds the result of a
/// PATHS() call. If the call's direction argument is "outbound" and the access
/// is on <variable>.source.<attribute>, or the direction is "inbound" and the
/// access is on <variable>.destination.<attribute>, the access name is
/// rewritten in place to <variable>.<attribute> and the access is added to the
/// range hints of the vertex collection (1st argument of the PATHS() call).
/// In all other cases the field access is left untouched.
///
/// The direction argument is optional in the argument pattern PATHS() is
/// registered with ("c,h|s,b"). A call with fewer than three arguments is
/// therefore valid input and is simply not optimised.
///
/// @param fcallNode    the function call node; member 0 is the argument list
/// @param context      the query context, passed through to TRI_AddAccessAql
/// @param fieldAccess  the field access to inspect; its _fullName may be
///                     modified in place
////////////////////////////////////////////////////////////////////////////////

static void OptimisePaths (const TRI_aql_node_t* const fcallNode,
                           TRI_aql_context_t* const context,
                           TRI_aql_field_access_t* fieldAccess) {
  TRI_aql_collection_hint_t* hint;
  TRI_aql_node_t* args;
  TRI_aql_node_t* vertexCollection;
  TRI_aql_node_t* direction;
  char* directionValue;
  char* name;
  const char* lookFor;
  size_t len;
  size_t n;

  assert(fieldAccess);

  args = TRI_AQL_NODE_MEMBER(fcallNode, 0);

  if (args == NULL) {
    return;
  }

  // the direction (3rd argument) is optional. without it there is nothing to
  // base the optimisation on, and reading member 2 would access the argument
  // list out of bounds
  if (args->_members._length < 3) {
    return;
  }

  vertexCollection = TRI_AQL_NODE_MEMBER(args, 0);
  direction = TRI_AQL_NODE_MEMBER(args, 2);

  assert(vertexCollection);

  if (direction == NULL) {
    return;
  }

  directionValue = TRI_AQL_NODE_STRING(direction);

  if (directionValue == NULL) {
    return;
  }

  // determine which sub-attribute of a path maps to the vertex collection
  if (TRI_EqualString(directionValue, "outbound")) {
    lookFor = ".source.";
  }
  else if (TRI_EqualString(directionValue, "inbound")) {
    lookFor = ".destination.";
  }
  else {
    // any other direction is not optimised
    return;
  }

  len = strlen(lookFor);
  n = strlen(fieldAccess->_fullName);

  // the full name must be long enough to hold the variable name, the prefix
  // and at least one more character. this check must come first because it
  // also guarantees that the comparison below stays inside the string
  if (n <= fieldAccess->_variableNameLength + len) {
    return;
  }

  // part of the full name that follows the variable name
  name = fieldAccess->_fullName + fieldAccess->_variableNameLength;

  if (memcmp((const void*) lookFor, (const void*) name, len) != 0) {
    return;
  }

  // do not touch the field access if there is no hint to attach it to
  hint = (TRI_aql_collection_hint_t*) (TRI_AQL_NODE_DATA(vertexCollection));

  if (hint == NULL) {
    return;
  }

  // field name is collection.source.XXX, e.g. users.source._id
  LOG_DEBUG("optimising PATHS() field access %s", fieldAccess->_fullName);

  // we can now modify this fieldaccess in place to collection.XXX, e.g. users._id
  // the move starts at the last '.' of the prefix, so the number of bytes is
  // the remaining attribute name plus that '.' plus the trailing \0 byte
  memmove(name, name + len - 1, n - fieldAccess->_variableNameLength - len + 2);

  // attach the modified fieldAccess to the collection
  hint->_ranges = TRI_AddAccessAql(context, hint->_ranges, fieldAccess);
}
// -----------------------------------------------------------------------------
// --SECTION-- public functions
// -----------------------------------------------------------------------------
@ -331,7 +400,7 @@ TRI_associative_pointer_t* TRI_InitialiseFunctionsAql (void) {
functions = (TRI_associative_pointer_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_associative_pointer_t), false);
if (!functions) {
if (functions == NULL) {
return NULL;
}
@ -354,66 +423,66 @@ TRI_associative_pointer_t* TRI_InitialiseFunctionsAql (void) {
// a = array
// type check functions
REGISTER_FUNCTION("IS_NULL", "IS_NULL", true, false, ".");
REGISTER_FUNCTION("IS_BOOL", "IS_BOOL", true, false, ".");
REGISTER_FUNCTION("IS_NUMBER", "IS_NUMBER", true, false, ".");
REGISTER_FUNCTION("IS_STRING", "IS_STRING", true, false, ".");
REGISTER_FUNCTION("IS_LIST", "IS_LIST", true, false, ".");
REGISTER_FUNCTION("IS_DOCUMENT", "IS_DOCUMENT", true, false, ".");
REGISTER_FUNCTION("IS_NULL", "IS_NULL", true, false, ".", NULL);
REGISTER_FUNCTION("IS_BOOL", "IS_BOOL", true, false, ".", NULL);
REGISTER_FUNCTION("IS_NUMBER", "IS_NUMBER", true, false, ".", NULL);
REGISTER_FUNCTION("IS_STRING", "IS_STRING", true, false, ".", NULL);
REGISTER_FUNCTION("IS_LIST", "IS_LIST", true, false, ".", NULL);
REGISTER_FUNCTION("IS_DOCUMENT", "IS_DOCUMENT", true, false, ".", NULL);
// cast functions
REGISTER_FUNCTION("TO_NUMBER", "CAST_NUMBER", true, false, ".");
REGISTER_FUNCTION("TO_STRING", "CAST_STRING", true, false, ".");
REGISTER_FUNCTION("TO_BOOL", "CAST_BOOL", true, false, ".");
REGISTER_FUNCTION("TO_LIST", "CAST_LIST", true, false, ".");
REGISTER_FUNCTION("TO_NUMBER", "CAST_NUMBER", true, false, ".", NULL);
REGISTER_FUNCTION("TO_STRING", "CAST_STRING", true, false, ".", NULL);
REGISTER_FUNCTION("TO_BOOL", "CAST_BOOL", true, false, ".", NULL);
REGISTER_FUNCTION("TO_LIST", "CAST_LIST", true, false, ".", NULL);
// string functions
REGISTER_FUNCTION("CONCAT", "STRING_CONCAT", true, false, "sz,sz|+");
REGISTER_FUNCTION("CONCAT_SEPARATOR", "STRING_CONCAT_SEPARATOR", true, false, "s,sz,sz|+");
REGISTER_FUNCTION("CHAR_LENGTH", "STRING_LENGTH", true, false, "s");
REGISTER_FUNCTION("LOWER", "STRING_LOWER", true, false, "s");
REGISTER_FUNCTION("UPPER", "STRING_UPPER", true, false, "s");
REGISTER_FUNCTION("SUBSTRING", "STRING_SUBSTRING", true, false, "s,n|n");
REGISTER_FUNCTION("CONTAINS", "STRING_CONTAINS", true, false, "s,s");
REGISTER_FUNCTION("CONCAT", "STRING_CONCAT", true, false, "sz,sz|+", NULL);
REGISTER_FUNCTION("CONCAT_SEPARATOR", "STRING_CONCAT_SEPARATOR", true, false, "s,sz,sz|+", NULL);
REGISTER_FUNCTION("CHAR_LENGTH", "STRING_LENGTH", true, false, "s", NULL);
REGISTER_FUNCTION("LOWER", "STRING_LOWER", true, false, "s", NULL);
REGISTER_FUNCTION("UPPER", "STRING_UPPER", true, false, "s", NULL);
REGISTER_FUNCTION("SUBSTRING", "STRING_SUBSTRING", true, false, "s,n|n", NULL);
REGISTER_FUNCTION("CONTAINS", "STRING_CONTAINS", true, false, "s,s", NULL);
// numeric functions
REGISTER_FUNCTION("FLOOR", "NUMBER_FLOOR", true, false, "n");
REGISTER_FUNCTION("CEIL", "NUMBER_CEIL", true, false, "n");
REGISTER_FUNCTION("ROUND", "NUMBER_ROUND", true, false, "n");
REGISTER_FUNCTION("ABS", "NUMBER_ABS", true, false, "n");
REGISTER_FUNCTION("RAND", "NUMBER_RAND", false, false, "");
REGISTER_FUNCTION("FLOOR", "NUMBER_FLOOR", true, false, "n", NULL);
REGISTER_FUNCTION("CEIL", "NUMBER_CEIL", true, false, "n", NULL);
REGISTER_FUNCTION("ROUND", "NUMBER_ROUND", true, false, "n", NULL);
REGISTER_FUNCTION("ABS", "NUMBER_ABS", true, false, "n", NULL);
REGISTER_FUNCTION("RAND", "NUMBER_RAND", false, false, "", NULL);
// list functions
REGISTER_FUNCTION("UNION", "UNION", true, false, "l,l|+");
REGISTER_FUNCTION("LENGTH", "LENGTH", true, true, "l");
REGISTER_FUNCTION("MIN", "MIN", true, true, "l");
REGISTER_FUNCTION("MAX", "MAX", true, true, "l");
REGISTER_FUNCTION("SUM", "SUM", true, true, "l");
REGISTER_FUNCTION("UNIQUE", "UNIQUE", true, false, "l");
REGISTER_FUNCTION("REVERSE", "REVERSE", true, false, "l");
REGISTER_FUNCTION("FIRST", "FIRST", true, false, "l");
REGISTER_FUNCTION("LAST", "LAST", true, false, "l");
REGISTER_FUNCTION("UNION", "UNION", true, false, "l,l|+", NULL);
REGISTER_FUNCTION("LENGTH", "LENGTH", true, true, "l", NULL);
REGISTER_FUNCTION("MIN", "MIN", true, true, "l", NULL);
REGISTER_FUNCTION("MAX", "MAX", true, true, "l", NULL);
REGISTER_FUNCTION("SUM", "SUM", true, true, "l", NULL);
REGISTER_FUNCTION("UNIQUE", "UNIQUE", true, false, "l", NULL);
REGISTER_FUNCTION("REVERSE", "REVERSE", true, false, "l", NULL);
REGISTER_FUNCTION("FIRST", "FIRST", true, false, "l", NULL);
REGISTER_FUNCTION("LAST", "LAST", true, false, "l", NULL);
// document functions
REGISTER_FUNCTION("HAS", "HAS", true, false, "az,s");
REGISTER_FUNCTION("MERGE", "MERGE", true, false, "a,a|+");
REGISTER_FUNCTION("MERGE_RECURSIVE", "MERGE_RECURSIVE", true, false, "a,a|+");
REGISTER_FUNCTION("HAS", "HAS", true, false, "az,s", NULL);
REGISTER_FUNCTION("MERGE", "MERGE", true, false, "a,a|+", NULL);
REGISTER_FUNCTION("MERGE_RECURSIVE", "MERGE_RECURSIVE", true, false, "a,a|+", NULL);
// geo functions
REGISTER_FUNCTION("NEAR", "GEO_NEAR", false, false, "h,n,n,n|s");
REGISTER_FUNCTION("WITHIN", "GEO_WITHIN", false, false, "h,n,n,n|s");
REGISTER_FUNCTION("NEAR", "GEO_NEAR", false, false, "h,n,n,n|s", NULL);
REGISTER_FUNCTION("WITHIN", "GEO_WITHIN", false, false, "h,n,n,n|s", NULL);
// graph functions
REGISTER_FUNCTION("PATHS", "GRAPH_PATHS", false, false, "c,h|s,b");
REGISTER_FUNCTION("PATHS", "GRAPH_PATHS", false, false, "c,h|s,b", &OptimisePaths);
// misc functions
REGISTER_FUNCTION("FAIL", "FAIL", false, false, "|s"); // FAIL is non-deterministic, otherwise query optimisation will fail!
REGISTER_FUNCTION("PASSTHRU", "PASSTHRU", false, false, "."); // simple non-deterministic wrapper to avoid optimisations at parse time
REGISTER_FUNCTION("COLLECTIONS", "COLLECTIONS", false, false, "");
REGISTER_FUNCTION("NOT_NULL", "NOT_NULL", true, false, ".,.");
REGISTER_FUNCTION("NOT_LIST", "NOT_LIST", true, false, ".,.");
REGISTER_FUNCTION("FAIL", "FAIL", false, false, "|s", NULL); // FAIL is non-deterministic, otherwise query optimisation will fail!
REGISTER_FUNCTION("PASSTHRU", "PASSTHRU", false, false, ".", NULL); // simple non-deterministic wrapper to avoid optimisations at parse time
REGISTER_FUNCTION("COLLECTIONS", "COLLECTIONS", false, false, "", NULL);
REGISTER_FUNCTION("NOT_NULL", "NOT_NULL", true, false, ".,.", NULL);
REGISTER_FUNCTION("NOT_LIST", "NOT_LIST", true, false, ".,.", NULL);
if (!result) {
if (! result) {
TRI_FreeFunctionsAql(functions);
return NULL;
}
@ -498,7 +567,8 @@ bool TRI_RegisterFunctionAql (TRI_associative_pointer_t* functions,
const char* const internalName,
const bool isDeterministic,
const bool isGroup,
const char* const argPattern) {
const char* const argPattern,
void (*optimise)(const TRI_aql_node_t* const, TRI_aql_context_t* const, TRI_aql_field_access_t*)) {
TRI_aql_function_t* function;
function = (TRI_aql_function_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_aql_function_t), false);
@ -532,6 +602,7 @@ bool TRI_RegisterFunctionAql (TRI_associative_pointer_t* functions,
function->_isDeterministic = isDeterministic;
function->_isGroup = isGroup;
function->_argPattern = argPattern;
function->optimise = optimise;
// set minArgs and maxArgs
SetArgumentCount(function);

View File

@ -35,6 +35,7 @@
#include "Ahuacatl/ahuacatl-context.h"
#include "Ahuacatl/ahuacatl-ast-node.h"
#include "Ahuacatl/ahuacatl-access-optimiser.h"
#ifdef __cplusplus
extern "C" {
@ -57,6 +58,7 @@ typedef struct TRI_aql_function_s {
const char* _argPattern;
size_t _minArgs;
size_t _maxArgs;
void (*optimise)(const TRI_aql_node_t* const, TRI_aql_context_t* const, TRI_aql_field_access_t*);
}
TRI_aql_function_t;
@ -101,7 +103,8 @@ bool TRI_RegisterFunctionAql (TRI_associative_pointer_t*,
const char* const,
const bool,
const bool,
const char* const);
const char* const,
void (*)(const TRI_aql_node_t* const, TRI_aql_context_t* const, TRI_aql_field_access_t*));
////////////////////////////////////////////////////////////////////////////////
/// @brief check whether a function argument must be converted to another type

View File

@ -189,7 +189,7 @@ static TRI_aql_node_t* AnnotateNode (TRI_aql_statement_walker_t* const walker,
if (node->_type != TRI_AQL_NODE_COLLECTION) {
return node;
}
optimiser = (aql_optimiser_t*) walker->_data;
AttachCollectionHint(optimiser->_context, node);
@ -1030,6 +1030,17 @@ static void PatchVariables (TRI_aql_statement_walker_t* const walker) {
}
if (expressionNode != NULL) {
if (expressionNode->_type == TRI_AQL_NODE_FCALL) {
// the defining node is a function call
// get the function name
TRI_aql_function_t* function = TRI_AQL_NODE_DATA(expressionNode);
if (function->optimise != NULL) {
// call the function's optimise callback
function->optimise(expressionNode, context, fieldAccess);
}
}
if (expressionNode->_type == TRI_AQL_NODE_COLLECTION) {
TRI_aql_collection_hint_t* hint = (TRI_aql_collection_hint_t*) (TRI_AQL_NODE_DATA(expressionNode));

View File

@ -2042,11 +2042,11 @@ function AHUACATL_GRAPH_PATHS () {
}
var searchAttributes = {
"edgeCollection" : internal.db[edgeCollection],
"minLength" : minLength,
"maxLength" : maxLength,
"direction" : searchDirection,
"followCycles" : followCycles,
edgeCollection : internal.db[edgeCollection],
minLength : minLength,
maxLength : maxLength,
direction : searchDirection,
followCycles : followCycles,
};
// TODO: restrict allEdges to edges with certain _from values etc.
@ -2056,6 +2056,7 @@ function AHUACATL_GRAPH_PATHS () {
for (var i = 0; i < n; ++i) {
var vertex = vertices[i];
var visited = { };
visited[vertex._id] = true;
var connected = AHUACATL_GRAPH_SUBNODES(searchAttributes, vertex._id, visited, [ ], [ vertex ], 0);
for (j = 0; j < connected.length; ++j) {
@ -2075,10 +2076,10 @@ function AHUACATL_GRAPH_SUBNODES (searchAttributes, vertexId, visited, edges, ve
if (level >= searchAttributes.minLength) {
result.push({
"vertices" : vertices,
"edges" : edges,
"source" : vertices[0],
"destination" : vertices[vertices.length - 1],
vertices : vertices,
edges : edges,
source : vertices[0],
destination : vertices[vertices.length - 1],
});
}
@ -2112,7 +2113,7 @@ function AHUACATL_GRAPH_SUBNODES (searchAttributes, vertexId, visited, edges, ve
for (var j = 0; j < targets.length; ++j) {
var targetId = targets[j];
if (!searchAttributes.followCycles) {
if (! searchAttributes.followCycles) {
if (visited[targetId]) {
continue;
}
@ -2134,7 +2135,7 @@ function AHUACATL_GRAPH_SUBNODES (searchAttributes, vertexId, visited, edges, ve
result.push(connected[k]);
}
if (!searchAttributes.followCycles) {
if (! searchAttributes.followCycles) {
delete visited[targetId];
}
}