1
0
Fork 0

issue #271: simple paths() function optimisations

When an "outbound" query is made with the paths() function, the optimiser will now use an index if a FILTER condition can exploit an index on .source.
Example query:

FOR p IN PATHS(users, relations, "outbound")
  FILTER p.source._id == "some-id"
  RETURN p

The index on users._id was not exploited before, because the optimiser did not know that variable "p" related to collection "users" and that the condition actually would filter on users._id (the attribute name in the FILTER statement is "p.source._id").
Now, AQL functions can define callbacks which are called by the optimiser to try some function-specific optimisations.
The optimise callback for PATHS() tries to exploit indexes for filters that filter on the vertex collection (1st parameter to PATHS() call). Indexes can be exploited if the query is an "outbound" query and there are filters on "source", or if the query is an "inbound" query and there are filters on "destination". "anybound" queries will not be optimised.
This commit is contained in:
Jan Steemann 2012-10-26 12:01:46 +02:00
parent 7af1337607
commit 792f57b760
5 changed files with 153 additions and 65 deletions

View File

@ -401,6 +401,8 @@ TRI_aql_node_t* TRI_CreateNodeCollectionAql (TRI_aql_context_t* const context,
// init collection hint // init collection hint
hint = TRI_CreateCollectionHintAql(); hint = TRI_CreateCollectionHintAql();
// attach the hint to the collection
node->_value._value._data = hint; node->_value._value._data = hint;
if (hint == NULL) { if (hint == NULL) {
@ -950,14 +952,14 @@ TRI_aql_node_t* TRI_CreateNodeFcallAql (TRI_aql_context_t* const context,
function = TRI_GetByExternalNameFunctionAql(functions, name); function = TRI_GetByExternalNameFunctionAql(functions, name);
if (!function) { if (! function) {
// function name is unknown // function name is unknown
TRI_SetErrorContextAql(context, TRI_ERROR_QUERY_FUNCTION_NAME_UNKNOWN, name); TRI_SetErrorContextAql(context, TRI_ERROR_QUERY_FUNCTION_NAME_UNKNOWN, name);
return NULL; return NULL;
} }
// validate function call arguments // validate function call arguments
if (!TRI_ValidateArgsFunctionAql(context, function, parameters)) { if (! TRI_ValidateArgsFunctionAql(context, function, parameters)) {
return NULL; return NULL;
} }

View File

@ -27,6 +27,8 @@
#include "Ahuacatl/ahuacatl-functions.h" #include "Ahuacatl/ahuacatl-functions.h"
#include "Ahuacatl/ahuacatl-collections.h"
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- private macros // --SECTION-- private macros
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -40,15 +42,15 @@
/// @brief shorthand to register a query function and process the result /// @brief shorthand to register a query function and process the result
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#define REGISTER_FUNCTION(internalName, externalName, deterministic, group, argPattern) \ #define REGISTER_FUNCTION(internalName, externalName, deterministic, group, argPattern, optimiseCallback) \
result &= TRI_RegisterFunctionAql(functions, internalName, "AHUACATL_" externalName, deterministic, group, argPattern); result &= TRI_RegisterFunctionAql(functions, internalName, "AHUACATL_" externalName, deterministic, group, argPattern, optimiseCallback)
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief shorthand to check an argument and return an error if it is invalid /// @brief shorthand to check an argument and return an error if it is invalid
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#define ARG_CHECK \ #define ARG_CHECK \
if (!CheckArgumentType(parameter, &allowed)) { \ if (! CheckArgumentType(parameter, &allowed)) { \
TRI_SetErrorContextAql(context, TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, function->_externalName); \ TRI_SetErrorContextAql(context, TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, function->_externalName); \
return false; \ return false; \
} }
@ -112,7 +114,6 @@ static param_t InitParam (void) {
return param; return param;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief check the type of an argument for a function call /// @brief check the type of an argument for a function call
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -224,7 +225,6 @@ static bool CheckArgumentType (TRI_aql_node_t* parameter,
return false; return false;
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief determine minimum and maximum argument number for argument pattern /// @brief determine minimum and maximum argument number for argument pattern
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -312,6 +312,75 @@ static bool EqualName (TRI_associative_pointer_t* array,
/// @} /// @}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/// @brief optimise callback function for PATHS() AQL function
///
/// Invoked by the optimiser for a field access whose defining expression is a
/// PATHS() function call. If the call carries an explicit direction argument
/// and the field access goes through the matching path attribute
/// ("outbound" => ".source.", "inbound" => ".destination."), the field access
/// name is rewritten in place so that it refers to the vertex collection
/// directly (e.g. "p.source._id" becomes "p._id") and is then added to the
/// access ranges of the vertex collection's hint.
///
/// Nothing is changed when the direction argument is omitted, has any other
/// value, or when the field access does not start with the expected prefix.
///
/// @param fcallNode    the PATHS() function call node
/// @param context      the query context, passed through to TRI_AddAccessAql
/// @param fieldAccess  the field access to inspect; its _fullName may be
///                     modified in place
////////////////////////////////////////////////////////////////////////////////
static void OptimisePaths (const TRI_aql_node_t* const fcallNode,
                           TRI_aql_context_t* const context,
                           TRI_aql_field_access_t* fieldAccess) {
  TRI_aql_collection_hint_t* hint;
  TRI_aql_node_t* args;
  TRI_aql_node_t* vertexCollection;
  TRI_aql_node_t* direction;
  char* directionValue;
  char* name;
  const char* lookFor;
  size_t len;
  size_t n;

  assert(fieldAccess);

  args = TRI_AQL_NODE_MEMBER(fcallNode, 0);

  if (args == NULL) {
    return;
  }

  // the direction argument is optional (PATHS() is registered with the
  // argument pattern "c,h|s,b"), so a call with only two arguments is valid.
  // without an explicit direction there is nothing we can safely optimise,
  // and member 2 must not be accessed
  if (args->_members._length < 3) {
    return;
  }

  vertexCollection = TRI_AQL_NODE_MEMBER(args, 0);
  direction = TRI_AQL_NODE_MEMBER(args, 2);

  assert(vertexCollection);
  assert(direction);

  // NOTE(review): this assumes the direction argument is a constant string
  // value node at this point - confirm that non-constant expressions cannot
  // reach this callback
  directionValue = TRI_AQL_NODE_STRING(direction);

  if (directionValue == NULL) {
    return;
  }

  if (TRI_EqualString(directionValue, "outbound")) {
    lookFor = ".source.";
  }
  else if (TRI_EqualString(directionValue, "inbound")) {
    lookFor = ".destination.";
  }
  else {
    // any other direction cannot be mapped to a single vertex attribute
    return;
  }

  len = strlen(lookFor);
  n = strlen(fieldAccess->_fullName);
  // name points at the part of the full name following the variable name
  name = fieldAccess->_fullName + fieldAccess->_variableNameLength;

  // the full name must be longer than variable name + prefix, i.e. there must
  // be at least one attribute character following the prefix
  if (n <= fieldAccess->_variableNameLength + len ||
      memcmp(lookFor, name, len) != 0) {
    return;
  }

  // fetch the hint before touching the field access, so the name is left
  // unmodified if there is no hint to attach it to
  hint = (TRI_aql_collection_hint_t*) (TRI_AQL_NODE_DATA(vertexCollection));

  if (hint == NULL) {
    return;
  }

  // field name is collection.source.XXX, e.g. users.source._id
  LOG_DEBUG("optimising PATHS() field access %s", fieldAccess->_fullName);

  // we can now modify this fieldaccess in place to collection.XXX, e.g. users._id
  // the source starts at the trailing '.' of the prefix (name + len - 1), so
  // the remaining length is strlen(name) - (len - 1), plus 1 for the
  // trailing \0 byte which is copied as well
  memmove(name, name + len - 1, n - fieldAccess->_variableNameLength - len + 2);

  // attach the modified fieldAccess to the collection
  hint->_ranges = TRI_AddAccessAql(context, hint->_ranges, fieldAccess);
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- public functions // --SECTION-- public functions
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -331,7 +400,7 @@ TRI_associative_pointer_t* TRI_InitialiseFunctionsAql (void) {
functions = (TRI_associative_pointer_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_associative_pointer_t), false); functions = (TRI_associative_pointer_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_associative_pointer_t), false);
if (!functions) { if (functions == NULL) {
return NULL; return NULL;
} }
@ -354,66 +423,66 @@ TRI_associative_pointer_t* TRI_InitialiseFunctionsAql (void) {
// a = array // a = array
// type check functions // type check functions
REGISTER_FUNCTION("IS_NULL", "IS_NULL", true, false, "."); REGISTER_FUNCTION("IS_NULL", "IS_NULL", true, false, ".", NULL);
REGISTER_FUNCTION("IS_BOOL", "IS_BOOL", true, false, "."); REGISTER_FUNCTION("IS_BOOL", "IS_BOOL", true, false, ".", NULL);
REGISTER_FUNCTION("IS_NUMBER", "IS_NUMBER", true, false, "."); REGISTER_FUNCTION("IS_NUMBER", "IS_NUMBER", true, false, ".", NULL);
REGISTER_FUNCTION("IS_STRING", "IS_STRING", true, false, "."); REGISTER_FUNCTION("IS_STRING", "IS_STRING", true, false, ".", NULL);
REGISTER_FUNCTION("IS_LIST", "IS_LIST", true, false, "."); REGISTER_FUNCTION("IS_LIST", "IS_LIST", true, false, ".", NULL);
REGISTER_FUNCTION("IS_DOCUMENT", "IS_DOCUMENT", true, false, "."); REGISTER_FUNCTION("IS_DOCUMENT", "IS_DOCUMENT", true, false, ".", NULL);
// cast functions // cast functions
REGISTER_FUNCTION("TO_NUMBER", "CAST_NUMBER", true, false, "."); REGISTER_FUNCTION("TO_NUMBER", "CAST_NUMBER", true, false, ".", NULL);
REGISTER_FUNCTION("TO_STRING", "CAST_STRING", true, false, "."); REGISTER_FUNCTION("TO_STRING", "CAST_STRING", true, false, ".", NULL);
REGISTER_FUNCTION("TO_BOOL", "CAST_BOOL", true, false, "."); REGISTER_FUNCTION("TO_BOOL", "CAST_BOOL", true, false, ".", NULL);
REGISTER_FUNCTION("TO_LIST", "CAST_LIST", true, false, "."); REGISTER_FUNCTION("TO_LIST", "CAST_LIST", true, false, ".", NULL);
// string functions // string functions
REGISTER_FUNCTION("CONCAT", "STRING_CONCAT", true, false, "sz,sz|+"); REGISTER_FUNCTION("CONCAT", "STRING_CONCAT", true, false, "sz,sz|+", NULL);
REGISTER_FUNCTION("CONCAT_SEPARATOR", "STRING_CONCAT_SEPARATOR", true, false, "s,sz,sz|+"); REGISTER_FUNCTION("CONCAT_SEPARATOR", "STRING_CONCAT_SEPARATOR", true, false, "s,sz,sz|+", NULL);
REGISTER_FUNCTION("CHAR_LENGTH", "STRING_LENGTH", true, false, "s"); REGISTER_FUNCTION("CHAR_LENGTH", "STRING_LENGTH", true, false, "s", NULL);
REGISTER_FUNCTION("LOWER", "STRING_LOWER", true, false, "s"); REGISTER_FUNCTION("LOWER", "STRING_LOWER", true, false, "s", NULL);
REGISTER_FUNCTION("UPPER", "STRING_UPPER", true, false, "s"); REGISTER_FUNCTION("UPPER", "STRING_UPPER", true, false, "s", NULL);
REGISTER_FUNCTION("SUBSTRING", "STRING_SUBSTRING", true, false, "s,n|n"); REGISTER_FUNCTION("SUBSTRING", "STRING_SUBSTRING", true, false, "s,n|n", NULL);
REGISTER_FUNCTION("CONTAINS", "STRING_CONTAINS", true, false, "s,s"); REGISTER_FUNCTION("CONTAINS", "STRING_CONTAINS", true, false, "s,s", NULL);
// numeric functions // numeric functions
REGISTER_FUNCTION("FLOOR", "NUMBER_FLOOR", true, false, "n"); REGISTER_FUNCTION("FLOOR", "NUMBER_FLOOR", true, false, "n", NULL);
REGISTER_FUNCTION("CEIL", "NUMBER_CEIL", true, false, "n"); REGISTER_FUNCTION("CEIL", "NUMBER_CEIL", true, false, "n", NULL);
REGISTER_FUNCTION("ROUND", "NUMBER_ROUND", true, false, "n"); REGISTER_FUNCTION("ROUND", "NUMBER_ROUND", true, false, "n", NULL);
REGISTER_FUNCTION("ABS", "NUMBER_ABS", true, false, "n"); REGISTER_FUNCTION("ABS", "NUMBER_ABS", true, false, "n", NULL);
REGISTER_FUNCTION("RAND", "NUMBER_RAND", false, false, ""); REGISTER_FUNCTION("RAND", "NUMBER_RAND", false, false, "", NULL);
// list functions // list functions
REGISTER_FUNCTION("UNION", "UNION", true, false, "l,l|+"); REGISTER_FUNCTION("UNION", "UNION", true, false, "l,l|+", NULL);
REGISTER_FUNCTION("LENGTH", "LENGTH", true, true, "l"); REGISTER_FUNCTION("LENGTH", "LENGTH", true, true, "l", NULL);
REGISTER_FUNCTION("MIN", "MIN", true, true, "l"); REGISTER_FUNCTION("MIN", "MIN", true, true, "l", NULL);
REGISTER_FUNCTION("MAX", "MAX", true, true, "l"); REGISTER_FUNCTION("MAX", "MAX", true, true, "l", NULL);
REGISTER_FUNCTION("SUM", "SUM", true, true, "l"); REGISTER_FUNCTION("SUM", "SUM", true, true, "l", NULL);
REGISTER_FUNCTION("UNIQUE", "UNIQUE", true, false, "l"); REGISTER_FUNCTION("UNIQUE", "UNIQUE", true, false, "l", NULL);
REGISTER_FUNCTION("REVERSE", "REVERSE", true, false, "l"); REGISTER_FUNCTION("REVERSE", "REVERSE", true, false, "l", NULL);
REGISTER_FUNCTION("FIRST", "FIRST", true, false, "l"); REGISTER_FUNCTION("FIRST", "FIRST", true, false, "l", NULL);
REGISTER_FUNCTION("LAST", "LAST", true, false, "l"); REGISTER_FUNCTION("LAST", "LAST", true, false, "l", NULL);
// document functions // document functions
REGISTER_FUNCTION("HAS", "HAS", true, false, "az,s"); REGISTER_FUNCTION("HAS", "HAS", true, false, "az,s", NULL);
REGISTER_FUNCTION("MERGE", "MERGE", true, false, "a,a|+"); REGISTER_FUNCTION("MERGE", "MERGE", true, false, "a,a|+", NULL);
REGISTER_FUNCTION("MERGE_RECURSIVE", "MERGE_RECURSIVE", true, false, "a,a|+"); REGISTER_FUNCTION("MERGE_RECURSIVE", "MERGE_RECURSIVE", true, false, "a,a|+", NULL);
// geo functions // geo functions
REGISTER_FUNCTION("NEAR", "GEO_NEAR", false, false, "h,n,n,n|s"); REGISTER_FUNCTION("NEAR", "GEO_NEAR", false, false, "h,n,n,n|s", NULL);
REGISTER_FUNCTION("WITHIN", "GEO_WITHIN", false, false, "h,n,n,n|s"); REGISTER_FUNCTION("WITHIN", "GEO_WITHIN", false, false, "h,n,n,n|s", NULL);
// graph functions // graph functions
REGISTER_FUNCTION("PATHS", "GRAPH_PATHS", false, false, "c,h|s,b"); REGISTER_FUNCTION("PATHS", "GRAPH_PATHS", false, false, "c,h|s,b", &OptimisePaths);
// misc functions // misc functions
REGISTER_FUNCTION("FAIL", "FAIL", false, false, "|s"); // FAIL is non-deterministic, otherwise query optimisation will fail! REGISTER_FUNCTION("FAIL", "FAIL", false, false, "|s", NULL); // FAIL is non-deterministic, otherwise query optimisation will fail!
REGISTER_FUNCTION("PASSTHRU", "PASSTHRU", false, false, "."); // simple non-deterministic wrapper to avoid optimisations at parse time REGISTER_FUNCTION("PASSTHRU", "PASSTHRU", false, false, ".", NULL); // simple non-deterministic wrapper to avoid optimisations at parse time
REGISTER_FUNCTION("COLLECTIONS", "COLLECTIONS", false, false, ""); REGISTER_FUNCTION("COLLECTIONS", "COLLECTIONS", false, false, "", NULL);
REGISTER_FUNCTION("NOT_NULL", "NOT_NULL", true, false, ".,."); REGISTER_FUNCTION("NOT_NULL", "NOT_NULL", true, false, ".,.", NULL);
REGISTER_FUNCTION("NOT_LIST", "NOT_LIST", true, false, ".,."); REGISTER_FUNCTION("NOT_LIST", "NOT_LIST", true, false, ".,.", NULL);
if (!result) { if (! result) {
TRI_FreeFunctionsAql(functions); TRI_FreeFunctionsAql(functions);
return NULL; return NULL;
} }
@ -498,7 +567,8 @@ bool TRI_RegisterFunctionAql (TRI_associative_pointer_t* functions,
const char* const internalName, const char* const internalName,
const bool isDeterministic, const bool isDeterministic,
const bool isGroup, const bool isGroup,
const char* const argPattern) { const char* const argPattern,
void (*optimise)(const TRI_aql_node_t* const, TRI_aql_context_t* const, TRI_aql_field_access_t*)) {
TRI_aql_function_t* function; TRI_aql_function_t* function;
function = (TRI_aql_function_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_aql_function_t), false); function = (TRI_aql_function_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_aql_function_t), false);
@ -532,6 +602,7 @@ bool TRI_RegisterFunctionAql (TRI_associative_pointer_t* functions,
function->_isDeterministic = isDeterministic; function->_isDeterministic = isDeterministic;
function->_isGroup = isGroup; function->_isGroup = isGroup;
function->_argPattern = argPattern; function->_argPattern = argPattern;
function->optimise = optimise;
// set minArgs and maxArgs // set minArgs and maxArgs
SetArgumentCount(function); SetArgumentCount(function);

View File

@ -35,6 +35,7 @@
#include "Ahuacatl/ahuacatl-context.h" #include "Ahuacatl/ahuacatl-context.h"
#include "Ahuacatl/ahuacatl-ast-node.h" #include "Ahuacatl/ahuacatl-ast-node.h"
#include "Ahuacatl/ahuacatl-access-optimiser.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -57,6 +58,7 @@ typedef struct TRI_aql_function_s {
const char* _argPattern; const char* _argPattern;
size_t _minArgs; size_t _minArgs;
size_t _maxArgs; size_t _maxArgs;
void (*optimise)(const TRI_aql_node_t* const, TRI_aql_context_t* const, TRI_aql_field_access_t*);
} }
TRI_aql_function_t; TRI_aql_function_t;
@ -101,7 +103,8 @@ bool TRI_RegisterFunctionAql (TRI_associative_pointer_t*,
const char* const, const char* const,
const bool, const bool,
const bool, const bool,
const char* const); const char* const,
void (*)(const TRI_aql_node_t* const, TRI_aql_context_t* const, TRI_aql_field_access_t*));
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief check whether a function argument must be converted to another type /// @brief check whether a function argument must be converted to another type

View File

@ -1030,6 +1030,17 @@ static void PatchVariables (TRI_aql_statement_walker_t* const walker) {
} }
if (expressionNode != NULL) { if (expressionNode != NULL) {
if (expressionNode->_type == TRI_AQL_NODE_FCALL) {
// the defining node is a function call
// get the function name
TRI_aql_function_t* function = TRI_AQL_NODE_DATA(expressionNode);
if (function->optimise != NULL) {
// call the function's optimise callback
function->optimise(expressionNode, context, fieldAccess);
}
}
if (expressionNode->_type == TRI_AQL_NODE_COLLECTION) { if (expressionNode->_type == TRI_AQL_NODE_COLLECTION) {
TRI_aql_collection_hint_t* hint = (TRI_aql_collection_hint_t*) (TRI_AQL_NODE_DATA(expressionNode)); TRI_aql_collection_hint_t* hint = (TRI_aql_collection_hint_t*) (TRI_AQL_NODE_DATA(expressionNode));

View File

@ -2042,11 +2042,11 @@ function AHUACATL_GRAPH_PATHS () {
} }
var searchAttributes = { var searchAttributes = {
"edgeCollection" : internal.db[edgeCollection], edgeCollection : internal.db[edgeCollection],
"minLength" : minLength, minLength : minLength,
"maxLength" : maxLength, maxLength : maxLength,
"direction" : searchDirection, direction : searchDirection,
"followCycles" : followCycles, followCycles : followCycles,
}; };
// TODO: restrict allEdges to edges with certain _from values etc. // TODO: restrict allEdges to edges with certain _from values etc.
@ -2056,6 +2056,7 @@ function AHUACATL_GRAPH_PATHS () {
for (var i = 0; i < n; ++i) { for (var i = 0; i < n; ++i) {
var vertex = vertices[i]; var vertex = vertices[i];
var visited = { }; var visited = { };
visited[vertex._id] = true; visited[vertex._id] = true;
var connected = AHUACATL_GRAPH_SUBNODES(searchAttributes, vertex._id, visited, [ ], [ vertex ], 0); var connected = AHUACATL_GRAPH_SUBNODES(searchAttributes, vertex._id, visited, [ ], [ vertex ], 0);
for (j = 0; j < connected.length; ++j) { for (j = 0; j < connected.length; ++j) {
@ -2075,10 +2076,10 @@ function AHUACATL_GRAPH_SUBNODES (searchAttributes, vertexId, visited, edges, ve
if (level >= searchAttributes.minLength) { if (level >= searchAttributes.minLength) {
result.push({ result.push({
"vertices" : vertices, vertices : vertices,
"edges" : edges, edges : edges,
"source" : vertices[0], source : vertices[0],
"destination" : vertices[vertices.length - 1], destination : vertices[vertices.length - 1],
}); });
} }
@ -2112,7 +2113,7 @@ function AHUACATL_GRAPH_SUBNODES (searchAttributes, vertexId, visited, edges, ve
for (var j = 0; j < targets.length; ++j) { for (var j = 0; j < targets.length; ++j) {
var targetId = targets[j]; var targetId = targets[j];
if (!searchAttributes.followCycles) { if (! searchAttributes.followCycles) {
if (visited[targetId]) { if (visited[targetId]) {
continue; continue;
} }
@ -2134,7 +2135,7 @@ function AHUACATL_GRAPH_SUBNODES (searchAttributes, vertexId, visited, edges, ve
result.push(connected[k]); result.push(connected[k]);
} }
if (!searchAttributes.followCycles) { if (! searchAttributes.followCycles) {
delete visited[targetId]; delete visited[targetId];
} }
} }