1
0
Fork 0

CXX implementation of AQL `LIKE`

This commit is contained in:
Jan Steemann 2015-08-04 10:30:18 +02:00
parent 0f1ce26e41
commit 5dab23a3b7
7 changed files with 299 additions and 7 deletions

View File

@ -1,6 +1,8 @@
v2.7.0 (XXXX-XX-XX) v2.7.0 (XXXX-XX-XX)
------------------- -------------------
* implemented AQL `LIKE` function using ICU regexes
* add `RETURN DISTINCT` for AQL queries to return unique results: * add `RETURN DISTINCT` for AQL queries to return unique results:
FOR doc IN collection FOR doc IN collection

View File

@ -28,6 +28,7 @@
#include "Aql/ExecutionBlock.h" #include "Aql/ExecutionBlock.h"
#include "Aql/CollectionScanner.h" #include "Aql/CollectionScanner.h"
#include "Aql/ExecutionEngine.h" #include "Aql/ExecutionEngine.h"
#include "Aql/Functions.h"
#include "Basics/ScopeGuard.h" #include "Basics/ScopeGuard.h"
#include "Basics/StringUtils.h" #include "Basics/StringUtils.h"
#include "Basics/StringBuffer.h" #include "Basics/StringBuffer.h"
@ -1442,7 +1443,16 @@ bool IndexRangeBlock::initRanges () {
} }
else { else {
// no V8 context required! // no V8 context required!
buildExpressions();
Functions::InitializeThreadContext();
try {
buildExpressions();
Functions::DestroyThreadContext();
}
catch (...) {
Functions::DestroyThreadContext();
throw;
}
} }
} }
@ -2894,7 +2904,16 @@ void CalculationBlock::doEvaluation (AqlItemBlock* result) {
if (! _expression->isV8()) { if (! _expression->isV8()) {
// an expression that does not require V8 // an expression that does not require V8
executeExpression(result);
Functions::InitializeThreadContext();
try {
executeExpression(result);
Functions::DestroyThreadContext();
}
catch (...) {
Functions::DestroyThreadContext();
throw;
}
} }
else { else {
bool const isRunningInCluster = triagens::arango::ServerState::instance()->isRunningInCluster(); bool const isRunningInCluster = triagens::arango::ServerState::instance()->isRunningInCluster();

View File

@ -127,7 +127,7 @@ std::unordered_map<std::string, Function const> const Executor::FunctionNames{
{ "UPPER", Function("UPPER", "AQL_UPPER", "s", true, true, false, true, true) }, { "UPPER", Function("UPPER", "AQL_UPPER", "s", true, true, false, true, true) },
{ "SUBSTRING", Function("SUBSTRING", "AQL_SUBSTRING", "s,n|n", true, true, false, true, true) }, { "SUBSTRING", Function("SUBSTRING", "AQL_SUBSTRING", "s,n|n", true, true, false, true, true) },
{ "CONTAINS", Function("CONTAINS", "AQL_CONTAINS", "s,s|b", true, true, false, true, true) }, { "CONTAINS", Function("CONTAINS", "AQL_CONTAINS", "s,s|b", true, true, false, true, true) },
{ "LIKE", Function("LIKE", "AQL_LIKE", "s,r|b", true, true, false, true, true) }, { "LIKE", Function("LIKE", "AQL_LIKE", "s,r|b", true, true, false, true, true, &Functions::Like) },
{ "LEFT", Function("LEFT", "AQL_LEFT", "s,n", true, true, false, true, true) }, { "LEFT", Function("LEFT", "AQL_LEFT", "s,n", true, true, false, true, true) },
{ "RIGHT", Function("RIGHT", "AQL_RIGHT", "s,n", true, true, false, true, true) }, { "RIGHT", Function("RIGHT", "AQL_RIGHT", "s,n", true, true, false, true, true) },
{ "TRIM", Function("TRIM", "AQL_TRIM", "s|ns", true, true, false, true, true) }, { "TRIM", Function("TRIM", "AQL_TRIM", "s|ns", true, true, false, true, true) },

View File

@ -673,13 +673,14 @@ AqlValue Expression::executeSimpleExpression (AstNode const* node,
for (size_t i = 0; i < n; ++i) { for (size_t i = 0; i < n; ++i) {
TRI_document_collection_t const* myCollection = nullptr; TRI_document_collection_t const* myCollection = nullptr;
auto arg = member->getMemberUnchecked(i); auto arg = member->getMemberUnchecked(i);
if (arg->type == NODE_TYPE_COLLECTION) { if (arg->type == NODE_TYPE_COLLECTION) {
char const* collectionName = arg->getStringValue(); char const* collectionName = arg->getStringValue();
parameters.emplace_back(std::make_pair(AqlValue(new Json(TRI_UNKNOWN_MEM_ZONE, collectionName, strlen(collectionName))), nullptr)); parameters.emplace_back(AqlValue(new Json(TRI_UNKNOWN_MEM_ZONE, collectionName, strlen(collectionName))), nullptr);
} }
else { else {
auto value = executeSimpleExpression(arg, &myCollection, trx, argv, startPos, vars, regs, false); auto value = executeSimpleExpression(arg, &myCollection, trx, argv, startPos, vars, regs, false);
parameters.emplace_back(std::make_pair(value, myCollection)); parameters.emplace_back(value, myCollection);
} }
} }

View File

@ -46,10 +46,89 @@ using namespace triagens::aql;
using Json = triagens::basics::Json; using Json = triagens::basics::Json;
using CollectionNameResolver = triagens::arango::CollectionNameResolver; using CollectionNameResolver = triagens::arango::CollectionNameResolver;
////////////////////////////////////////////////////////////////////////////////
/// @brief thread-local cache for compiled regexes
///
/// maps a regex pattern string (as produced by BuildRegexPattern) to the
/// matcher that Functions::Like compiled from it. the map holds raw pointers,
/// which are deleted again in Functions::DestroyThreadContext. a nullptr value
/// marks a pattern that could not be compiled; it is cached as well so that
/// the failed compilation is not repeated for the same pattern
////////////////////////////////////////////////////////////////////////////////
thread_local std::unordered_map<std::string, RegexMatcher*> RegexCache;
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- private functions // --SECTION-- private functions
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief translate an AQL LIKE pattern into an anchored regex pattern string
///
/// @param ptr              start of the LIKE pattern; exactly length bytes are
///                         read, so it need not be NUL-terminated
/// @param length           number of bytes in the LIKE pattern
/// @param caseInsensitive  if true, the regex is prefixed with (?i)
/// @return the regex pattern, always enclosed in ^ and $
///
/// translation rules:
/// - an unescaped % becomes .* and an unescaped _ becomes .
/// - a backslash in front of % or _ makes them literal characters
/// - two consecutive backslashes produce one literal backslash
/// - regex metacharacters (* ? + [ ( ) { } ^ $ | .) are emitted with a leading
///   backslash so they match literally; a backslash that preceded them in the
///   LIKE pattern is consumed
/// - a backslash in front of any other character is kept as a literal
///   backslash
/// - a single trailing backslash at the very end of the pattern is dropped
////////////////////////////////////////////////////////////////////////////////

static std::string BuildRegexPattern (char const* ptr,
                                      size_t length,
                                      bool caseInsensitive) {
  std::string pattern;
  // room for the anchors, the optional (?i) prefix and a few escapes
  pattern.reserve(length + 8);

  // pattern is always anchored
  pattern.push_back('^');
  if (caseInsensitive) {
    pattern.append("(?i)");
  }

  // true if the previous character was an unconsumed backslash
  bool escaped = false;

  for (size_t i = 0; i < length; ++i) {
    char const c = ptr[i];

    if (c == '\\') {
      if (escaped) {
        // literal backslash
        pattern.append("\\\\");
      }
      escaped = ! escaped;
      continue;
    }

    if (c == '%') {
      if (escaped) {
        // literal %
        pattern.push_back('%');
      }
      else {
        // wildcard: any number of characters
        pattern.append(".*");
      }
    }
    else if (c == '_') {
      if (escaped) {
        // literal underscore
        pattern.push_back('_');
      }
      else {
        // wildcard: exactly one character
        pattern.push_back('.');
      }
    }
    else if (c == '*' || c == '?' || c == '+' || c == '[' || c == '(' ||
             c == ')' || c == '{' || c == '}' || c == '^' || c == '$' ||
             c == '|' || c == '.') {
      // character with special meaning in a regex, must be quoted to be
      // matched literally. '*' belongs here too: left unquoted it would act
      // as a quantifier on the preceding character.
      // (a backslash never reaches this branch, it is handled above)
      pattern.push_back('\\');
      pattern.push_back(c);
    }
    else {
      if (escaped) {
        // found a backslash followed by no special character
        pattern.append("\\\\");
      }
      // literal character
      pattern.push_back(c);
    }

    escaped = false;
  }

  // always anchor the pattern
  pattern.push_back('$');

  return pattern;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief extract a function parameter from the arguments list /// @brief extract a function parameter from the arguments list
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -307,6 +386,32 @@ static void AppendAsString (triagens::basics::StringBuffer& buffer,
} }
} }
// -----------------------------------------------------------------------------
// --SECTION-- AQL functions public helpers
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief called before a query starts
/// has the chance to set up any thread-local storage
///
/// currently a no-op: the thread-local regex cache is filled on demand by
/// Functions::Like, so nothing has to be prepared here
////////////////////////////////////////////////////////////////////////////////

void Functions::InitializeThreadContext () {
}
////////////////////////////////////////////////////////////////////////////////
/// @brief called when a query ends
/// frees every regex matcher held in the thread-local regex cache and then
/// empties the cache
////////////////////////////////////////////////////////////////////////////////

void Functions::DestroyThreadContext () {
  // entries may be nullptr (patterns that failed to compile); deleting a
  // nullptr is harmless, and the loop simply does nothing for an empty cache
  for (auto const& entry : RegexCache) {
    delete entry.second;
  }

  RegexCache.clear();
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- AQL function bindings // --SECTION-- AQL function bindings
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -586,6 +691,70 @@ AqlValue Functions::Concat (triagens::aql::Query*,
return AqlValue(jr); return AqlValue(jr);
} }
////////////////////////////////////////////////////////////////////////////////
/// @brief function LIKE
///
/// parameters: value (#0), LIKE pattern (#1), optional case-insensitivity
/// flag (#2, defaults to false). both value and pattern are converted to
/// strings before matching.
///
/// returns a boolean AqlValue telling whether the value matches the pattern.
/// if the pattern cannot be compiled or matching reports an error, a warning
/// is registered on the query and null is returned.
/// throws TRI_ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH if fewer than two
/// parameters are passed.
///
/// compiled matchers are kept in the thread-local RegexCache, keyed by the
/// generated regex pattern, and are released by DestroyThreadContext
////////////////////////////////////////////////////////////////////////////////

AqlValue Functions::Like (triagens::aql::Query* query,
                          triagens::arango::AqlTransaction* trx,
                          FunctionParameters const& parameters) {
  if (parameters.size() < 2) {
    THROW_ARANGO_EXCEPTION_PARAMS(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH, "LIKE", (int) 2, (int) 3);
  }

  bool const caseInsensitive = GetBooleanParameter(trx, parameters, 2, false);
  triagens::basics::StringBuffer buffer(TRI_UNKNOWN_MEM_ZONE, 24);

  // build pattern from parameter #1
  auto const regex = ExtractFunctionParameter(trx, parameters, 1, false);
  AppendAsString(buffer, regex.json());
  size_t const length = buffer.length();

  // initialize directly from the returned temporary; wrapping the call in
  // std::move would only prevent copy elision
  std::string const pattern = BuildRegexPattern(buffer.c_str(), length, caseInsensitive);

  RegexMatcher* matcher = nullptr;

  // check regex cache
  auto it = RegexCache.find(pattern);

  if (it != RegexCache.end()) {
    matcher = (*it).second;
  }
  else {
    matcher = triagens::basics::Utf8Helper::DefaultUtf8Helper.buildMatcher(pattern);

    try {
      // insert into cache, no matter if pattern is valid or not
      // (a nullptr entry avoids re-compiling an invalid pattern)
      RegexCache.emplace(pattern, matcher);
    }
    catch (...) {
      // the cache did not take ownership, so free the matcher here
      delete matcher;
      throw;
    }
  }

  if (matcher == nullptr) {
    // compiling regular expression failed
    RegisterWarning(query, "LIKE", TRI_ERROR_QUERY_INVALID_REGEX);
    return AqlValue(new Json(Json::Null));
  }

  // extract value (the buffer is reused, the pattern has been copied out)
  buffer.clear();
  auto const value = ExtractFunctionParameter(trx, parameters, 0, false);
  AppendAsString(buffer, value.json());

  bool error = false;
  bool const result = triagens::basics::Utf8Helper::DefaultUtf8Helper.matches(matcher, buffer.c_str(), buffer.length(), error);

  if (error) {
    // matching the value against the regular expression failed
    RegisterWarning(query, "LIKE", TRI_ERROR_QUERY_INVALID_REGEX);
    return AqlValue(new Json(Json::Null));
  }

  return AqlValue(new Json(result));
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief function PASSTHRU /// @brief function PASSTHRU
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////

View File

@ -54,12 +54,31 @@ namespace triagens {
triagens::arango::AqlTransaction*, triagens::arango::AqlTransaction*,
FunctionParameters const&)> FunctionImplementation; FunctionParameters const&)> FunctionImplementation;
struct Functions {
// -----------------------------------------------------------------------------
// --SECTION-- AQL functions public helpers
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief called before a query starts
/// has the chance to set up any thread-local storage
////////////////////////////////////////////////////////////////////////////////
static void InitializeThreadContext ();
////////////////////////////////////////////////////////////////////////////////
/// @brief called when a query ends
/// its responsibility is to clear any thread-local storage
////////////////////////////////////////////////////////////////////////////////
static void DestroyThreadContext ();
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// --SECTION-- AQL function bindings // --SECTION-- AQL function bindings
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
struct Functions {
static AqlValue IsNull (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue IsNull (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
static AqlValue IsBool (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue IsBool (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
static AqlValue IsNumber (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue IsNumber (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
@ -72,6 +91,7 @@ namespace triagens {
static AqlValue ToArray (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue ToArray (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
static AqlValue Length (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue Length (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
static AqlValue Concat (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue Concat (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
static AqlValue Like (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
static AqlValue Passthru (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue Passthru (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
static AqlValue Unset (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue Unset (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
static AqlValue Keep (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&); static AqlValue Keep (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);

View File

@ -133,6 +133,87 @@ function ahuacatlStringFunctionsTestSuite () {
}); });
}, },
////////////////////////////////////////////////////////////////////////////////
/// @brief test like function, invalid arguments
////////////////////////////////////////////////////////////////////////////////
testLikeInvalidCxx : function () {
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, "RETURN NOOPT(LIKE())");
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, "RETURN NOOPT(LIKE(\"test\"))");
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, "RETURN NOOPT(LIKE(\"test\", \"meow\", \"foo\", \"bar\"))");
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test like function
////////////////////////////////////////////////////////////////////////////////
testLikeCxx : function () {
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"test\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"%test\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"test%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"%test%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%test%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%is%test%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%g\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%n\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"This%n\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"his%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"%g\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"%G\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%test%is%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"%\", \"\\%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"a%c\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"ac\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"a\\\\%\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"\\\\%a%\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"\\\\%\\\\%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"%%\", \"\\\\%\\\\%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"_\", \"\\\\_\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"_\", \"\\\\_%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abcd\", \"_bcd\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abcde\", \"_bcd%\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"abcde\", \"\\\\_bcd%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"\\\\abc\", \"\\\\\\\\%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"\\abc\", \"\\a%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"[ ] ( ) % * . + -\", \"[%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"[ ] ( ) % * . + -\", \"[ ] ( ) \\% * . + -\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"[ ] ( ) % * . + -\", \"%. +%\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abc^def$g\", \"abc^def$g\"))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abc^def$g\", \"%^%$g\"))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"ABCD\", \"abcd\", false))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"ABCD\", \"abcd\", true))"));
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"abcd\", \"ABCD\", false))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abcd\", \"ABCD\", true))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"MÖterTräNenMÜtterSöhne\", \"MöterTräNenMütterSöhne\", true))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"MÖterTräNenMÜtterSöhne\", \"mötertränenmüttersöhne\", true))"));
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"MÖterTräNenMÜtterSöhne\", \"MÖTERTRÄNENMÜTTERSÖHNE\", true))"));
assertEqual([ [ true, false, true, false ] ], getQueryResults("RETURN [ NOOPT(LIKE(\"Möter\", \"m_ter\", true)), NOOPT(LIKE(\"Möter\", \"m_ter\", false)), NOOPT(LIKE(\"Möter\", \"m_ter\", true)), NOOPT(LIKE(\"Möter\", \"m_ter\", false)) ]"));
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test like with special characters
////////////////////////////////////////////////////////////////////////////////
testLikeSpecialCharsCxx : function () {
var data = [
"the quick\nbrown fox jumped over\r\nthe lazy dog",
"'the \"\\quick\\\n \"brown\\\rfox' jumped",
'"the fox"" jumped \\over the \newline \roof"'
];
data.forEach(function(value) {
var actual = getQueryResults("RETURN NOOPT(LIKE(" + JSON.stringify(value) + ", 'foobar'))");
assertEqual([ false ], actual);
actual = getQueryResults("RETURN NOOPT(LIKE(" + JSON.stringify(value) + ", " + JSON.stringify(value) + "))");
assertEqual([ true ], actual);
});
},
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief test first require function / expected datatype & arg. mismatch /// @brief test first require function / expected datatype & arg. mismatch
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////