mirror of https://gitee.com/bigwinds/arangodb
CXX implementation of AQL `LIKE`
This commit is contained in:
parent
0f1ce26e41
commit
5dab23a3b7
|
@ -1,6 +1,8 @@
|
|||
v2.7.0 (XXXX-XX-XX)
|
||||
-------------------
|
||||
|
||||
* implemented AQL `LIKE` function using ICU regexes
|
||||
|
||||
* add `RETURN DISTINCT` for AQL queries to return unique results:
|
||||
|
||||
FOR doc IN collection
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "Aql/ExecutionBlock.h"
|
||||
#include "Aql/CollectionScanner.h"
|
||||
#include "Aql/ExecutionEngine.h"
|
||||
#include "Aql/Functions.h"
|
||||
#include "Basics/ScopeGuard.h"
|
||||
#include "Basics/StringUtils.h"
|
||||
#include "Basics/StringBuffer.h"
|
||||
|
@ -1442,7 +1443,16 @@ bool IndexRangeBlock::initRanges () {
|
|||
}
|
||||
else {
|
||||
// no V8 context required!
|
||||
buildExpressions();
|
||||
|
||||
Functions::InitializeThreadContext();
|
||||
try {
|
||||
buildExpressions();
|
||||
Functions::DestroyThreadContext();
|
||||
}
|
||||
catch (...) {
|
||||
Functions::DestroyThreadContext();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2894,7 +2904,16 @@ void CalculationBlock::doEvaluation (AqlItemBlock* result) {
|
|||
|
||||
if (! _expression->isV8()) {
|
||||
// an expression that does not require V8
|
||||
executeExpression(result);
|
||||
|
||||
Functions::InitializeThreadContext();
|
||||
try {
|
||||
executeExpression(result);
|
||||
Functions::DestroyThreadContext();
|
||||
}
|
||||
catch (...) {
|
||||
Functions::DestroyThreadContext();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
else {
|
||||
bool const isRunningInCluster = triagens::arango::ServerState::instance()->isRunningInCluster();
|
||||
|
|
|
@ -127,7 +127,7 @@ std::unordered_map<std::string, Function const> const Executor::FunctionNames{
|
|||
{ "UPPER", Function("UPPER", "AQL_UPPER", "s", true, true, false, true, true) },
|
||||
{ "SUBSTRING", Function("SUBSTRING", "AQL_SUBSTRING", "s,n|n", true, true, false, true, true) },
|
||||
{ "CONTAINS", Function("CONTAINS", "AQL_CONTAINS", "s,s|b", true, true, false, true, true) },
|
||||
{ "LIKE", Function("LIKE", "AQL_LIKE", "s,r|b", true, true, false, true, true) },
|
||||
{ "LIKE", Function("LIKE", "AQL_LIKE", "s,r|b", true, true, false, true, true, &Functions::Like) },
|
||||
{ "LEFT", Function("LEFT", "AQL_LEFT", "s,n", true, true, false, true, true) },
|
||||
{ "RIGHT", Function("RIGHT", "AQL_RIGHT", "s,n", true, true, false, true, true) },
|
||||
{ "TRIM", Function("TRIM", "AQL_TRIM", "s|ns", true, true, false, true, true) },
|
||||
|
|
|
@ -673,13 +673,14 @@ AqlValue Expression::executeSimpleExpression (AstNode const* node,
|
|||
for (size_t i = 0; i < n; ++i) {
|
||||
TRI_document_collection_t const* myCollection = nullptr;
|
||||
auto arg = member->getMemberUnchecked(i);
|
||||
|
||||
if (arg->type == NODE_TYPE_COLLECTION) {
|
||||
char const* collectionName = arg->getStringValue();
|
||||
parameters.emplace_back(std::make_pair(AqlValue(new Json(TRI_UNKNOWN_MEM_ZONE, collectionName, strlen(collectionName))), nullptr));
|
||||
parameters.emplace_back(AqlValue(new Json(TRI_UNKNOWN_MEM_ZONE, collectionName, strlen(collectionName))), nullptr);
|
||||
}
|
||||
else {
|
||||
auto value = executeSimpleExpression(arg, &myCollection, trx, argv, startPos, vars, regs, false);
|
||||
parameters.emplace_back(std::make_pair(value, myCollection));
|
||||
parameters.emplace_back(value, myCollection);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -46,10 +46,89 @@ using namespace triagens::aql;
|
|||
using Json = triagens::basics::Json;
|
||||
using CollectionNameResolver = triagens::arango::CollectionNameResolver;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
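/// @brief thread-local cache for compiled regexes, keyed by the regex pattern
/// string. the cache owns the matchers; they are released in
/// Functions::DestroyThreadContext. a nullptr entry records a pattern that
/// failed to compile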
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
thread_local std::unordered_map<std::string, RegexMatcher*> RegexCache;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- private functions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
/// @brief build an anchored regex pattern string from a LIKE search pattern
///
/// translates the `length` bytes starting at `ptr` as follows:
/// - `%` becomes `.*` and `_` becomes `.`
/// - `\%` and `\_` become a literal `%` / `_`, `\\` a literal backslash
/// - regex metacharacters (`* ? + [ ( ) { } ^ $ | .`) are backslash-escaped so
///   that they only match themselves; a backslash directly in front of such a
///   character is dropped
/// - a backslash in front of any other character is kept as a literal
///   backslash; a single trailing backslash is dropped
/// the result is wrapped in `^`...`$` and carries a `(?i)` flag right after the
/// `^` when caseInsensitive is set. the pattern is only built here, it is not
/// compiled
////////////////////////////////////////////////////////////////////////////////

static std::string BuildRegexPattern (char const* ptr,
                                      size_t length,
                                      bool caseInsensitive) {
  // pattern is always anchored
  std::string pattern("^");
  if (caseInsensitive) {
    pattern.append("(?i)");
  }

  // true while the previous character was a backslash that has not been
  // consumed yet
  bool escaped = false;

  for (size_t i = 0; i < length; ++i) {
    char const c = ptr[i];

    if (c == '\\') {
      if (escaped) {
        // two backslashes in a row: literal backslash
        pattern.append("\\\\");
      }
      escaped = ! escaped;
      continue;
    }

    if (c == '%') {
      if (escaped) {
        // literal %
        pattern.push_back('%');
      }
      else {
        // wildcard: any number of characters
        pattern.append(".*");
      }
    }
    else if (c == '_') {
      if (escaped) {
        // literal underscore
        pattern.push_back('_');
      }
      else {
        // wildcard: exactly one character
        pattern.push_back('.');
      }
    }
    else if (c == '*' || c == '?' || c == '+' || c == '[' || c == '(' ||
             c == ')' || c == '{' || c == '}' || c == '^' || c == '$' ||
             c == '|' || c == '.') {
      // character with special meaning in a regex. `*` must be part of this
      // list, otherwise a literal `*` in the LIKE pattern would act as a
      // quantifier (or make the regex invalid when it comes first)
      pattern.push_back('\\');
      pattern.push_back(c);
    }
    else {
      if (escaped) {
        // found a backslash followed by no special character
        pattern.append("\\\\");
      }

      // literal character
      pattern.push_back(c);
    }

    escaped = false;
  }

  // always anchor the pattern
  pattern.push_back('$');

  return pattern;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief extract a function parameter from the arguments list
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -307,6 +386,32 @@ static void AppendAsString (triagens::basics::StringBuffer& buffer,
|
|||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- AQL functions public helpers
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief called before a query starts
|
||||
/// has the chance to set up any thread-local storage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Functions::InitializeThreadContext () {
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief called when a query ends
|
||||
/// its responsibility is to clear any thread-local storage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Functions::DestroyThreadContext () {
|
||||
if (! RegexCache.empty()) {
|
||||
for (auto& it : RegexCache) {
|
||||
delete it.second;
|
||||
}
|
||||
RegexCache.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- AQL function bindings
|
||||
// -----------------------------------------------------------------------------
|
||||
|
@ -586,6 +691,70 @@ AqlValue Functions::Concat (triagens::aql::Query*,
|
|||
return AqlValue(jr);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief function LIKE
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
AqlValue Functions::Like (triagens::aql::Query* query,
|
||||
triagens::arango::AqlTransaction* trx,
|
||||
FunctionParameters const& parameters) {
|
||||
if (parameters.size() < 2) {
|
||||
THROW_ARANGO_EXCEPTION_PARAMS(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH, "LIKE", (int) 2, (int) 3);
|
||||
}
|
||||
|
||||
bool const caseInsensitive = GetBooleanParameter(trx, parameters, 2, false);
|
||||
triagens::basics::StringBuffer buffer(TRI_UNKNOWN_MEM_ZONE, 24);
|
||||
|
||||
// build pattern from parameter #1
|
||||
auto const regex = ExtractFunctionParameter(trx, parameters, 1, false);
|
||||
AppendAsString(buffer, regex.json());
|
||||
size_t const length = buffer.length();
|
||||
|
||||
std::string const pattern = std::move(BuildRegexPattern(buffer.c_str(), length, caseInsensitive));
|
||||
|
||||
auto it = RegexCache.find(pattern);
|
||||
|
||||
RegexMatcher* matcher = nullptr;
|
||||
|
||||
// check regex cache
|
||||
if (it != RegexCache.end()) {
|
||||
matcher = (*it).second;
|
||||
}
|
||||
else {
|
||||
matcher = triagens::basics::Utf8Helper::DefaultUtf8Helper.buildMatcher(pattern);
|
||||
try {
|
||||
// insert into cache, no matter if pattern is valid or not
|
||||
RegexCache.emplace(pattern, matcher);
|
||||
}
|
||||
catch (...) {
|
||||
delete matcher;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
if (matcher == nullptr) {
|
||||
// compiling regular expression failed
|
||||
RegisterWarning(query, "LIKE", TRI_ERROR_QUERY_INVALID_REGEX);
|
||||
return AqlValue(new Json(Json::Null));
|
||||
}
|
||||
|
||||
// extract value
|
||||
buffer.clear();
|
||||
auto const value = ExtractFunctionParameter(trx, parameters, 0, false);
|
||||
AppendAsString(buffer, value.json());
|
||||
|
||||
bool error = false;
|
||||
bool const result = triagens::basics::Utf8Helper::DefaultUtf8Helper.matches(matcher, buffer.c_str(), buffer.length(), error);
|
||||
|
||||
if (error) {
|
||||
// compiling regular expression failed
|
||||
RegisterWarning(query, "LIKE", TRI_ERROR_QUERY_INVALID_REGEX);
|
||||
return AqlValue(new Json(Json::Null));
|
||||
}
|
||||
|
||||
return AqlValue(new Json(result));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief function PASSTHRU
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -54,12 +54,31 @@ namespace triagens {
|
|||
triagens::arango::AqlTransaction*,
|
||||
FunctionParameters const&)> FunctionImplementation;
|
||||
|
||||
|
||||
struct Functions {
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- AQL functions public helpers
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief called before a query starts
|
||||
/// has the chance to set up any thread-local storage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void InitializeThreadContext ();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief called when a query ends
|
||||
/// its responsibility is to clear any thread-local storage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void DestroyThreadContext ();
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- AQL function bindings
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
struct Functions {
|
||||
|
||||
static AqlValue IsNull (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
static AqlValue IsBool (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
static AqlValue IsNumber (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
|
@ -72,6 +91,7 @@ namespace triagens {
|
|||
static AqlValue ToArray (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
static AqlValue Length (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
static AqlValue Concat (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
static AqlValue Like (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
static AqlValue Passthru (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
static AqlValue Unset (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
static AqlValue Keep (triagens::aql::Query*, triagens::arango::AqlTransaction*, FunctionParameters const&);
|
||||
|
|
|
@ -133,6 +133,87 @@ function ahuacatlStringFunctionsTestSuite () {
|
|||
});
|
||||
},
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test like function, invalid arguments
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testLikeInvalidCxx : function () {
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, "RETURN NOOPT(LIKE())");
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, "RETURN NOOPT(LIKE(\"test\"))");
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, "RETURN NOOPT(LIKE(\"test\", \"meow\", \"foo\", \"bar\"))");
|
||||
},
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test like function
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testLikeCxx : function () {
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"test\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"%test\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"test%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"%test%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%test%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%is%test%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%g\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%n\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"This%n\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"his%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"%g\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"%G\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"this is a test string\", \"this%test%is%\"))"));
|
||||
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"%\", \"\\%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"a%c\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"ac\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"a\\\\%\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"\\\\%a%\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"a%c\", \"\\\\%\\\\%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"%%\", \"\\\\%\\\\%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"_\", \"\\\\_\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"_\", \"\\\\_%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abcd\", \"_bcd\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abcde\", \"_bcd%\"))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"abcde\", \"\\\\_bcd%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"\\\\abc\", \"\\\\\\\\%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"\\abc\", \"\\a%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"[ ] ( ) % * . + -\", \"[%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"[ ] ( ) % * . + -\", \"[ ] ( ) \\% * . + -\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"[ ] ( ) % * . + -\", \"%. +%\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abc^def$g\", \"abc^def$g\"))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abc^def$g\", \"%^%$g\"))"));
|
||||
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"ABCD\", \"abcd\", false))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"ABCD\", \"abcd\", true))"));
|
||||
assertEqual([ false ], getQueryResults("RETURN NOOPT(LIKE(\"abcd\", \"ABCD\", false))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"abcd\", \"ABCD\", true))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"MÖterTräNenMÜtterSöhne\", \"MöterTräNenMütterSöhne\", true))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"MÖterTräNenMÜtterSöhne\", \"mötertränenmüttersöhne\", true))"));
|
||||
assertEqual([ true ], getQueryResults("RETURN NOOPT(LIKE(\"MÖterTräNenMÜtterSöhne\", \"MÖTERTRÄNENMÜTTERSÖHNE\", true))"));
|
||||
|
||||
assertEqual([ [ true, false, true, false ] ], getQueryResults("RETURN [ NOOPT(LIKE(\"Möter\", \"m_ter\", true)), NOOPT(LIKE(\"Möter\", \"m_ter\", false)), NOOPT(LIKE(\"Möter\", \"m_ter\", true)), NOOPT(LIKE(\"Möter\", \"m_ter\", false)) ]"));
|
||||
},
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test like with special characters
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testLikeSpecialCharsCxx : function () {
|
||||
var data = [
|
||||
"the quick\nbrown fox jumped over\r\nthe lazy dog",
|
||||
"'the \"\\quick\\\n \"brown\\\rfox' jumped",
|
||||
'"the fox"" jumped \\over the \newline \roof"'
|
||||
];
|
||||
|
||||
data.forEach(function(value) {
|
||||
var actual = getQueryResults("RETURN NOOPT(LIKE(" + JSON.stringify(value) + ", 'foobar'))");
|
||||
assertEqual([ false ], actual);
|
||||
|
||||
actual = getQueryResults("RETURN NOOPT(LIKE(" + JSON.stringify(value) + ", " + JSON.stringify(value) + "))");
|
||||
assertEqual([ true ], actual);
|
||||
});
|
||||
},
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test first require function / expected datatype & arg. mismatch
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
Loading…
Reference in New Issue