mirror of https://gitee.com/bigwinds/arangodb
Soundex function implementation with integration tests and minor bug fixes to feature/add-aql-tobase64-tohex-encodeuricomponent-uuid branch (#5851)
This commit is contained in:
parent
35d87bdbd5
commit
c45a1b6ac1
|
@ -113,6 +113,16 @@ COUNT()
|
|||
|
||||
This is an alias for [LENGTH()](#length).
|
||||
|
||||
ENCODE_URI_COMPONENT()
|
||||
-----------
|
||||
|
||||
`ENCODE_URI_COMPONENT(value) → encodedURIComponentString`
|
||||
|
||||
Return *value* percent-encoded for use as a URI component.
|
||||
|
||||
- **value** (string): a string
|
||||
- returns **encodedURIComponentString** (string): the URI component-encoded form of *value*
|
||||
|
||||
FIND_FIRST()
|
||||
------------
|
||||
|
||||
|
@ -532,6 +542,23 @@ SPLIT( "foo-bar-baz", "-", 1 ) // [ "foo", "bar-baz" ]
|
|||
SPLIT( "foo, bar & baz", [ ", ", " & " ] ) // [ "foo", "bar", "baz" ]
|
||||
```
|
||||
|
||||
SOUNDEX()
|
||||
-----------
|
||||
|
||||
`SOUNDEX(value) → soundexString`
|
||||
|
||||
Return the soundex fingerprint of *value*.
|
||||
|
||||
- **value** (string): a string
|
||||
- returns **soundexString** (string): a soundex fingerprint of *value*
|
||||
|
||||
```js
|
||||
SOUNDEX( "example" ) // "E251"
|
||||
SOUNDEX( "ekzampul" ) // "E251"
|
||||
SOUNDEX( "soundex" ) // "S532"
|
||||
SOUNDEX( "sounteks" ) // "S532"
|
||||
```
|
||||
|
||||
SUBSTITUTE()
|
||||
------------
|
||||
|
||||
|
@ -622,6 +649,26 @@ Return a substring of *value*.
|
|||
substring from *offset* to the end of the string
|
||||
- returns **substring** (string): a substring of *value*
|
||||
|
||||
TO_BASE64()
|
||||
-----------
|
||||
|
||||
`TO_BASE64(value) → toBase64String`
|
||||
|
||||
Return the base64 representation of *value*.
|
||||
|
||||
- **value** (string): a string
|
||||
- returns **toBase64String** (string): a base64 representation of *value*
|
||||
|
||||
TO_HEX()
|
||||
-----------
|
||||
|
||||
`TO_HEX(value) → toHexString`
|
||||
|
||||
Return the hex representation of *value*.
|
||||
|
||||
- **value** (string): a string
|
||||
- returns **toHexString** (string): a hex representation of *value*
|
||||
|
||||
TRIM()
|
||||
------
|
||||
|
||||
|
@ -669,3 +716,12 @@ All other characters are returned unchanged.
|
|||
- **value** (string): a string
|
||||
- returns **upperCaseString** (string): *value* with lower-case characters converted
|
||||
to upper-case characters
|
||||
|
||||
UUID()
|
||||
-----------
|
||||
|
||||
`UUID() → UUIDString`
|
||||
|
||||
Return a randomly generated universally unique identifier (UUID).
|
||||
|
||||
- returns **UUIDString** (string): a universally unique identifier
|
||||
|
|
|
@ -196,6 +196,11 @@ void AqlFunctionFeature::addStringFunctions() {
|
|||
add({"SHA512", ".", true, false, true, &Functions::Sha512});
|
||||
add({"HASH", ".", true, false, true, &Functions::Hash});
|
||||
add({"RANDOM_TOKEN", ".", false, true, true, &Functions::RandomToken});
|
||||
add({"TO_BASE64", ".", true, false, true, &Functions::ToBase64});
|
||||
add({"TO_HEX", ".", true, false, true, &Functions::ToHex});
|
||||
add({"ENCODE_URI_COMPONENT", ".", true, false, true, &Functions::EncodeURIComponent});
|
||||
add({"UUID", "", true, false, true, &Functions::UUID});
|
||||
add({"SOUNDEX", ".", true, false, true, &Functions::Soundex});
|
||||
// FULLTEXT is replaced by the AQL optimizer with an index lookup
|
||||
add({"FULLTEXT", ".h,.,.|." , false, true, false, &Functions::NotImplemented});
|
||||
}
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#include "Aql/RegexCache.h"
|
||||
#include "Aql/V8Executor.h"
|
||||
#include "Basics/Exceptions.h"
|
||||
#include "Basics/Mutex.h"
|
||||
#include "Basics/MutexLocker.h"
|
||||
#include "Basics/StringBuffer.h"
|
||||
#include "Basics/StringRef.h"
|
||||
#include "Basics/StringUtils.h"
|
||||
|
@ -63,6 +65,10 @@
|
|||
#include "VocBase/LogicalCollection.h"
|
||||
#include "VocBase/ManagedDocumentResult.h"
|
||||
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
#include <boost/uuid/uuid_generators.hpp>
|
||||
#include <boost/uuid/uuid_io.hpp>
|
||||
|
||||
#include <s2/s2loop.h>
|
||||
#include <date/date.h>
|
||||
#include <date/iso_week.h>
|
||||
|
@ -1416,12 +1422,93 @@ AqlValue Functions::ToString(arangodb::aql::Query*,
|
|||
transaction::Methods* trx,
|
||||
VPackFunctionParameters const& parameters) {
|
||||
AqlValue value = ExtractFunctionParameterValue(parameters, 0);
|
||||
|
||||
transaction::StringBufferLeaser buffer(trx);
|
||||
arangodb::basics::VPackStringBufferAdapter adapter(buffer->stringBuffer());
|
||||
|
||||
::appendAsString(trx, adapter, value);
|
||||
return AqlValue(buffer->begin(), buffer->length());
|
||||
}
|
||||
|
||||
/// @brief function TO_BASE64
|
||||
AqlValue Functions::ToBase64(arangodb::aql::Query*,
|
||||
transaction::Methods* trx,
|
||||
VPackFunctionParameters const& parameters) {
|
||||
ValidateParameters(parameters, "TO_BASE64", 1, 1);
|
||||
AqlValue value = ExtractFunctionParameterValue(parameters, 0);
|
||||
|
||||
transaction::StringBufferLeaser buffer(trx);
|
||||
arangodb::basics::VPackStringBufferAdapter adapter(buffer->stringBuffer());
|
||||
|
||||
::appendAsString(trx, adapter, value);
|
||||
return AqlValue(buffer->begin(), buffer->length());
|
||||
|
||||
std::string encoded = basics::StringUtils::encodeBase64(std::string(buffer->begin(), buffer->length()));
|
||||
|
||||
return AqlValue(encoded);
|
||||
}
|
||||
|
||||
/// @brief function TO_HEX
|
||||
AqlValue Functions::ToHex(arangodb::aql::Query*,
|
||||
transaction::Methods* trx,
|
||||
VPackFunctionParameters const& parameters) {
|
||||
ValidateParameters(parameters, "TO_HEX", 1, 1);
|
||||
AqlValue value = ExtractFunctionParameterValue(parameters, 0);
|
||||
|
||||
transaction::StringBufferLeaser buffer(trx);
|
||||
arangodb::basics::VPackStringBufferAdapter adapter(buffer->stringBuffer());
|
||||
|
||||
::appendAsString(trx, adapter, value);
|
||||
|
||||
std::string encoded = basics::StringUtils::encodeHex(std::string(buffer->begin(), buffer->length()));
|
||||
|
||||
return AqlValue(encoded);
|
||||
}
|
||||
|
||||
/// @brief function ENCODE_URI_COMPONENT
|
||||
AqlValue Functions::EncodeURIComponent(arangodb::aql::Query*,
|
||||
transaction::Methods* trx,
|
||||
VPackFunctionParameters const& parameters) {
|
||||
ValidateParameters(parameters, "ENCODE_URI_COMPONENT", 1, 1);
|
||||
AqlValue value = ExtractFunctionParameterValue(parameters, 0);
|
||||
|
||||
transaction::StringBufferLeaser buffer(trx);
|
||||
arangodb::basics::VPackStringBufferAdapter adapter(buffer->stringBuffer());
|
||||
|
||||
::appendAsString(trx, adapter, value);
|
||||
|
||||
std::string encoded = basics::StringUtils::encodeURIComponent(std::string(buffer->begin(), buffer->length()));
|
||||
|
||||
return AqlValue(encoded);
|
||||
}
|
||||
|
||||
/// @brief function UUID
|
||||
static Mutex theMutex;
|
||||
|
||||
AqlValue Functions::UUID(arangodb::aql::Query*,
|
||||
transaction::Methods* trx,
|
||||
VPackFunctionParameters const& parameters){
|
||||
MUTEX_LOCKER(mutexLocker, theMutex);
|
||||
|
||||
std::string uuid = boost::uuids::to_string(boost::uuids::random_generator()());
|
||||
|
||||
return AqlValue(uuid);
|
||||
}
|
||||
|
||||
/// @brief function SOUNDEX
|
||||
AqlValue Functions::Soundex(arangodb::aql::Query*,
|
||||
transaction::Methods* trx,
|
||||
VPackFunctionParameters const& parameters) {
|
||||
ValidateParameters(parameters, "SOUNDEX", 1, 1);
|
||||
AqlValue value = ExtractFunctionParameterValue(parameters, 0);
|
||||
|
||||
transaction::StringBufferLeaser buffer(trx);
|
||||
arangodb::basics::VPackStringBufferAdapter adapter(buffer->stringBuffer());
|
||||
|
||||
::appendAsString(trx, adapter, value);
|
||||
|
||||
std::string encoded = basics::StringUtils::soundex(basics::StringUtils::trim(basics::StringUtils::tolower(std::string(buffer->begin(), buffer->length()))));
|
||||
|
||||
return AqlValue(encoded);
|
||||
}
|
||||
|
||||
/// @brief function TO_BOOL
|
||||
|
|
|
@ -151,8 +151,16 @@ struct Functions {
|
|||
VPackFunctionParameters const&);
|
||||
static AqlValue RegexReplace(arangodb::aql::Query*, transaction::Methods*,
|
||||
VPackFunctionParameters const&);
|
||||
|
||||
|
||||
static AqlValue ToBase64(arangodb::aql::Query*,transaction::Methods*,
|
||||
VPackFunctionParameters const&);
|
||||
static AqlValue ToHex(arangodb::aql::Query*, transaction::Methods*,
|
||||
VPackFunctionParameters const&);
|
||||
static AqlValue EncodeURIComponent(arangodb::aql::Query*, transaction::Methods*,
|
||||
VPackFunctionParameters const&);
|
||||
static AqlValue UUID(arangodb::aql::Query*, transaction::Methods*,
|
||||
VPackFunctionParameters const&);
|
||||
static AqlValue Soundex(arangodb::aql::Query*, transaction::Methods*,
|
||||
VPackFunctionParameters const&);
|
||||
// Date
|
||||
static AqlValue DateFromParameters(arangodb::aql::Query* query,
|
||||
transaction::Methods* trx,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* jshint globalstrict:false, strict:false, maxlen:5000 */
|
||||
/* global assertEqual, assertNotEqual, assertTrue */
|
||||
/* global assertEqual, assertNotEqual, assertTrue, assertMatch */
|
||||
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
// / @brief tests for query language, functions
|
||||
|
@ -57,6 +57,151 @@ function ahuacatlStringFunctionsTestSuite () {
|
|||
tearDown: function () {
|
||||
},
|
||||
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
// / @brief test tobase64
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testToBase64Values: function () {
|
||||
[
|
||||
[ null, "" ],
|
||||
[ -13, "LTEz" ],
|
||||
[ 10, "MTA="],
|
||||
[ true, "dHJ1ZQ==" ],
|
||||
[ false, "ZmFsc2U=" ],
|
||||
[ "", "" ],
|
||||
[ "foobar", "Zm9vYmFy" ],
|
||||
[ " ", "IA==" ],
|
||||
[ "The quick brown fox jumps over the lazy dog", "VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZw==" ],
|
||||
].forEach(function(test) {
|
||||
assertEqual([ test[1] ], getQueryResults('RETURN TO_BASE64(' + JSON.stringify(test[0]) + ')'), test);
|
||||
});
|
||||
},
|
||||
|
||||
|
||||
testToBase64InvalidNumberOfParameters: function () {
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN TO_BASE64()');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN TO_BASE64("test", "meow")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN TO_BASE64("test", "meow", "foo")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN TO_BASE64("test", "meow", "foo", "bar")');
|
||||
},
|
||||
|
||||
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
// / @brief test tohex
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testToHexValues: function () {
|
||||
[
|
||||
[ null, "" ],
|
||||
[ -13, "2d3133" ],
|
||||
[ 10, "3130"],
|
||||
[ true, "74727565" ],
|
||||
[ false, "66616c7365" ],
|
||||
[ "", "" ],
|
||||
[ "foobar", "666f6f626172" ],
|
||||
[ " ", "20" ],
|
||||
[ "The quick brown fox jumps over the lazy dog", "54686520717569636b2062726f776e20666f78206a756d7073206f76657220746865206c617a7920646f67"],
|
||||
].forEach(function(test) {
|
||||
assertEqual([ test[1] ], getQueryResults('RETURN TO_HEX(' + JSON.stringify(test[0]) + ')'), test);
|
||||
});
|
||||
},
|
||||
|
||||
|
||||
testToHexInvalidNumberOfParameters: function () {
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN TO_HEX()');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN TO_HEX("test", "meow")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN TO_HEX("test", "meow", "foo")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN TO_HEX("test", "meow", "foo", "bar")');
|
||||
},
|
||||
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
// / @brief test encodeURIcomponent
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testEncodeURIComponentValues: function () {
|
||||
[
|
||||
[ null, "" ],
|
||||
[ -13, "-13" ],
|
||||
[ 10, "10"],
|
||||
[ true, "true" ],
|
||||
[ false, "false" ],
|
||||
[ "", "" ],
|
||||
[ "foobar", "foobar" ],
|
||||
[ " ", "%20" ],
|
||||
[ "?x=шеллы", "%3Fx%3D%D1%88%D0%B5%D0%BB%D0%BB%D1%8B"],
|
||||
[ "?x=test", "%3Fx%3Dtest"],
|
||||
[ "The quick brown fox jumps over the lazy dog", "The%20quick%20brown%20fox%20jumps%20over%20the%20lazy%20dog"],
|
||||
[ "https://w3schools.com/my test.asp?name=ståle&car=saab", "https%3A%2F%2Fw3schools.com%2Fmy%20test.asp%3Fname%3Dst%C3%A5le%26car%3Dsaab"],
|
||||
].forEach(function(test) {
|
||||
assertEqual([ test[1] ], getQueryResults('RETURN ENCODE_URI_COMPONENT(' + JSON.stringify(test[0]) + ')'), test);
|
||||
});
|
||||
},
|
||||
|
||||
|
||||
testEncodeURIComponentInvalidNumberOfParameters: function () {
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN ENCODE_URI_COMPONENT()');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN ENCODE_URI_COMPONENT("test", "meow")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN ENCODE_URI_COMPONENT("test", "meow", "foo")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN ENCODE_URI_COMPONENT("test", "meow", "foo", "bar")');
|
||||
},
|
||||
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
// / @brief test UUID
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testUUIDValues: function () {
|
||||
assertMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/, getQueryResults('RETURN UUID()'));
|
||||
},
|
||||
|
||||
testUUIDInvalidNumberOfParameters: function () {
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN UUID("test")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN UUID("test", "meow")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN UUID("test", "meow", "foo")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN UUID("test", "meow", "foo", "bar")');
|
||||
},
|
||||
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
// / @brief test Soundex
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
testToSoundexValues: function () {
|
||||
[
|
||||
[ null, "" ],
|
||||
[ "a", "A000" ],
|
||||
[ "ab", "A100" ],
|
||||
[ "text", "T230" ],
|
||||
[ "tixt", "T230"],
|
||||
[ "Text", "T230" ],
|
||||
[ "Tixt", "T230"],
|
||||
[ "tExT", "T230" ],
|
||||
[ "tIxT", "T230"],
|
||||
[ true, "T600" ],
|
||||
[ false, "F420" ],
|
||||
[ "", "" ],
|
||||
[ " ", ""],
|
||||
[ "\n", ""],
|
||||
[ " ", "" ],
|
||||
[ " foobar", "F160" ],
|
||||
[ "foobar ", "F160" ],
|
||||
[ " foobar ", "F160" ],
|
||||
[ "foobar", "F160" ],
|
||||
[ "SOUNDEX", "S532" ],
|
||||
[ "SOUNTEKS", "S532" ],
|
||||
[ "mötör", "M360" ],
|
||||
[ "2m2ö2t2ö2r2", "M360" ],
|
||||
[ "Öööööö", "" ],
|
||||
[ "The quick brown fox jumps over the lazy dog", "T221"],
|
||||
].forEach(function(test) {
|
||||
assertEqual([ test[1] ], getQueryResults('RETURN SOUNDEX(' + JSON.stringify(test[0]) + ')'), test);
|
||||
});
|
||||
},
|
||||
|
||||
testSoundexInvalidNumberOfParameters: function () {
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN SOUNDEX()');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN SOUNDEX("test", "meow")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN SOUNDEX("test", "meow", "foo")');
|
||||
assertQueryError(errors.ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH.code, 'RETURN SOUNDEX("test", "meow", "foo", "bar")');
|
||||
},
|
||||
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
// / @brief test JSON_STRINGIFY
|
||||
// //////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -23,6 +23,9 @@
|
|||
|
||||
#include "StringUtils.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
|
||||
|
@ -40,6 +43,29 @@
|
|||
// -----------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
static char const* hexValuesLower = "0123456789abcdef";
|
||||
static char const* hexValuesUpper = "0123456789ABCDEF";
|
||||
|
||||
/// @brief maps an ASCII letter to its soundex digit ('1'..'6')
///
/// Upper- and lower-case letters yield the same digit. Every character that
/// has no soundex code (vowels, 'h', 'w', 'y', digits, whitespace, bytes
/// outside the ASCII letter range, ...) yields '\0'.
char soundexCode(char c) {
  // fold ASCII upper-case onto lower-case so that the result does not depend
  // on whether the caller normalized the input beforehand
  if (c >= 'A' && c <= 'Z') {
    c = static_cast<char>(c - 'A' + 'a');
  }

  switch (c) {
    case 'b': case 'f': case 'p': case 'v':
      return '1';
    case 'c': case 'g': case 'j': case 'k': case 'q': case 's': case 'x': case 'z':
      return '2';
    case 'd': case 't':
      return '3';
    case 'l':
      return '4';
    case 'm': case 'n':
      return '5';
    case 'r':
      return '6';
    default:
      return '\0';
  }
}
|
||||
|
||||
/// @brief true for a blank, a horizontal tab or an underscore
bool isSpace(char a) {
  switch (a) {
    case ' ':
    case '\t':
    case '_':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
char const* const BASE64_CHARS =
|
||||
|
@ -151,6 +177,7 @@ bool parseHexanumber(char const* inputStr, size_t len, uint32_t* outputInt) {
|
|||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------
|
||||
/// @brief computes the Unicode value of a UTF-16 symbol
|
||||
///-------------------------------------------------------
|
||||
|
@ -192,6 +219,7 @@ bool toUtf8(uint32_t outputInt, std::string& outputStr) {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------
|
||||
/// @brief true when number lies in the range
|
||||
/// U+D800 U+DBFF
|
||||
|
@ -199,13 +227,15 @@ bool toUtf8(uint32_t outputInt, std::string& outputStr) {
|
|||
bool isHighSurrugate(uint32_t number) {
  // inclusive bounds of the range U+D800 .. U+DBFF
  constexpr uint32_t firstHigh = 0xD800;
  constexpr uint32_t lastHigh = 0xDBFF;

  if (number < firstHigh) {
    return false;
  }
  return number <= lastHigh;
}
|
||||
|
||||
///-------------------------------------------------------
|
||||
/// @brief true when number lies in the range
|
||||
/// U+DC00 U+DFFF
|
||||
bool isLowSurrugate(uint32_t number) {
  // inclusive bounds of the range U+DC00 .. U+DFFF
  constexpr uint32_t firstLow = 0xDC00;
  constexpr uint32_t lastLow = 0xDFFF;

  if (number < firstLow) {
    return false;
  }
  return number <= lastLow;
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace arangodb {
|
||||
namespace basics {
|
||||
|
@ -1300,7 +1330,82 @@ std::string urlEncode(char const* src, size_t const len) {
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string encodeURIComponent(std::string const& str) {
|
||||
return encodeURIComponent(str.c_str(), str.size());
|
||||
}
|
||||
|
||||
std::string encodeURIComponent(char const* src, size_t const len){
|
||||
char const* end = src + len;
|
||||
|
||||
if (len >= (SIZE_MAX - 1) / 3) {
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
std::string result;
|
||||
result.reserve(3 * len);
|
||||
|
||||
for (; src < end; ++src) {
|
||||
if (*src == '-' || *src == '_' || *src == '.' || *src == '!' || *src == '~' || *src == '*' || *src == '(' || *src == ')' || *src == '\''|| (*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') || (*src >= '0' && *src <= '9')) {
|
||||
// no need to encode this character
|
||||
result.push_back(*src);
|
||||
} else {
|
||||
// hex-encode the following character
|
||||
result.push_back('%');
|
||||
auto c = static_cast<unsigned char>(*src);
|
||||
result.push_back(::hexValuesUpper[c >> 4]);
|
||||
result.push_back(::hexValuesUpper[c % 16]);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string soundex(std::string const& str) {
|
||||
return soundex(str.c_str(), str.size());
|
||||
}
|
||||
|
||||
std::string soundex(char const* src, size_t const len) {
|
||||
char const* end = src + len;
|
||||
|
||||
while (src < end) {
|
||||
// skip over characters (e.g. whitespace and other non-ASCII letters)
|
||||
// until we find something sensible
|
||||
if ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z')) {
|
||||
break;
|
||||
}
|
||||
++src;
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
if (src != end) {
|
||||
// emit an upper-case character
|
||||
result.push_back(::toupper(*src));
|
||||
src++;
|
||||
char previousCode = '\0';
|
||||
|
||||
while (src < end) {
|
||||
char currentCode = ::soundexCode(*src);
|
||||
if (currentCode != '\0' && currentCode != previousCode) {
|
||||
result.push_back(currentCode);
|
||||
if (result.length() >= 4) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
previousCode = currentCode;
|
||||
src++;
|
||||
}
|
||||
|
||||
// pad result string with '0' chars up to a length of 4
|
||||
while (result.length() < 4) {
|
||||
result.push_back('0');
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// .............................................................................
|
||||
// CONVERT TO STRING
|
||||
// .............................................................................
|
||||
|
@ -2267,8 +2372,6 @@ size_t numEntries(std::string const& sourceStr, std::string const& delimiter) {
|
|||
}
|
||||
|
||||
std::string encodeHex(char const* value, size_t length) {
|
||||
static char const* hexValues = "0123456789abcdef";
|
||||
|
||||
std::string result;
|
||||
result.reserve(length * 2);
|
||||
|
||||
|
@ -2276,8 +2379,8 @@ std::string encodeHex(char const* value, size_t length) {
|
|||
char const* e = p + length;
|
||||
while (p < e) {
|
||||
auto c = static_cast<unsigned char>(*p++);
|
||||
result.push_back(hexValues[c >> 4]);
|
||||
result.push_back(hexValues[c % 16]);
|
||||
result.push_back(::hexValuesLower[c >> 4]);
|
||||
result.push_back(::hexValuesLower[c % 16]);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
@ -208,6 +208,18 @@ std::string urlEncode(char const* src);
|
|||
|
||||
/// @brief url encodes the string
|
||||
std::string urlEncode(char const* src, size_t const len);
|
||||
|
||||
/// @brief uri encodes the component string
|
||||
std::string encodeURIComponent(std:: string const& str);
|
||||
|
||||
/// @brief uri encodes the component string
|
||||
std::string encodeURIComponent(char const* src, size_t const len);
|
||||
|
||||
/// @brief converts input string to soundex code
|
||||
std::string soundex(std::string const& str);
|
||||
|
||||
/// @brief converts input string to soundex code
|
||||
std::string soundex(char const* src, size_t const len);
|
||||
|
||||
/// @brief unicode hexadecimal characters to utf8
|
||||
bool unicodeToUTF8(char const* inputStr, size_t const& len,
|
||||
|
|
Loading…
Reference in New Issue