diff --git a/arangod/V8Server/v8-vocbase.cpp b/arangod/V8Server/v8-vocbase.cpp index b9dc4adf37..4730c665e5 100755 --- a/arangod/V8Server/v8-vocbase.cpp +++ b/arangod/V8Server/v8-vocbase.cpp @@ -1617,13 +1617,13 @@ static void* UnwrapGeneralCursor (v8::Handle cursorObject) { static v8::Handle JS_normalize_string (v8::Arguments const& argv) { v8::HandleScope scope; - TRI_Utf8ValueNFC x(TRI_UNKNOWN_MEM_ZONE, argv[0]); - - if (x.length() == 0) { - return scope.Close(v8::Null()); + if (argv.Length() != 1) { + return scope.Close(v8::ThrowException( + TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION, + "usage: NORMALIZE_STRING()"))); } - - return scope.Close(v8::String::New(*x, x.length())); + + return scope.Close(Utf8Helper::DefaultUtf8Helper.normalize(argv[0])); } //////////////////////////////////////////////////////////////////////////////// @@ -1639,13 +1639,10 @@ static v8::Handle JS_compare_string (v8::Arguments const& argv) { "usage: COMPARE_STRING(, )"))); } - // TODO: get collation language - Utf8Helper u8(""); - v8::String::Value left(argv[0]); v8::String::Value right(argv[1]); - int result = u8.compareUtf16(*left, left.length(), *right, right.length()); + int result = Utf8Helper::DefaultUtf8Helper.compareUtf16(*left, left.length(), *right, right.length()); return scope.Close(v8::Integer::New(result)); } diff --git a/arangosh/V8Client/arangosh.cpp b/arangosh/V8Client/arangosh.cpp index fa86489c15..b91be4019c 100644 --- a/arangosh/V8Client/arangosh.cpp +++ b/arangosh/V8Client/arangosh.cpp @@ -356,43 +356,33 @@ static v8::Handle JS_normalize_string (v8::Arguments const& argv) { v8::HandleScope scope; if (argv.Length() != 1) { - return scope.Close(v8::ThrowException(v8::String::New("usage: NORMALIZE_STRING()"))); + return scope.Close(v8::ThrowException( + TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION, + "usage: NORMALIZE_STRING()"))); } - TRI_Utf8ValueNFC x(TRI_UNKNOWN_MEM_ZONE, argv[0]); - - if (x.length() == 0) { - return scope.Close(v8::Null()); - } - - return scope.Close(v8::String::New(*x, x.length())); + return scope.Close(Utf8Helper::DefaultUtf8Helper.normalize(argv[0])); } //////////////////////////////////////////////////////////////////////////////// /// @brief compare two UTF 16 strings //////////////////////////////////////////////////////////////////////////////// -static v8::Handle JS_compare_strings (v8::Arguments const& argv) { +static v8::Handle JS_compare_string (v8::Arguments const& argv) { v8::HandleScope scope; if (argv.Length() != 2) { - return scope.Close(v8::ThrowException(v8::String::New("usage: COMPARE_STRINGS(, )"))); + return scope.Close(v8::ThrowException( + TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION, + "usage: COMPARE_STRING(, )"))); } v8::String::Value left(argv[0]); - if (!*left) { - return scope.Close(v8::Integer::New(1)); - } - v8::String::Value right(argv[1]); - if (!*right) { - return scope.Close(v8::Integer::New(-1)); - } - Utf8Helper uh(""); - int result = uh.compareUtf16(*left, left.length(), *right, right.length()); + int result = Utf8Helper::DefaultUtf8Helper.compareUtf16(*left, left.length(), *right, right.length()); - return scope.Close(v8::Integer::New(result)); + return scope.Close(v8::Integer::New(result)); } // ----------------------------------------------------------------------------- @@ -1155,8 +1145,8 @@ int main (int argc, char* argv[]) { context->Global()->Set(v8::String::New("NORMALIZE_STRING"), v8::FunctionTemplate::New(JS_normalize_string)->GetFunction(), v8::ReadOnly); - context->Global()->Set(v8::String::New("COMPARE_STRINGS"), - v8::FunctionTemplate::New(JS_compare_strings)->GetFunction(), + context->Global()->Set(v8::String::New("COMPARE_STRING"), + v8::FunctionTemplate::New(JS_compare_string)->GetFunction(), v8::ReadOnly); // ............................................................................. diff --git a/js/server/ahuacatl.js b/js/server/ahuacatl.js index 932dddb878..b1bd15a904 100755 --- a/js/server/ahuacatl.js +++ b/js/server/ahuacatl.js @@ -717,6 +717,10 @@ function AHUACATL_RELATIONAL_UNEQUAL (lhs, rhs) { rhs = null; } + if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) { + return COMPARE_STRING(lhs, rhs) != 0; + } + return (lhs !== rhs); } @@ -760,6 +764,10 @@ function AHUACATL_RELATIONAL_GREATER_REC (lhs, rhs) { rhs = null; } + if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) { + return COMPARE_STRING(lhs, rhs) > 0; + } + if (lhs === rhs) { return null; } @@ -823,6 +831,10 @@ function AHUACATL_RELATIONAL_GREATEREQUAL_REC (lhs, rhs) { rhs = null; } + if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) { + return COMPARE_STRING(lhs, rhs) >= 0; + } + if (lhs === rhs) { return null; } @@ -886,6 +898,10 @@ function AHUACATL_RELATIONAL_LESS_REC (lhs, rhs) { rhs = null; } + if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) { + return COMPARE_STRING(lhs, rhs) < 0; + } + if (lhs === rhs) { return null; } @@ -949,6 +965,10 @@ function AHUACATL_RELATIONAL_LESSEQUAL_REC (lhs, rhs) { rhs = null; } + if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) { + return COMPARE_STRING(lhs, rhs) <= 0; + } + if (lhs === rhs) { return null; } @@ -1015,6 +1035,10 @@ function AHUACATL_RELATIONAL_CMP (lhs, rhs) { rhs = null; } + if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) { + return COMPARE_STRING(lhs, rhs); + } + if (lhs < rhs) { return -1; } diff --git a/js/server/js-ahuacatl.h b/js/server/js-ahuacatl.h index 7b3507f260..4be387e424 100644 --- a/js/server/js-ahuacatl.h +++ b/js/server/js-ahuacatl.h @@ -718,6 +718,10 @@ static string JS_server_ahuacatl = " rhs = null;\n" " }\n" "\n" + " if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n" + " return COMPARE_STRING(lhs, rhs) != 0;\n" + " }\n" + "\n" " return (lhs !== rhs);\n" "}\n" "\n" @@ -761,6 +765,10 @@ static string JS_server_ahuacatl = " rhs = null;\n" " }\n" "\n" + " if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n" + " return COMPARE_STRING(lhs, rhs) > 0;\n" + " }\n" + "\n" " if (lhs === rhs) {\n" " return null;\n" " }\n" @@ -824,6 +832,10 @@ static string JS_server_ahuacatl = " rhs = null;\n" " }\n" "\n" + " if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n" + " return COMPARE_STRING(lhs, rhs) >= 0;\n" + " }\n" + "\n" " if (lhs === rhs) {\n" " return null;\n" " }\n" @@ -887,6 +899,10 @@ static string JS_server_ahuacatl = " rhs = null;\n" " }\n" "\n" + " if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n" + " return COMPARE_STRING(lhs, rhs) < 0;\n" + " }\n" + "\n" " if (lhs === rhs) {\n" " return null;\n" " }\n" @@ -950,6 +966,10 @@ static string JS_server_ahuacatl = " rhs = null;\n" " }\n" " \n" + " if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n" + " return COMPARE_STRING(lhs, rhs) <= 0;\n" + " }\n" + " \n" " if (lhs === rhs) {\n" " return null;\n" " }\n" @@ -1016,6 +1036,10 @@ static string JS_server_ahuacatl = " rhs = null;\n" " }\n" "\n" + " if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n" + " return COMPARE_STRING(lhs, rhs);\n" + " }\n" + "\n" " if (lhs < rhs) {\n" " return -1;\n" " }\n" diff --git a/lib/Basics/Utf8Helper.cpp b/lib/Basics/Utf8Helper.cpp index efdaa5190d..0d3ca1d7fd 100644 --- a/lib/Basics/Utf8Helper.cpp +++ b/lib/Basics/Utf8Helper.cpp @@ -34,9 +34,14 @@ #include "string.h" #endif +#include "Logger/Logger.h" + using namespace triagens::basics; using namespace std; + +Utf8Helper Utf8Helper::DefaultUtf8Helper; + // ----------------------------------------------------------------------------- // --SECTION-- constructors and destructors // ----------------------------------------------------------------------------- @@ -46,32 +51,12 @@ using namespace std; /// @{ //////////////////////////////////////////////////////////////////////////////// -Utf8Helper::Utf8Helper (const string& lang) : _coll(0) { -#ifdef TRI_HAVE_ICU - UErrorCode status = U_ZERO_ERROR; - - if (lang == "") { - _coll = Collator::createInstance(status); - } - else { - Locale locale(lang.c_str()); - _coll = Collator::createInstance(locale, status); - } - - if(U_FAILURE(status)) { - cerr << "error in Collator::createInstance(): " << u_errorName(status) << endl; - return; - } - - // set the default attributes for sorting: - _coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); // A < a - _coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); - _coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status); // UCOL_IDENTICAL, UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY +Utf8Helper::Utf8Helper () : _coll(0) { + setCollatorLanguage(""); +} - if(U_FAILURE(status)) { - cerr << "error in Collator::setAttribute(...): " << u_errorName(status) << endl; - } -#endif +Utf8Helper::Utf8Helper (const string& lang) : _coll(0) { + setCollatorLanguage(lang); } Utf8Helper::~Utf8Helper () { @@ -83,14 +68,14 @@ Utf8Helper::~Utf8Helper () { int Utf8Helper::compareUtf8 (const char* left, size_t leftLength, const char* right, size_t rightLength) { #ifdef TRI_HAVE_ICU if (!_coll) { - cerr << "no Collator!" << endl; + LOGGER_ERROR << "no Collator in Utf8Helper::compareUtf8()!"; return 0; } UErrorCode status = U_ZERO_ERROR; int result = _coll->compareUTF8(StringPiece(left, leftLength), StringPiece(right, rightLength), status); if(U_FAILURE(status)) { - cerr << "error in Collator::compareUTF8(...): " << u_errorName(status) << endl; + LOGGER_ERROR << "error in Collator::compareUTF8(...): " << u_errorName(status); return 0; } @@ -127,7 +112,7 @@ int Utf8Helper::compareUtf8 (const char* left, size_t leftLength, const char* ri int Utf8Helper::compareUtf16 (const uint16_t* left, size_t leftLength, const uint16_t* right, size_t rightLength) { #ifdef TRI_HAVE_ICU if (!_coll) { - cerr << "no Collator!" << endl; + LOGGER_ERROR << "no Collator in Utf8Helper::compareUtf16()!"; return 0; } @@ -163,17 +148,74 @@ int Utf8Helper::compareUtf16 (const uint16_t* left, size_t leftLength, const uin #endif } +v8::Handle Utf8Helper::normalize (v8::Handle obj) { + v8::HandleScope scope; + + v8::String::Value str(obj); + size_t str_len = str.length(); + if (str_len > 0) { #ifdef TRI_HAVE_ICU + UErrorCode erroCode = U_ZERO_ERROR; + const Normalizer2* normalizer = Normalizer2::getNFCInstance(erroCode); + + if (U_FAILURE(erroCode)) { + LOGGER_ERROR << "error in Normalizer2::getNFCInstance(erroCode): " << u_errorName(erroCode); + return scope.Close(v8::Null()); + } + + UnicodeString result = normalizer->normalize(UnicodeString(*str, str_len), erroCode); -char * normalizeUtf8 (TRI_memory_zone_t* zone, const char* utf8, size_t inLength, size_t* outLength) { - return TR_normalize_utf8_to_NFC(zone, utf8, inLength, outLength); -} - -char * normalizeUtf16 (TRI_memory_zone_t* zone, const uint16_t* utf16, size_t inLength, size_t* outLength) { - return TR_normalize_utf16_to_NFC(zone, utf16, inLength, outLength); -} - + if (U_FAILURE(erroCode)) { + LOGGER_ERROR << "error in normalizer->normalize(UnicodeString(*str, str_len), erroCode): " << u_errorName(erroCode); + return scope.Close(v8::Null()); + } + + return scope.Close(v8::String::New(result.getBuffer(), result.length())); +#else + return scope.Close(v8::String::New(*str, str_len)); #endif + } + else { + return scope.Close(v8::String::New("")); + } +} + +void Utf8Helper::setCollatorLanguage (const string& lang) { +#ifdef TRI_HAVE_ICU + + UErrorCode status = U_ZERO_ERROR; + Collator* coll; + if (lang == "") { + coll = Collator::createInstance(status); + } + else { + Locale locale(lang.c_str()); + coll = Collator::createInstance(locale, status); + } + + if(U_FAILURE(status)) { + LOGGER_ERROR << "error in Collator::createInstance(): " << u_errorName(status); + return; + } + + // set the default attributes for sorting: + coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); // A < a + coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); + coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status); // UCOL_IDENTICAL, UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY + + if(U_FAILURE(status)) { + LOGGER_ERROR << "error in Collator::setAttribute(...): " << u_errorName(status); + return; + } + + if (_coll) { + delete _coll; + } + + _coll = coll; +#endif +} + //////////////////////////////////////////////////////////////////////////////// /// @} diff --git a/lib/Basics/Utf8Helper.h b/lib/Basics/Utf8Helper.h index 30ff45769c..9e7e8126e4 100644 --- a/lib/Basics/Utf8Helper.h +++ b/lib/Basics/Utf8Helper.h @@ -30,6 +30,7 @@ #define TRIAGENS_BASICS_UTF8_HELPER_H 1 #include "Basics/Common.h" +#include "v8.h" #ifdef TRI_HAVE_ICU #include "unicode/coll.h" @@ -51,29 +52,20 @@ namespace triagens { Utf8Helper& operator= (Utf8Helper const&); public: - -//////////////////////////////////////////////////////////////////////////////// -/// static functions -//////////////////////////////////////////////////////////////////////////////// - -#ifdef TRI_HAVE_ICU - -//////////////////////////////////////////////////////////////////////////////// -/// @brief normalize an utf8 string (NFC) -//////////////////////////////////////////////////////////////////////////////// - - static char * normalizeUtf8 (TRI_memory_zone_t* zone, const char* utf8, size_t inLength, size_t* outLength); //////////////////////////////////////////////////////////////////////////////// -/// @brief normalize an utf16 string (NFC) and export it to utf8 +/// @brief a default helper //////////////////////////////////////////////////////////////////////////////// - - static char * normalizeUtf16 (TRI_memory_zone_t* zone, const uint16_t* utf16, size_t inLength, size_t* outLength); - -#endif + static Utf8Helper DefaultUtf8Helper; public: +//////////////////////////////////////////////////////////////////////////////// +/// @brief constructor +//////////////////////////////////////////////////////////////////////////////// + + Utf8Helper(); + //////////////////////////////////////////////////////////////////////////////// /// @brief constructor /// @param string lang Use "de_DE", "en_US" or "" (default) @@ -81,7 +73,7 @@ namespace triagens { Utf8Helper(const string& lang); -//////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// /// @brief destructor //////////////////////////////////////////////////////////////////////////////// @@ -111,7 +103,19 @@ namespace triagens { int compareUtf16 (const uint16_t* left, size_t leftLength, const uint16_t* right, size_t rightLength); - private: +//////////////////////////////////////////////////////////////////////////////// +/// @brief normalize a v8 object +//////////////////////////////////////////////////////////////////////////////// + + v8::Handle normalize (v8::Handle obj); + +//////////////////////////////////////////////////////////////////////////////// +/// @brief set collator by language +//////////////////////////////////////////////////////////////////////////////// + + void setCollatorLanguage (const string& lang); + + private: #ifdef TRI_HAVE_ICU Collator* _coll; #else diff --git a/lib/V8/v8-utils.cpp b/lib/V8/v8-utils.cpp index b8e1b9e943..1251ae56db 100644 --- a/lib/V8/v8-utils.cpp +++ b/lib/V8/v8-utils.cpp @@ -1610,7 +1610,7 @@ TRI_Utf8ValueNFC::TRI_Utf8ValueNFC(TRI_memory_zone_t* memoryZone, v8::Handle 0) { _str = TR_normalize_utf16_to_NFC(_memoryZone, *str, str_len, &_length); }