1
0
Fork 0

ICU updates

This commit is contained in:
a-brandt 2012-09-18 15:42:46 +02:00
parent 1a9961cb4c
commit d077c97f2c
7 changed files with 169 additions and 88 deletions

View File

@ -1617,13 +1617,13 @@ static void* UnwrapGeneralCursor (v8::Handle<v8::Object> cursorObject) {
static v8::Handle<v8::Value> JS_normalize_string (v8::Arguments const& argv) {
v8::HandleScope scope;
TRI_Utf8ValueNFC x(TRI_UNKNOWN_MEM_ZONE, argv[0]);
if (x.length() == 0) {
return scope.Close(v8::Null());
if (argv.Length() != 1) {
return scope.Close(v8::ThrowException(
TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION,
"usage: NORMALIZE_STRING(<string>)")));
}
return scope.Close(v8::String::New(*x, x.length()));
return scope.Close(Utf8Helper::DefaultUtf8Helper.normalize(argv[0]));
}
////////////////////////////////////////////////////////////////////////////////
@ -1639,13 +1639,10 @@ static v8::Handle<v8::Value> JS_compare_string (v8::Arguments const& argv) {
"usage: COMPARE_STRING(<left string>, <right string>)")));
}
// TODO: get collation language
Utf8Helper u8("");
v8::String::Value left(argv[0]);
v8::String::Value right(argv[1]);
int result = u8.compareUtf16(*left, left.length(), *right, right.length());
int result = Utf8Helper::DefaultUtf8Helper.compareUtf16(*left, left.length(), *right, right.length());
return scope.Close(v8::Integer::New(result));
}

View File

@ -356,43 +356,33 @@ static v8::Handle<v8::Value> JS_normalize_string (v8::Arguments const& argv) {
v8::HandleScope scope;
if (argv.Length() != 1) {
return scope.Close(v8::ThrowException(v8::String::New("usage: NORMALIZE_STRING(<string>)")));
return scope.Close(v8::ThrowException(
TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION,
"usage: NORMALIZE_STRING(<string>)")));
}
TRI_Utf8ValueNFC x(TRI_UNKNOWN_MEM_ZONE, argv[0]);
if (x.length() == 0) {
return scope.Close(v8::Null());
}
return scope.Close(v8::String::New(*x, x.length()));
return scope.Close(Utf8Helper::DefaultUtf8Helper.normalize(argv[0]));
}
////////////////////////////////////////////////////////////////////////////////
/// @brief compare two UTF 16 strings
////////////////////////////////////////////////////////////////////////////////
static v8::Handle<v8::Value> JS_compare_strings (v8::Arguments const& argv) {
static v8::Handle<v8::Value> JS_compare_string (v8::Arguments const& argv) {
v8::HandleScope scope;
if (argv.Length() != 2) {
return scope.Close(v8::ThrowException(v8::String::New("usage: COMPARE_STRINGS(<left string>, <right string>)")));
return scope.Close(v8::ThrowException(
TRI_CreateErrorObject(TRI_ERROR_ILLEGAL_OPTION,
"usage: COMPARE_STRING(<left string>, <right string>)")));
}
v8::String::Value left(argv[0]);
if (!*left) {
return scope.Close(v8::Integer::New(1));
}
v8::String::Value right(argv[1]);
if (!*right) {
return scope.Close(v8::Integer::New(-1));
}
Utf8Helper uh("");
int result = uh.compareUtf16(*left, left.length(), *right, right.length());
int result = Utf8Helper::DefaultUtf8Helper.compareUtf16(*left, left.length(), *right, right.length());
return scope.Close(v8::Integer::New(result));
return scope.Close(v8::Integer::New(result));
}
// -----------------------------------------------------------------------------
@ -1155,8 +1145,8 @@ int main (int argc, char* argv[]) {
context->Global()->Set(v8::String::New("NORMALIZE_STRING"),
v8::FunctionTemplate::New(JS_normalize_string)->GetFunction(),
v8::ReadOnly);
context->Global()->Set(v8::String::New("COMPARE_STRINGS"),
v8::FunctionTemplate::New(JS_compare_strings)->GetFunction(),
context->Global()->Set(v8::String::New("COMPARE_STRING"),
v8::FunctionTemplate::New(JS_compare_string)->GetFunction(),
v8::ReadOnly);
// .............................................................................

View File

@ -717,6 +717,10 @@ function AHUACATL_RELATIONAL_UNEQUAL (lhs, rhs) {
rhs = null;
}
if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {
return COMPARE_STRING(lhs, rhs) != 0;
}
return (lhs !== rhs);
}
@ -760,6 +764,10 @@ function AHUACATL_RELATIONAL_GREATER_REC (lhs, rhs) {
rhs = null;
}
if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {
return COMPARE_STRING(lhs, rhs) > 0;
}
if (lhs === rhs) {
return null;
}
@ -823,6 +831,10 @@ function AHUACATL_RELATIONAL_GREATEREQUAL_REC (lhs, rhs) {
rhs = null;
}
if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {
return COMPARE_STRING(lhs, rhs) >= 0;
}
if (lhs === rhs) {
return null;
}
@ -886,6 +898,10 @@ function AHUACATL_RELATIONAL_LESS_REC (lhs, rhs) {
rhs = null;
}
if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {
return COMPARE_STRING(lhs, rhs) < 0;
}
if (lhs === rhs) {
return null;
}
@ -949,6 +965,10 @@ function AHUACATL_RELATIONAL_LESSEQUAL_REC (lhs, rhs) {
rhs = null;
}
if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {
return COMPARE_STRING(lhs, rhs) <= 0;
}
if (lhs === rhs) {
return null;
}
@ -1015,6 +1035,10 @@ function AHUACATL_RELATIONAL_CMP (lhs, rhs) {
rhs = null;
}
if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {
return COMPARE_STRING(lhs, rhs);
}
if (lhs < rhs) {
return -1;
}

View File

@ -718,6 +718,10 @@ static string JS_server_ahuacatl =
" rhs = null;\n"
" }\n"
"\n"
" if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n"
" return COMPARE_STRING(lhs, rhs) != 0;\n"
" }\n"
"\n"
" return (lhs !== rhs);\n"
"}\n"
"\n"
@ -761,6 +765,10 @@ static string JS_server_ahuacatl =
" rhs = null;\n"
" }\n"
"\n"
" if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n"
" return COMPARE_STRING(lhs, rhs) > 0;\n"
" }\n"
"\n"
" if (lhs === rhs) {\n"
" return null;\n"
" }\n"
@ -824,6 +832,10 @@ static string JS_server_ahuacatl =
" rhs = null;\n"
" }\n"
"\n"
" if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n"
" return COMPARE_STRING(lhs, rhs) >= 0;\n"
" }\n"
"\n"
" if (lhs === rhs) {\n"
" return null;\n"
" }\n"
@ -887,6 +899,10 @@ static string JS_server_ahuacatl =
" rhs = null;\n"
" }\n"
"\n"
" if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n"
" return COMPARE_STRING(lhs, rhs) < 0;\n"
" }\n"
"\n"
" if (lhs === rhs) {\n"
" return null;\n"
" }\n"
@ -950,6 +966,10 @@ static string JS_server_ahuacatl =
" rhs = null;\n"
" }\n"
" \n"
" if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n"
" return COMPARE_STRING(lhs, rhs) <= 0;\n"
" }\n"
" \n"
" if (lhs === rhs) {\n"
" return null;\n"
" }\n"
@ -1016,6 +1036,10 @@ static string JS_server_ahuacatl =
" rhs = null;\n"
" }\n"
"\n"
" if (leftWeight === AHUACATL_TYPEWEIGHT_STRING) {\n"
" return COMPARE_STRING(lhs, rhs);\n"
" }\n"
"\n"
" if (lhs < rhs) {\n"
" return -1;\n"
" }\n"

View File

@ -34,9 +34,14 @@
#include "string.h"
#endif
#include "Logger/Logger.h"
using namespace triagens::basics;
using namespace std;
Utf8Helper Utf8Helper::DefaultUtf8Helper;
// -----------------------------------------------------------------------------
// --SECTION-- constructors and destructors
// -----------------------------------------------------------------------------
@ -46,32 +51,12 @@ using namespace std;
/// @{
////////////////////////////////////////////////////////////////////////////////
Utf8Helper::Utf8Helper (const string& lang) : _coll(0) {
#ifdef TRI_HAVE_ICU
UErrorCode status = U_ZERO_ERROR;
if (lang == "") {
_coll = Collator::createInstance(status);
}
else {
Locale locale(lang.c_str());
_coll = Collator::createInstance(locale, status);
}
if(U_FAILURE(status)) {
cerr << "error in Collator::createInstance(): " << u_errorName(status) << endl;
return;
}
// set the default attributes for sorting:
_coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); // A < a
_coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
_coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status); // UCOL_IDENTICAL, UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY
////////////////////////////////////////////////////////////////////////////////
/// @brief default constructor
///
/// Starts without a collator (_coll == 0) and then asks setCollatorLanguage()
/// for the collator of the empty language string "". If that call cannot
/// create a collator, _coll stays 0.
////////////////////////////////////////////////////////////////////////////////
Utf8Helper::Utf8Helper () : _coll(0) {
setCollatorLanguage("");
}
if(U_FAILURE(status)) {
cerr << "error in Collator::setAttribute(...): " << u_errorName(status) << endl;
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief constructor with an explicit collator language
///
/// Starts without a collator (_coll == 0) and then delegates to
/// setCollatorLanguage(lang). If that call cannot create a collator, _coll
/// stays 0.
///
/// @param lang  collator language, forwarded unchanged to setCollatorLanguage()
////////////////////////////////////////////////////////////////////////////////
Utf8Helper::Utf8Helper (const string& lang) : _coll(0) {
setCollatorLanguage(lang);
}
Utf8Helper::~Utf8Helper () {
@ -83,14 +68,14 @@ Utf8Helper::~Utf8Helper () {
int Utf8Helper::compareUtf8 (const char* left, size_t leftLength, const char* right, size_t rightLength) {
#ifdef TRI_HAVE_ICU
if (!_coll) {
cerr << "no Collator!" << endl;
LOGGER_ERROR << "no Collator in Utf8Helper::compareUtf8()!";
return 0;
}
UErrorCode status = U_ZERO_ERROR;
int result = _coll->compareUTF8(StringPiece(left, leftLength), StringPiece(right, rightLength), status);
if(U_FAILURE(status)) {
cerr << "error in Collator::compareUTF8(...): " << u_errorName(status) << endl;
LOGGER_ERROR << "error in Collator::compareUTF8(...): " << u_errorName(status);
return 0;
}
@ -127,7 +112,7 @@ int Utf8Helper::compareUtf8 (const char* left, size_t leftLength, const char* ri
int Utf8Helper::compareUtf16 (const uint16_t* left, size_t leftLength, const uint16_t* right, size_t rightLength) {
#ifdef TRI_HAVE_ICU
if (!_coll) {
cerr << "no Collator!" << endl;
LOGGER_ERROR << "no Collator in Utf8Helper::compareUtf16()!";
return 0;
}
@ -163,17 +148,74 @@ int Utf8Helper::compareUtf16 (const uint16_t* left, size_t leftLength, const uin
#endif
}
v8::Handle<v8::Value> Utf8Helper::normalize (v8::Handle<v8::Value> obj) {
v8::HandleScope scope;
v8::String::Value str(obj);
size_t str_len = str.length();
if (str_len > 0) {
#ifdef TRI_HAVE_ICU
UErrorCode erroCode = U_ZERO_ERROR;
const Normalizer2* normalizer = Normalizer2::getNFCInstance(erroCode);
if (U_FAILURE(erroCode)) {
LOGGER_ERROR << "error in Normalizer2::getNFCInstance(erroCode): " << u_errorName(erroCode);
return scope.Close(v8::Null());
}
UnicodeString result = normalizer->normalize(UnicodeString(*str, str_len), erroCode);
char * normalizeUtf8 (TRI_memory_zone_t* zone, const char* utf8, size_t inLength, size_t* outLength) {
return TR_normalize_utf8_to_NFC(zone, utf8, inLength, outLength);
}
char * normalizeUtf16 (TRI_memory_zone_t* zone, const uint16_t* utf16, size_t inLength, size_t* outLength) {
return TR_normalize_utf16_to_NFC(zone, utf16, inLength, outLength);
}
if (U_FAILURE(erroCode)) {
LOGGER_ERROR << "error in normalizer->normalize(UnicodeString(*str, str_len), erroCode): " << u_errorName(erroCode);
return scope.Close(v8::Null());
}
return scope.Close(v8::String::New(result.getBuffer(), result.length()));
#else
return scope.Close(v8::String::New(*str, str_len));
#endif
}
else {
return scope.Close(v8::String::New(""));
}
}
////////////////////////////////////////////////////////////////////////////////
/// @brief set collator by language
///
/// Creates a new collator for `lang` (or the default collator when `lang` is
/// empty), applies the default sorting attributes and, only when every step
/// succeeded, replaces the currently installed collator with the new one.
/// On any ICU error the failure is logged, the new collator is released and
/// the previously installed collator (if any) is left untouched.
/// When TRI_HAVE_ICU is not defined this function does nothing.
///
/// @param lang  locale name handed to the ICU Locale constructor, or "" for
///              the default collator
////////////////////////////////////////////////////////////////////////////////
void Utf8Helper::setCollatorLanguage (const string& lang) {
#ifdef TRI_HAVE_ICU
  UErrorCode status = U_ZERO_ERROR;
  Collator* coll = 0;

  if (lang.empty()) {
    coll = Collator::createInstance(status);
  }
  else {
    Locale locale(lang.c_str());
    coll = Collator::createInstance(locale, status);
  }

  if (U_FAILURE(status)) {
    LOGGER_ERROR << "error in Collator::createInstance(): " << u_errorName(status);
    // deleting a null pointer is a no-op, so this is safe whatever ICU returned
    delete coll;
    return;
  }

  // set the default attributes for sorting:
  coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); // A < a
  coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
  coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status); // UCOL_IDENTICAL, UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY

  if (U_FAILURE(status)) {
    LOGGER_ERROR << "error in Collator::setAttribute(...): " << u_errorName(status);
    // the new collator was never installed; release it instead of leaking it
    delete coll;
    return;
  }

  // swap in the fully configured collator; delete handles a null _coll
  delete _coll;
  _coll = coll;
#endif
}
////////////////////////////////////////////////////////////////////////////////
/// @}

View File

@ -30,6 +30,7 @@
#define TRIAGENS_BASICS_UTF8_HELPER_H 1
#include "Basics/Common.h"
#include "v8.h"
#ifdef TRI_HAVE_ICU
#include "unicode/coll.h"
@ -51,29 +52,20 @@ namespace triagens {
Utf8Helper& operator= (Utf8Helper const&);
public:
////////////////////////////////////////////////////////////////////////////////
/// static functions
////////////////////////////////////////////////////////////////////////////////
#ifdef TRI_HAVE_ICU
////////////////////////////////////////////////////////////////////////////////
/// @brief normalize an utf8 string (NFC)
////////////////////////////////////////////////////////////////////////////////
static char * normalizeUtf8 (TRI_memory_zone_t* zone, const char* utf8, size_t inLength, size_t* outLength);
////////////////////////////////////////////////////////////////////////////////
/// @brief normalize an utf16 string (NFC) and export it to utf8
/// @brief default helper instance, constructed with the default collator language
////////////////////////////////////////////////////////////////////////////////
static char * normalizeUtf16 (TRI_memory_zone_t* zone, const uint16_t* utf16, size_t inLength, size_t* outLength);
#endif
static Utf8Helper DefaultUtf8Helper;
public:
////////////////////////////////////////////////////////////////////////////////
/// @brief constructor
////////////////////////////////////////////////////////////////////////////////
Utf8Helper();
////////////////////////////////////////////////////////////////////////////////
/// @brief constructor
/// @param string lang Use "de_DE", "en_US" or "" (default)
@ -81,7 +73,7 @@ namespace triagens {
Utf8Helper(const string& lang);
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/// @brief destructor
////////////////////////////////////////////////////////////////////////////////
@ -111,7 +103,19 @@ namespace triagens {
int compareUtf16 (const uint16_t* left, size_t leftLength, const uint16_t* right, size_t rightLength);
private:
////////////////////////////////////////////////////////////////////////////////
/// @brief normalize a v8 object
////////////////////////////////////////////////////////////////////////////////
v8::Handle<v8::Value> normalize (v8::Handle<v8::Value> obj);
////////////////////////////////////////////////////////////////////////////////
/// @brief set collator by language
////////////////////////////////////////////////////////////////////////////////
void setCollatorLanguage (const string& lang);
private:
#ifdef TRI_HAVE_ICU
Collator* _coll;
#else

View File

@ -1610,7 +1610,7 @@ TRI_Utf8ValueNFC::TRI_Utf8ValueNFC(TRI_memory_zone_t* memoryZone, v8::Handle<v8:
v8::String::Value str(obj);
size_t str_len = str.length();
if (str_len > 0) {
_str = TR_normalize_utf16_to_NFC(_memoryZone, *str, str_len, &_length);
}