mirror of https://gitee.com/bigwinds/arangodb
Merge branch 'devel' of https://github.com/arangodb/arangodb into devel
This commit is contained in:
commit
233ea81101
|
@ -26,8 +26,6 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "Basics/tri-strings.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
#include "Basics/JsonHelper.h"
|
||||
#include "Basics/json-utilities.h"
|
||||
#include "Basics/StringBuffer.h"
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
|
||||
using namespace triagens::aql;
|
||||
using JsonHelper = triagens::basics::JsonHelper;
|
||||
|
|
|
@ -28,12 +28,10 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "fulltext-query.h"
|
||||
|
||||
#include "Basics/logging.h"
|
||||
#include "Basics/tri-strings.h"
|
||||
#include "Basics/utf8-helper.h"
|
||||
|
||||
#include "fulltext-index.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
#include "FulltextIndex/fulltext-index.h"
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- private functions
|
||||
|
|
|
@ -28,8 +28,7 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "skiplistIndex.h"
|
||||
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
#include "ShapedJson/json-shaper.h"
|
||||
#include "ShapedJson/shaped-json.h"
|
||||
#include "VocBase/document-collection.h"
|
||||
|
|
|
@ -30,16 +30,16 @@
|
|||
#include "index.h"
|
||||
|
||||
#include "Basics/conversions.h"
|
||||
#include "Basics/Exceptions.h"
|
||||
#include "Basics/fasthash.h"
|
||||
#include "Basics/files.h"
|
||||
#include "Basics/json.h"
|
||||
#include "Basics/logging.h"
|
||||
#include "Basics/string-buffer.h"
|
||||
#include "Basics/tri-strings.h"
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "Basics/fasthash.h"
|
||||
#include "Basics/json-utilities.h"
|
||||
#include "Basics/JsonHelper.h"
|
||||
#include "Basics/Exceptions.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
#include "CapConstraint/cap-constraint.h"
|
||||
#include "FulltextIndex/fulltext-index.h"
|
||||
#include "FulltextIndex/fulltext-wordlist.h"
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "voc-shaper.h"
|
||||
|
||||
#include "Basics/Exceptions.h"
|
||||
#include "Basics/Mutex.h"
|
||||
#include "Basics/MutexLocker.h"
|
||||
#include "Basics/ReadLocker.h"
|
||||
|
@ -40,8 +40,7 @@
|
|||
#include "Basics/locks.h"
|
||||
#include "Basics/logging.h"
|
||||
#include "Basics/tri-strings.h"
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "Basics/Exceptions.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
#include "VocBase/document-collection.h"
|
||||
#include "Wal/LogfileManager.h"
|
||||
|
||||
|
@ -1346,24 +1345,30 @@ int TRI_CompareShapeTypes (char const* leftDocument,
|
|||
case TRI_SHAPE_LONG_STRING: {
|
||||
char* leftString;
|
||||
char* rightString;
|
||||
size_t leftLength;
|
||||
size_t rightLength;
|
||||
|
||||
// compare strings
|
||||
// extract the strings
|
||||
if (leftType == TRI_SHAPE_SHORT_STRING) {
|
||||
leftString = (char*) (sizeof(TRI_shape_length_short_string_t) + left._data.data);
|
||||
leftLength = (size_t) *((TRI_shape_length_short_string_t*) left._data.data) - 1;
|
||||
}
|
||||
else {
|
||||
leftString = (char*) (sizeof(TRI_shape_length_long_string_t) + left._data.data);
|
||||
leftLength = (size_t) *((TRI_shape_length_long_string_t*) left._data.data) - 1;
|
||||
}
|
||||
|
||||
if (rightType == TRI_SHAPE_SHORT_STRING) {
|
||||
rightString = (char*) (sizeof(TRI_shape_length_short_string_t) + right._data.data);
|
||||
rightLength = (size_t) *((TRI_shape_length_short_string_t*) right._data.data) - 1;
|
||||
}
|
||||
else {
|
||||
rightString = (char*) (sizeof(TRI_shape_length_long_string_t) + right._data.data);
|
||||
rightLength = (size_t) *((TRI_shape_length_long_string_t*) right._data.data) - 1;
|
||||
}
|
||||
|
||||
return TRI_compare_utf8(leftString, rightString);
|
||||
return TRI_compare_utf8(leftString, leftLength, rightString, rightLength);
|
||||
}
|
||||
case TRI_SHAPE_ARRAY:
|
||||
case TRI_SHAPE_LIST:
|
||||
|
|
|
@ -29,16 +29,14 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "Utf8Helper.h"
|
||||
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/ustdio.h"
|
||||
#include "unicode/uclean.h"
|
||||
|
||||
#include "Basics/logging.h"
|
||||
#include "Basics/tri-strings.h"
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/ucasemap.h"
|
||||
#include "unicode/uclean.h"
|
||||
#include "unicode/unorm2.h"
|
||||
#include "unicode/ustdio.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "Basics/win-utils.h"
|
||||
|
@ -47,7 +45,6 @@
|
|||
using namespace triagens::basics;
|
||||
using namespace std;
|
||||
|
||||
|
||||
Utf8Helper Utf8Helper::DefaultUtf8Helper;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
@ -73,7 +70,8 @@ Utf8Helper::~Utf8Helper () {
|
|||
}
|
||||
}
|
||||
|
||||
int Utf8Helper::compareUtf8 (const char* left, const char* right) const {
|
||||
int Utf8Helper::compareUtf8 (char const* left,
|
||||
char const* right) const {
|
||||
if (! _coll) {
|
||||
LOG_ERROR("no Collator in Utf8Helper::compareUtf8()!");
|
||||
return (strcmp(left, right));
|
||||
|
@ -89,8 +87,27 @@ int Utf8Helper::compareUtf8 (const char* left, const char* right) const {
|
|||
return result;
|
||||
}
|
||||
|
||||
int Utf8Helper::compareUtf8 (char const* left,
|
||||
size_t leftLength,
|
||||
char const* right,
|
||||
size_t rightLength) const {
|
||||
if (! _coll) {
|
||||
LOG_ERROR("no Collator in Utf8Helper::compareUtf8()!");
|
||||
return (strcmp(left, right));
|
||||
}
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int result = _coll->compareUTF8(StringPiece(left, leftLength), StringPiece(right, rightLength), status);
|
||||
if (U_FAILURE(status)) {
|
||||
LOG_ERROR("error in Collator::compareUTF8(...): %s", u_errorName(status));
|
||||
return (strcmp(left, right));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int Utf8Helper::compareUtf16 (const uint16_t* left, size_t leftLength, const uint16_t* right, size_t rightLength) const {
|
||||
if (!_coll) {
|
||||
if (! _coll) {
|
||||
LOG_ERROR("no Collator in Utf8Helper::compareUtf16()!");
|
||||
|
||||
if (leftLength == rightLength) {
|
||||
|
@ -516,10 +533,23 @@ int TRI_compare_utf16 (const uint16_t* left, size_t leftLength, const uint16_t*
|
|||
/// @brief compare two utf8 strings
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int TRI_compare_utf8 (const char* left, const char* right) {
|
||||
int TRI_compare_utf8 (char const* left,
|
||||
char const* right) {
|
||||
return Utf8Helper::DefaultUtf8Helper.compareUtf8(left, right);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief compare two utf8 strings
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int TRI_compare_utf8 (char const* left,
|
||||
size_t leftLength,
|
||||
char const* right,
|
||||
size_t rightLength) {
|
||||
return Utf8Helper::DefaultUtf8Helper.compareUtf8(left, leftLength, right, rightLength);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Lowercase the characters in a UTF-8 string (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -548,6 +578,194 @@ TRI_vector_string_t* TRI_get_words (const char* const text,
|
|||
return Utf8Helper::DefaultUtf8Helper.getWords(text, textLength, minimalWordLength, maximalWordLength, lowerCase);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- private functions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convert a utf-8 string to a uchar (utf-16)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
UChar* TRI_Utf8ToUChar (TRI_memory_zone_t* zone,
|
||||
const char* utf8,
|
||||
const size_t inLength,
|
||||
size_t* outLength) {
|
||||
UErrorCode status;
|
||||
UChar* utf16;
|
||||
int32_t utf16Length;
|
||||
|
||||
// 1. convert utf8 string to utf16
|
||||
// calculate utf16 string length
|
||||
status = U_ZERO_ERROR;
|
||||
u_strFromUTF8(nullptr, 0, &utf16Length, utf8, (int32_t) inLength, &status);
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
utf16 = (UChar *) TRI_Allocate(zone, (utf16Length + 1) * sizeof(UChar), false);
|
||||
if (utf16 == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// now convert
|
||||
status = U_ZERO_ERROR;
|
||||
// the +1 will append a 0 byte at the end
|
||||
u_strFromUTF8(utf16, utf16Length + 1, nullptr, utf8, (int32_t) inLength, &status);
|
||||
if (status != U_ZERO_ERROR) {
|
||||
TRI_Free(zone, utf16);
|
||||
return 0;
|
||||
}
|
||||
|
||||
*outLength = (size_t) utf16Length;
|
||||
|
||||
return utf16;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convert a uchar (utf-16) to a utf-8 string
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_UCharToUtf8 (TRI_memory_zone_t* zone,
|
||||
const UChar* uchar,
|
||||
const size_t inLength,
|
||||
size_t* outLength) {
|
||||
UErrorCode status;
|
||||
char* utf8;
|
||||
int32_t utf8Length;
|
||||
|
||||
// calculate utf8 string length
|
||||
status = U_ZERO_ERROR;
|
||||
u_strToUTF8(nullptr, 0, &utf8Length, uchar, (int32_t) inLength, &status);
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
utf8 = static_cast<char*>(TRI_Allocate(zone, (utf8Length + 1) * sizeof(char), false));
|
||||
|
||||
if (utf8 == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// convert to utf8
|
||||
status = U_ZERO_ERROR;
|
||||
// the +1 will append a 0 byte at the end
|
||||
u_strToUTF8(utf8, utf8Length + 1, nullptr, uchar, (int32_t) inLength, &status);
|
||||
if (status != U_ZERO_ERROR) {
|
||||
TRI_Free(zone, utf8);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
*outLength = ((size_t) utf8Length);
|
||||
|
||||
return utf8;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- public functions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief normalize an utf8 string (NFC)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_normalize_utf8_to_NFC (TRI_memory_zone_t* zone,
|
||||
const char* utf8,
|
||||
const size_t inLength,
|
||||
size_t* outLength) {
|
||||
UChar* utf16;
|
||||
size_t utf16Length;
|
||||
char* utf8Dest;
|
||||
|
||||
*outLength = 0;
|
||||
|
||||
if (inLength == 0) {
|
||||
utf8Dest = static_cast<char*>(TRI_Allocate(zone, sizeof(char), false));
|
||||
|
||||
if (utf8Dest != nullptr) {
|
||||
utf8Dest[0] = '\0';
|
||||
}
|
||||
return utf8Dest;
|
||||
}
|
||||
|
||||
utf16 = TRI_Utf8ToUChar(zone, utf8, inLength, &utf16Length);
|
||||
if (utf16 == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// continue in TR_normalize_utf16_to_NFC
|
||||
utf8Dest = TRI_normalize_utf16_to_NFC(zone, (const uint16_t*) utf16, (int32_t) utf16Length, outLength);
|
||||
TRI_Free(zone, utf16);
|
||||
|
||||
return utf8Dest;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief normalize an utf8 string (NFC)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_normalize_utf16_to_NFC (TRI_memory_zone_t* zone,
|
||||
const uint16_t* utf16,
|
||||
const size_t inLength,
|
||||
size_t* outLength) {
|
||||
UErrorCode status;
|
||||
UChar * utf16Dest;
|
||||
int32_t utf16DestLength;
|
||||
char * utf8Dest;
|
||||
const UNormalizer2 *norm2;
|
||||
char buffer[64];
|
||||
bool mustFree;
|
||||
|
||||
*outLength = 0;
|
||||
|
||||
if (inLength == 0) {
|
||||
utf8Dest = static_cast<char*>(TRI_Allocate(zone, sizeof(char), false));
|
||||
if (utf8Dest != nullptr) {
|
||||
utf8Dest[0] = '\0';
|
||||
}
|
||||
return utf8Dest;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
norm2 = unorm2_getInstance(nullptr, "nfc", UNORM2_COMPOSE, &status);
|
||||
|
||||
if (status != U_ZERO_ERROR) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// normalize UChar (UTF-16)
|
||||
|
||||
if (inLength < sizeof(buffer) / sizeof(UChar)) {
|
||||
// use a static buffer
|
||||
utf16Dest = (UChar *) &buffer[0];
|
||||
mustFree = false;
|
||||
}
|
||||
else {
|
||||
// use dynamic memory
|
||||
utf16Dest = (UChar *) TRI_Allocate(zone, (inLength + 1) * sizeof(UChar), false);
|
||||
if (utf16Dest == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
mustFree = true;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
utf16DestLength = unorm2_normalize(norm2, (UChar*) utf16, (int32_t) inLength, utf16Dest, (int32_t) (inLength + 1), &status);
|
||||
if (status != U_ZERO_ERROR) {
|
||||
if (mustFree) {
|
||||
TRI_Free(zone, utf16Dest);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Convert data back from UChar (UTF-16) to UTF-8
|
||||
utf8Dest = TRI_UCharToUtf8(zone, utf16Dest, (size_t) utf16DestLength, outLength);
|
||||
if (mustFree) {
|
||||
TRI_Free(zone, utf16Dest);
|
||||
}
|
||||
|
||||
return utf8Dest;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- END-OF-FILE
|
||||
// -----------------------------------------------------------------------------
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "Basics/vector.h"
|
||||
|
||||
#include "unicode/coll.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- class Utf8Helper
|
||||
|
@ -90,7 +91,13 @@ namespace triagens {
|
|||
/// 1 : left > right
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int compareUtf8 (const char* left, const char* right) const;
|
||||
int compareUtf8 (char const* left,
|
||||
char const* right) const;
|
||||
|
||||
int compareUtf8 (char const* left,
|
||||
size_t leftLength,
|
||||
char const* right,
|
||||
size_t rightLength) const;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief compare utf16 strings
|
||||
|
@ -162,6 +169,98 @@ namespace triagens {
|
|||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- public non-class functions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convert a utf-8 string to a uchar (utf-16)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
UChar* TRI_Utf8ToUChar (TRI_memory_zone_t* zone,
|
||||
const char* utf8,
|
||||
const size_t inLength,
|
||||
size_t* outLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convert a uchar (utf-16) to a utf-8 string
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_UCharToUtf8 (TRI_memory_zone_t* zone,
|
||||
const UChar* uchar,
|
||||
const size_t inLength,
|
||||
size_t* outLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief normalize an utf8 string (NFC)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_normalize_utf8_to_NFC (TRI_memory_zone_t* zone,
|
||||
const char* utf8,
|
||||
const size_t inLength,
|
||||
size_t* outLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief normalize an utf16 string (NFC) and export it to utf8
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char * TRI_normalize_utf16_to_NFC (TRI_memory_zone_t* zone,
|
||||
const uint16_t* utf16,
|
||||
const size_t inLength,
|
||||
size_t* outLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief compare two utf16 strings (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int TRI_compare_utf16 (const uint16_t* left,
|
||||
size_t leftLength,
|
||||
const uint16_t* right,
|
||||
size_t rightLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief compare two 0-terminated utf8 strings (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int TRI_compare_utf8 (char const* left,
|
||||
char const* right);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief compare two utf8 strings given as pointer + byte length (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int TRI_compare_utf8 (char const* left,
|
||||
size_t leftLength,
|
||||
char const* right,
|
||||
size_t rightLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Lowercase the characters in a UTF-8 string (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_tolower_utf8 (TRI_memory_zone_t* zone,
|
||||
const char *src,
|
||||
int32_t srcLength,
|
||||
int32_t* dstLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Uppercase the characters in a UTF-8 string (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_toupper_utf8 (TRI_memory_zone_t* zone,
|
||||
const char *src,
|
||||
int32_t srcLength,
|
||||
int32_t* dstLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Get words of an UTF-8 string (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TRI_vector_string_t* TRI_get_words (const char* const text,
|
||||
const size_t textLength,
|
||||
const size_t minimalWordLength,
|
||||
const size_t maximalWordLength,
|
||||
bool lowerCase);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -28,10 +28,9 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "Basics/json-utilities.h"
|
||||
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "Basics/string-buffer.h"
|
||||
#include "Basics/hashes.h"
|
||||
#include "Basics/string-buffer.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- private functions
|
||||
|
@ -248,8 +247,10 @@ int TRI_CompareValuesJson (TRI_json_t const* lhs,
|
|||
// same for STRING and STRING_REFERENCE
|
||||
int res;
|
||||
if (useUTF8) {
|
||||
res = TRI_compare_utf8(lhs->_value._string.data,
|
||||
rhs->_value._string.data);
|
||||
res = TRI_compare_utf8(lhs->_value._string.data,
|
||||
lhs->_value._string.length - 1,
|
||||
rhs->_value._string.data,
|
||||
rhs->_value._string.length - 1);
|
||||
}
|
||||
else {
|
||||
res = strcmp(lhs->_value._string.data, rhs->_value._string.data);
|
||||
|
|
|
@ -28,11 +28,9 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "tri-strings.h"
|
||||
|
||||
#include "utf8-helper.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#include "Basics/conversions.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
#include <openssl/sha.h>
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- private variables
|
||||
|
|
|
@ -1,229 +0,0 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief utf8 helper functions
|
||||
///
|
||||
/// @file
|
||||
///
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2014 ArangoDB GmbH, Cologne, Germany
|
||||
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Dr. Frank Celler
|
||||
/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany
|
||||
/// @author Copyright 2011-2013, triAGENS GmbH, Cologne, Germany
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "utf8-helper.h"
|
||||
|
||||
#include "unicode/unorm2.h"
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- private functions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convert a utf-8 string to a uchar (utf-16)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
UChar* TRI_Utf8ToUChar (TRI_memory_zone_t* zone,
|
||||
const char* utf8,
|
||||
const size_t inLength,
|
||||
size_t* outLength) {
|
||||
UErrorCode status;
|
||||
UChar* utf16;
|
||||
int32_t utf16Length;
|
||||
|
||||
// 1. convert utf8 string to utf16
|
||||
// calculate utf16 string length
|
||||
status = U_ZERO_ERROR;
|
||||
u_strFromUTF8(nullptr, 0, &utf16Length, utf8, (int32_t) inLength, &status);
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
utf16 = (UChar *) TRI_Allocate(zone, (utf16Length + 1) * sizeof(UChar), false);
|
||||
if (utf16 == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// now convert
|
||||
status = U_ZERO_ERROR;
|
||||
// the +1 will append a 0 byte at the end
|
||||
u_strFromUTF8(utf16, utf16Length + 1, nullptr, utf8, (int32_t) inLength, &status);
|
||||
if (status != U_ZERO_ERROR) {
|
||||
TRI_Free(zone, utf16);
|
||||
return 0;
|
||||
}
|
||||
|
||||
*outLength = (size_t) utf16Length;
|
||||
|
||||
return utf16;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convert a uchar (utf-16) to a utf-8 string
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_UCharToUtf8 (TRI_memory_zone_t* zone,
|
||||
const UChar* uchar,
|
||||
const size_t inLength,
|
||||
size_t* outLength) {
|
||||
UErrorCode status;
|
||||
char* utf8;
|
||||
int32_t utf8Length;
|
||||
|
||||
// calculate utf8 string length
|
||||
status = U_ZERO_ERROR;
|
||||
u_strToUTF8(nullptr, 0, &utf8Length, uchar, (int32_t) inLength, &status);
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
utf8 = static_cast<char*>(TRI_Allocate(zone, (utf8Length + 1) * sizeof(char), false));
|
||||
|
||||
if (utf8 == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// convert to utf8
|
||||
status = U_ZERO_ERROR;
|
||||
// the +1 will append a 0 byte at the end
|
||||
u_strToUTF8(utf8, utf8Length + 1, nullptr, uchar, (int32_t) inLength, &status);
|
||||
if (status != U_ZERO_ERROR) {
|
||||
TRI_Free(zone, utf8);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
*outLength = ((size_t) utf8Length);
|
||||
|
||||
return utf8;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- public functions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief normalize an utf8 string (NFC)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_normalize_utf8_to_NFC (TRI_memory_zone_t* zone,
|
||||
const char* utf8,
|
||||
const size_t inLength,
|
||||
size_t* outLength) {
|
||||
UChar* utf16;
|
||||
size_t utf16Length;
|
||||
char* utf8Dest;
|
||||
|
||||
*outLength = 0;
|
||||
|
||||
if (inLength == 0) {
|
||||
utf8Dest = static_cast<char*>(TRI_Allocate(zone, sizeof(char), false));
|
||||
|
||||
if (utf8Dest != nullptr) {
|
||||
utf8Dest[0] = '\0';
|
||||
}
|
||||
return utf8Dest;
|
||||
}
|
||||
|
||||
utf16 = TRI_Utf8ToUChar(zone, utf8, inLength, &utf16Length);
|
||||
if (utf16 == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// continue in TR_normalize_utf16_to_NFC
|
||||
utf8Dest = TRI_normalize_utf16_to_NFC(zone, (const uint16_t*) utf16, (int32_t) utf16Length, outLength);
|
||||
TRI_Free(zone, utf16);
|
||||
|
||||
return utf8Dest;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief normalize an utf8 string (NFC)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_normalize_utf16_to_NFC (TRI_memory_zone_t* zone,
|
||||
const uint16_t* utf16,
|
||||
const size_t inLength,
|
||||
size_t* outLength) {
|
||||
UErrorCode status;
|
||||
UChar * utf16Dest;
|
||||
int32_t utf16DestLength;
|
||||
char * utf8Dest;
|
||||
const UNormalizer2 *norm2;
|
||||
char buffer[64];
|
||||
bool mustFree;
|
||||
|
||||
*outLength = 0;
|
||||
|
||||
if (inLength == 0) {
|
||||
utf8Dest = static_cast<char*>(TRI_Allocate(zone, sizeof(char), false));
|
||||
if (utf8Dest != nullptr) {
|
||||
utf8Dest[0] = '\0';
|
||||
}
|
||||
return utf8Dest;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
norm2 = unorm2_getInstance(nullptr, "nfc", UNORM2_COMPOSE, &status);
|
||||
|
||||
if (status != U_ZERO_ERROR) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// normalize UChar (UTF-16)
|
||||
|
||||
if (inLength < sizeof(buffer) / sizeof(UChar)) {
|
||||
// use a static buffer
|
||||
utf16Dest = (UChar *) &buffer[0];
|
||||
mustFree = false;
|
||||
}
|
||||
else {
|
||||
// use dynamic memory
|
||||
utf16Dest = (UChar *) TRI_Allocate(zone, (inLength + 1) * sizeof(UChar), false);
|
||||
if (utf16Dest == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
mustFree = true;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
utf16DestLength = unorm2_normalize(norm2, (UChar*) utf16, (int32_t) inLength, utf16Dest, (int32_t) (inLength + 1), &status);
|
||||
if (status != U_ZERO_ERROR) {
|
||||
if (mustFree) {
|
||||
TRI_Free(zone, utf16Dest);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Convert data back from UChar (UTF-16) to UTF-8
|
||||
utf8Dest = TRI_UCharToUtf8(zone, utf16Dest, (size_t) utf16DestLength, outLength);
|
||||
if (mustFree) {
|
||||
TRI_Free(zone, utf16Dest);
|
||||
}
|
||||
|
||||
return utf8Dest;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- END-OF-FILE
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Local Variables:
|
||||
// mode: outline-minor
|
||||
// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}"
|
||||
// End:
|
|
@ -1,130 +0,0 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief utf8 helper functions
|
||||
///
|
||||
/// @file
|
||||
///
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2014 ArangoDB GmbH, Cologne, Germany
|
||||
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Dr. Frank Celler
|
||||
/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany
|
||||
/// @author Copyright 2011-2013, triAGENS GmbH, Cologne, Germany
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_BASICS_C_UTF8__HELPER_H
|
||||
#define ARANGODB_BASICS_C_UTF8__HELPER_H 1
|
||||
|
||||
#include "Basics/Common.h"
|
||||
#include "Basics/vector.h"
|
||||
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- public functions
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convert a utf-8 string to a uchar (utf-16)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
UChar* TRI_Utf8ToUChar (TRI_memory_zone_t* zone,
|
||||
const char* utf8,
|
||||
const size_t inLength,
|
||||
size_t* outLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convert a uchar (utf-16) to a utf-8 string
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_UCharToUtf8 (TRI_memory_zone_t* zone,
|
||||
const UChar* uchar,
|
||||
const size_t inLength,
|
||||
size_t* outLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief normalize an utf8 string (NFC)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_normalize_utf8_to_NFC (TRI_memory_zone_t* zone,
|
||||
const char* utf8,
|
||||
const size_t inLength,
|
||||
size_t* outLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief normalize a utf16 string (NFC) and export it to utf8
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char * TRI_normalize_utf16_to_NFC (TRI_memory_zone_t* zone,
|
||||
const uint16_t* utf16,
|
||||
const size_t inLength,
|
||||
size_t* outLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief compare two utf16 strings (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int TRI_compare_utf16 (const uint16_t* left,
|
||||
size_t leftLength,
|
||||
const uint16_t* right,
|
||||
size_t rightLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief compare two utf8 strings (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int TRI_compare_utf8 (const char* left, const char* right);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Lowercase the characters in a UTF-8 string (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_tolower_utf8 (TRI_memory_zone_t* zone,
|
||||
const char *src,
|
||||
int32_t srcLength,
|
||||
int32_t* dstLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Uppercase the characters in a UTF-8 string (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char* TRI_toupper_utf8 (TRI_memory_zone_t* zone,
|
||||
const char *src,
|
||||
int32_t srcLength,
|
||||
int32_t* dstLength);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Get words of a UTF-8 string (implemented in Basics/Utf8Helper.cpp)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TRI_vector_string_t* TRI_get_words (const char* const text,
|
||||
const size_t textLength,
|
||||
const size_t minimalWordLength,
|
||||
const size_t maximalWordLength,
|
||||
bool lowerCase);
|
||||
|
||||
#endif
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- END-OF-FILE
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Local Variables:
|
||||
// mode: outline-minor
|
||||
// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}"
|
||||
// End:
|
|
@ -96,7 +96,6 @@ add_library(
|
|||
Basics/tri-strings.cpp
|
||||
Basics/tri-zip.cpp
|
||||
Basics/Utf8Helper.cpp
|
||||
Basics/utf8-helper.cpp
|
||||
Basics/vector.cpp
|
||||
Basics/voc-errors.cpp
|
||||
Basics/voc-mimetypes.cpp
|
||||
|
|
|
@ -68,7 +68,6 @@ lib_libarango_a_SOURCES = \
|
|||
lib/Basics/tri-strings.cpp \
|
||||
lib/Basics/tri-zip.cpp \
|
||||
lib/Basics/Utf8Helper.cpp \
|
||||
lib/Basics/utf8-helper.cpp \
|
||||
lib/Basics/vector.cpp \
|
||||
lib/Basics/voc-errors.cpp \
|
||||
lib/Basics/voc-mimetypes.cpp \
|
||||
|
|
|
@ -32,10 +32,10 @@
|
|||
|
||||
#include "Basics/conversions.h"
|
||||
#include "Basics/logging.h"
|
||||
#include "Basics/tri-strings.h"
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "Basics/StringBuffer.h"
|
||||
#include "Basics/StringUtils.h"
|
||||
#include "Basics/tri-strings.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace triagens::basics;
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
#include "Basics/string-buffer.h"
|
||||
#include "Basics/tri-strings.h"
|
||||
#include "Basics/tri-zip.h"
|
||||
#include "Basics/utf8-helper.h"
|
||||
#include "Basics/Utf8Helper.h"
|
||||
#include "Rest/HttpRequest.h"
|
||||
#include "Rest/SslInterface.h"
|
||||
#include "SimpleHttpClient/GeneralClientConnection.h"
|
||||
|
|
Loading…
Reference in New Issue