mirror of https://gitee.com/bigwinds/arangodb
3777 lines
112 KiB
C++
3777 lines
112 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2016 by EMC Corporation, All Rights Reserved
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is EMC Corporation
|
|
///
|
|
/// @author Andrey Abramov
|
|
/// @author Vasiliy Nabatchikov
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
#ifdef _WIN32
|
|
#include <Windows.h> // for GetACP()
|
|
#else
|
|
#include <langinfo.h> // for nl_langinfo(...)
|
|
#endif
|
|
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include <map>
|
|
#include <unordered_map>
|
|
|
|
#if defined (__GNUC__)
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
|
#endif
|
|
|
|
#include <boost/locale/generator.hpp>
|
|
|
|
#if defined (__GNUC__)
|
|
#pragma GCC diagnostic pop
|
|
#endif
|
|
|
|
#include <boost/locale/info.hpp>
|
|
|
|
#if defined (__GNUC__)
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
|
#endif
|
|
|
|
#include <boost/locale/util.hpp>
|
|
|
|
#if defined (__GNUC__)
|
|
#pragma GCC diagnostic pop
|
|
#endif
|
|
|
|
#include <unicode/coll.h> // for icu::Collator
|
|
#include <unicode/decimfmt.h> // for icu::DecimalFormat
|
|
#include <unicode/numfmt.h> // for icu::NumberFormat
|
|
#include <unicode/ucnv.h> // for UConverter
|
|
#include <unicode/ustring.h> // for u_strToUTF32, u_strToUTF8
|
|
|
|
#include "hash_utils.hpp"
|
|
#include "map_utils.hpp"
|
|
#include "object_pool.hpp"
|
|
#include "numeric_utils.hpp"
|
|
#include "error/error.hpp"
|
|
|
|
#include "locale_utils.hpp"
|
|
|
|
NS_BEGIN(std)
|
|
|
|
// GCC < v5 does not explicitly define
|
|
// std::codecvt<char16_t, char, mbstate_t>::id or std::codecvt<char32_t, char, mbstate_t>::id
|
|
// this causes linking issues in optimized code
|
|
// Note: clang tries to pretend to be GCC, so it must be explicitly excluded
|
|
#if !defined(__APPLE__) && !defined(__clang__) && defined(__GNUC__) && (__GNUC__ < 5)
|
|
// explicit specializations of the static 'id' member of the UTF16 and UTF32
// variants of std::codecvt, compiled only when the enclosing #if condition holds
/*static*/ template<> locale::id codecvt<char16_t, char, mbstate_t>::id;
|
|
/*static*/ template<> locale::id codecvt<char32_t, char, mbstate_t>::id;
|
|
#endif
|
|
|
|
NS_END // std
|
|
|
|
NS_LOCAL
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief size of internal buffers, arbitrary size
///        used as the element count of the intermediary UTF16 buffers that the
///        codecvt facets in this file allocate on the stack
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
const size_t BUFFER_SIZE = 1024;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief size of ICU object pools, arbitrary size
///        passed to the constructors of the converter pool and of the
///        conversion-context pool defined in this file
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
const size_t POOL_SIZE = 8;
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- facets required by std::locale
|
|
// -----------------------------------------------------------------------------
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief name of the encoding used by the host system
/// @return on Windows "cp" followed by the value of GetACP(),
///         otherwise the value of nl_langinfo(CODESET)
////////////////////////////////////////////////////////////////////////////////
std::string system_encoding() {
#ifdef _WIN32
  std::string name("cp");

  name += std::to_string(GetACP());

  return name;
#else
  const char* codeset = nl_langinfo(CODESET);

  return codeset;
#endif
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief a thread-safe pool of ICU converters for a given encoding
|
|
/// may hold nullptr on ICU converter instantiation failure
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
class converter_pool: private irs::util::noncopyable {
|
|
public:
|
|
DECLARE_SHARED_PTR(UConverter);
|
|
converter_pool(std::string&& encoding)
|
|
: encoding_(std::move(encoding)), pool_(POOL_SIZE) {}
|
|
ptr get() { return pool_.emplace(encoding_).release(); }
|
|
const std::string& encoding() const NOEXCEPT { return encoding_; }
|
|
|
|
private:
|
|
struct builder {
|
|
DECLARE_SHARED_PTR(UConverter);
|
|
static ptr make(const std::string& encoding) {
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
ptr value(
|
|
ucnv_open(encoding.c_str(), &status),
|
|
[](UConverter* ptr)->void{ ucnv_close(ptr); }
|
|
);
|
|
|
|
return U_SUCCESS(status) ? std::move(value) : nullptr;
|
|
}
|
|
};
|
|
|
|
std::string encoding_;
|
|
irs::unbounded_object_pool_volatile<builder> pool_;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @param encoding the converter encoding (null == system encoding)
|
|
/// @@return a converter for the specified encoding
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
converter_pool& get_converter(const irs::string_ref& encoding) {
|
|
static auto generator = [](
|
|
const irs::hashed_string_ref& key,
|
|
const converter_pool& pool
|
|
) NOEXCEPT->irs::hashed_string_ref {
|
|
// reuse hash but point ref at value in pool
|
|
return irs::hashed_string_ref(key.hash(), pool.encoding());
|
|
};
|
|
static std::mutex mutex;
|
|
static std::unordered_map<irs::hashed_string_ref, converter_pool> encodings;
|
|
auto key = encoding;
|
|
std::string tmp;
|
|
|
|
// use system encoding if encoding.null()
|
|
if (key.null()) {
|
|
tmp = system_encoding();
|
|
key = tmp;
|
|
}
|
|
|
|
SCOPED_LOCK(mutex);
|
|
|
|
return irs::map_utils::try_emplace_update_key(
|
|
encodings,
|
|
generator,
|
|
irs::make_hashed_ref(key, std::hash<irs::string_ref>()),
|
|
key
|
|
).first->second;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief base implementation for converters between 'internal' representation
|
|
/// and an 'external' user-specified encoding (unicode internal)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
template<typename InternType>
|
|
class codecvtu_base: public std::codecvt<InternType, char, mbstate_t> {
|
|
public:
|
|
typedef std::codecvt<InternType, char, mbstate_t> parent_t;
|
|
typedef typename parent_t::extern_type extern_type;
|
|
typedef typename parent_t::intern_type intern_type;
|
|
typedef typename parent_t::state_type state_type;
|
|
|
|
codecvtu_base(converter_pool& converters)
|
|
: contexts_(POOL_SIZE), converters_(converters) {}
|
|
|
|
protected:
|
|
struct context_t {
|
|
DECLARE_UNIQUE_PTR(context_t);
|
|
std::basic_string<typename parent_t::intern_type> buf_;
|
|
converter_pool::ptr converter_;
|
|
|
|
static ptr make(converter_pool& pool) {
|
|
auto ctx = irs::memory::make_unique<context_t>();
|
|
|
|
ctx->converter_ = pool.get();
|
|
|
|
return ctx->converter_ ? std::move(ctx) : nullptr;
|
|
}
|
|
};
|
|
typedef irs::unbounded_object_pool<context_t> context_pool;
|
|
|
|
typename context_pool::ptr context() const {
|
|
return contexts_.emplace(converters_);
|
|
}
|
|
|
|
const std::string& context_encoding() const NOEXCEPT {
|
|
return converters_.encoding();
|
|
}
|
|
|
|
virtual bool do_always_noconv() const NOEXCEPT final override {
|
|
return false; // not an identity conversion
|
|
}
|
|
|
|
virtual int do_encoding() const NOEXCEPT override = 0;
|
|
virtual std::codecvt_base::result do_in(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
const extern_type*& from_next,
|
|
intern_type* to,
|
|
intern_type* to_end,
|
|
intern_type*& to_next
|
|
) const override = 0;
|
|
virtual int do_length(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
std::size_t max
|
|
) const final override;
|
|
virtual int do_max_length() const NOEXCEPT override = 0;
|
|
virtual std::codecvt_base::result do_out(
|
|
state_type& state,
|
|
const intern_type* from,
|
|
const intern_type* from_end,
|
|
const intern_type*& from_next,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const override = 0;
|
|
virtual std::codecvt_base::result do_unshift(
|
|
state_type& state,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const final override;
|
|
|
|
private:
|
|
mutable context_pool contexts_;
|
|
converter_pool& converters_;
|
|
};
|
|
|
|
template<typename InternType>
|
|
int codecvtu_base<InternType>::do_length(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
std::size_t max
|
|
) const {
|
|
auto ctx = context();
|
|
|
|
if (!ctx) {
|
|
IR_FRMT_WARN(
|
|
"failure to get conversion context while computing number of required input characters from encoding '%s' to produce at most '" IR_SIZE_T_SPECIFIER "' output characters",
|
|
context_encoding().c_str(), max
|
|
);
|
|
|
|
return std::codecvt_base::error;
|
|
}
|
|
|
|
ctx->buf_.resize(max);
|
|
|
|
auto* from_next = from;
|
|
auto* to = &(ctx->buf_[0]);
|
|
auto* to_end = to + max;
|
|
auto* to_next = to;
|
|
auto res = do_in(state, from, from_end, from_next, to, to_end, to_next);
|
|
|
|
return res == std::codecvt_base::ok ? std::distance(from, from_next) : 0;
|
|
}
|
|
|
|
template<typename InternType>
|
|
std::codecvt_base::result codecvtu_base<InternType>::do_unshift(
|
|
state_type& state,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const {
|
|
to_next = to;
|
|
|
|
return std::codecvt_base::ok;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief converter between an 'internal' utf16 representation and
|
|
/// an 'external' user-specified encoding
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
class codecvt16_facet final: public codecvtu_base<char16_t> {
|
|
public:
|
|
MSVC2015_ONLY(static std::locale::id id;) // MSVC2015 requires a static instance of an 'id' member
|
|
MSVC2017_ONLY(static std::locale::id id;) // MSVC2017 requires a static instance of an 'id' member
|
|
codecvt16_facet(converter_pool& converters): codecvtu_base(converters) {}
|
|
|
|
bool append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const;
|
|
|
|
protected:
|
|
virtual int do_encoding() const NOEXCEPT override;
|
|
virtual std::codecvt_base::result do_in(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
const extern_type*& from_next,
|
|
intern_type* to,
|
|
intern_type* to_end,
|
|
intern_type*& to_next
|
|
) const override;
|
|
virtual int do_max_length() const NOEXCEPT override;
|
|
virtual std::codecvt_base::result do_out(
|
|
state_type& state,
|
|
const intern_type* from,
|
|
const intern_type* from_end,
|
|
const intern_type*& from_next,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const override;
|
|
};
|
|
|
|
// out-of-class definition of the static 'id' member declared in codecvt16_facet
MSVC2015_ONLY(/*static*/ std::locale::id codecvt16_facet::id;) // MSVC2015 requires a static instance of an 'id' member
|
|
MSVC2017_ONLY(/*static*/ std::locale::id codecvt16_facet::id;) // MSVC2017 requires a static instance of an 'id' member
|
|
|
|
#if defined (__GNUC__)
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wunused-function"
|
|
#endif
|
|
|
|
bool codecvt16_facet::append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const {
|
|
auto size = value.length();
|
|
|
|
if (size < 0) {
|
|
IR_FRMT_WARN(
|
|
"ICU returned invalid string size while converting unicode string to UTF16"
|
|
);
|
|
|
|
return false;
|
|
}
|
|
|
|
static_assert(sizeof(UChar) == sizeof(intern_type), "sizeof(UChar) != sizeof(intern_type)");
|
|
buf.append(reinterpret_cast<const intern_type*>(value.getBuffer()), size);
|
|
|
|
return true;
|
|
}
|
|
|
|
#if defined (__GNUC__)
|
|
#pragma GCC diagnostic pop
|
|
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @return the minimum character size of the converter if ICU reports it as
///         fixed-width, 0 if it is not fixed-width,
///         -1 if no conversion context is available
////////////////////////////////////////////////////////////////////////////////
int codecvt16_facet::do_encoding() const NOEXCEPT {
  auto ctx = context();

  if (ctx) {
    UErrorCode status = U_ZERO_ERROR;
    auto* converter = ctx->converter_.get();

    // the exact number of extern_type characters that correspond to one
    // intern_type character, if constant
    if (!ucnv_isFixedWidth(converter, &status)) {
      return 0;
    }

    return ucnv_getMinCharSize(converter);
  }

  IR_FRMT_WARN(
    "failure to get conversion context while computing number of required input characters from encoding '%s' to produce a single output character",
    context_encoding().c_str()
  );

  return -1;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert [from, from_end) from the external encoding directly into
///        UTF16 stored in [to, to_end)
/// @return 'ok' on success, 'partial' if ICU reports a buffer overflow,
///         'error' otherwise (then 'from_next'/'to_next' are reset to
///         'from'/'to')
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvt16_facet::do_in(
    state_type& state,
    const extern_type* from,
    const extern_type* from_end,
    const extern_type*& from_next,
    intern_type* to,
    intern_type* to_end,
    intern_type*& to_next
) const {
  // the destination is handed to ICU as UChar, sizes must match
  static_assert(sizeof(UChar) == sizeof(intern_type), "sizeof(UChar) != sizeof(intern_type)");

  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting encoding '%s' to unicode system encoding",
      context_encoding().c_str()
    );

    return std::codecvt_base::error;
  }

  auto* converter = ctx->converter_.get();
  UErrorCode status = U_ZERO_ERROR;

  ucnv_reset(converter);
  ucnv_toUnicode(
    converter,
    reinterpret_cast<UChar**>(&to_next),
    reinterpret_cast<const UChar*>(to_end),
    &from_next,
    from_end,
    nullptr, // no offsets requested
    true, // flush
    &status
  );

  if (U_SUCCESS(status)) {
    return std::codecvt_base::ok;
  }

  if (U_BUFFER_OVERFLOW_ERROR == status) {
    return std::codecvt_base::partial; // destination buffer is not large enough
  }

  // discard whatever progress ICU reported before the failure
  from_next = from;
  to_next = to;

  IR_FRMT_WARN(
    "failure to convert from locale encoding to UTF16 while converting encoding '%s' unicode system encoding",
    context_encoding().c_str()
  );

  return std::codecvt_base::error; // error occurred during final conversion
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @return the maximum character size reported by the ICU converter,
///         -1 if no conversion context is available
////////////////////////////////////////////////////////////////////////////////
int codecvt16_facet::do_max_length() const NOEXCEPT {
  auto ctx = context();

  if (ctx) {
    return ucnv_getMaxCharSize(ctx->converter_.get());
  }

  IR_FRMT_WARN(
    "failure to get conversion context while computing maximum number of required input characters from encoding '%s' to produce a single output character",
    context_encoding().c_str()
  );

  return -1;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert UTF16 in [from, from_end) directly into the external
///        encoding stored in [to, to_end)
/// @return 'ok' on success, 'partial' if ICU reports a buffer overflow,
///         'error' otherwise (then 'from_next'/'to_next' are reset to
///         'from'/'to')
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvt16_facet::do_out(
    state_type& state,
    const intern_type* from,
    const intern_type* from_end,
    const intern_type*& from_next,
    extern_type* to,
    extern_type* to_end,
    extern_type*& to_next
) const {
  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting unicode system encoding to encoding '%s'",
      context_encoding().c_str()
    );

    return std::codecvt_base::error;
  }

  UErrorCode status = U_ZERO_ERROR;

  ucnv_reset(ctx->converter_.get());

  // the source is handed to ICU as UChar, sizes must match
  static_assert(sizeof(UChar) == sizeof(intern_type), "sizeof(UChar) != sizeof(intern_type)");
  ucnv_fromUnicode(
    ctx->converter_.get(),
    &to_next,
    to_end,
    reinterpret_cast<const UChar**>(&from_next),
    reinterpret_cast<const UChar*>(from_end),
    nullptr, // no offsets requested
    true, // flush
    &status
  );

  if (U_BUFFER_OVERFLOW_ERROR == status) {
    return std::codecvt_base::partial; // destination buffer is not large enough
  }

  if (!U_SUCCESS(status)) {
    // discard whatever progress ICU reported before the failure
    from_next = from;
    to_next = to;

    // message previously named the source encoding as 'TF16'
    IR_FRMT_WARN(
      "failure to convert from UTF16 to locale encoding while converting unicode system encoding to encoding '%s'",
      context_encoding().c_str()
    );

    return std::codecvt_base::error; // error occurred during final conversion
  }

  return std::codecvt_base::ok;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief converter between an 'internal' utf32 representation and
|
|
/// an 'external' user-specified encoding and an
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
class codecvt32_facet final: public codecvtu_base<char32_t> {
|
|
public:
|
|
MSVC2015_ONLY(static std::locale::id id;) // MSVC2015 requires a static instance of an 'id' member
|
|
MSVC2017_ONLY(static std::locale::id id;) // MSVC2017 requires a static instance of an 'id' member
|
|
codecvt32_facet(converter_pool& converters): codecvtu_base(converters) {}
|
|
|
|
bool append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const;
|
|
|
|
protected:
|
|
virtual int do_encoding() const NOEXCEPT final override;
|
|
virtual std::codecvt_base::result do_in(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
const extern_type*& from_next,
|
|
intern_type* to,
|
|
intern_type* to_end,
|
|
intern_type*& to_next
|
|
) const final override;
|
|
virtual int do_max_length() const NOEXCEPT override;
|
|
virtual std::codecvt_base::result do_out(
|
|
state_type& state,
|
|
const intern_type* from,
|
|
const intern_type* from_end,
|
|
const intern_type*& from_next,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const override;
|
|
};
|
|
|
|
// out-of-class definition of the static 'id' member declared in codecvt32_facet
MSVC2015_ONLY(/*static*/ std::locale::id codecvt32_facet::id;) // MSVC2015 requires a static instance of an 'id' member
|
|
MSVC2017_ONLY(/*static*/ std::locale::id codecvt32_facet::id;) // MSVC2017 requires a static instance of an 'id' member
|
|
|
|
bool codecvt32_facet::append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const {
|
|
auto size = value.countChar32();
|
|
|
|
if (size < 0) {
|
|
IR_FRMT_WARN(
|
|
"ICU returned invalid string size while converting unicode string to UTF32"
|
|
);
|
|
|
|
return false;
|
|
}
|
|
|
|
auto start = buf.size();
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
buf.resize(buf.size() + size);
|
|
|
|
static_assert(sizeof(UChar32) == sizeof(intern_type), "sizeof(UChar32) != sizeof(intern_type)");
|
|
auto written =
|
|
value.toUTF32(reinterpret_cast<UChar32*>(&buf[start]), size, status);
|
|
|
|
if (U_SUCCESS(status) && written == size) {
|
|
return true;
|
|
}
|
|
|
|
buf.resize(start);
|
|
|
|
return false;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @return the minimum character size of the converter if ICU reports it as
///         fixed-width, 0 if it is not fixed-width,
///         -1 if no conversion context is available
////////////////////////////////////////////////////////////////////////////////
int codecvt32_facet::do_encoding() const NOEXCEPT {
  auto ctx = context();

  if (ctx) {
    UErrorCode status = U_ZERO_ERROR;
    auto* converter = ctx->converter_.get();

    // the exact number of extern_type characters that correspond to one
    // intern_type character, if constant
    if (!ucnv_isFixedWidth(converter, &status)) {
      return 0;
    }

    return int(ucnv_getMinCharSize(converter));
  }

  IR_FRMT_WARN(
    "failure to get conversion context while computing number of required input characters from encoding '%s' to produce a single output character",
    context_encoding().c_str()
  );

  return -1;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert [from, from_end) from the external encoding into UTF32
///        stored in [to, to_end), going through an intermediary UTF16 buffer
///        of BUFFER_SIZE units per iteration
///        per-character offsets are recorded so that 'from_next' can be set to
///        the source position matching the last character written to 'to'
/// @return 'ok' once all input is consumed, 'partial' if the destination is
///         too small, 'error' on conversion failure or missing context
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvt32_facet::do_in(
    state_type& state,
    const extern_type* from,
    const extern_type* from_end,
    const extern_type*& from_next,
    intern_type* to,
    intern_type* to_end,
    intern_type*& to_next
) const {
  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting encoding '%s' to unicode system encoding",
      context_encoding().c_str()
    );

    return std::codecvt_base::error;
  }

  UChar buf[BUFFER_SIZE];
  auto* buf_end = buf + IRESEARCH_COUNTOF(buf);
  int32_t src_offsets[IRESEARCH_COUNTOF(buf) + 1]; // +1 for end
  int32_t dst_offsets[IRESEARCH_COUNTOF(buf) + 1]; // +1 for end (use same size since always: count of UTF32 chars <= count of UTF16 chars)

  ucnv_reset(ctx->converter_.get());

  // convert 'BUFFER_SIZE' at a time
  while (from_next < from_end) {
    auto* buf_next = buf;
    auto* from_next_prev = from_next;
    auto* to_next_prev = to_next;
    UErrorCode src_status = U_ZERO_ERROR;
    UErrorCode dst_status = U_ZERO_ERROR;

    // convert from desired encoding to the intermediary representation
    ucnv_toUnicode(
      ctx->converter_.get(),
      &buf_next,
      buf_end,
      &from_next,
      from_end,
      src_offsets,
      true,
      &src_status
    );

    if (!U_SUCCESS(src_status) && U_BUFFER_OVERFLOW_ERROR != src_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from locale encoding to UTF16 while converting encoding '%s' unicode system encoding",
        context_encoding().c_str()
      );

      return std::codecvt_base::error; // error occurred during final conversion
    }

    assert(buf_next >= buf && IRESEARCH_COUNTOF(src_offsets) > size_t(buf_next - buf));
    src_offsets[buf_next - buf] = from_next - from_next_prev; // remember past-end position

    auto* buf_dst_next = buf;
    auto* buf_dst_end = buf_next;

    // the converter may consume input without producing any UTF16 units,
    // in that case there is nothing to convert and 'buf' must not be read
    if (buf_dst_next < buf_dst_end) {
      // convert one char at a time to track source position to destination position
      do {
        int32_t to_used = 0;

        static_assert(sizeof(UChar32) == sizeof(intern_type), "sizeof(UChar32) != sizeof(intern_type)");
        u_strToUTF32(
          reinterpret_cast<UChar32*>(to_next),
          to_end - to_next,
          &to_used, // set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow
          buf_dst_next,
          U_IS_SURROGATE(*buf_dst_next) ? 2 : 1, // 1 char at a time to track source/destination position mapping
          &dst_status
        );

        if (U_BUFFER_OVERFLOW_ERROR == dst_status) {
          break; // conversion buffer not large enough to hold result
        }

        if (U_SUCCESS(dst_status) && to_used < 0) {
          dst_status = U_INTERNAL_PROGRAM_ERROR; // ICU internal error
        }

        if (!U_SUCCESS(dst_status) && U_BUFFER_OVERFLOW_ERROR != dst_status) {
          from_next = from_next_prev;
          to_next = to_next_prev;

          IR_FRMT_WARN(
            "failure to convert from UTF16 to UTF32 while converting encoding '%s' to unicode system encoding",
            context_encoding().c_str()
          );

          return std::codecvt_base::error; // error occurred during final conversion
        }

        // all of 'to_used' since if not enough space in 'to' buffer then would have had U_BUFFER_OVERFLOW_ERROR
        while (to_used) {
          assert(to_next >= to_next_prev && IRESEARCH_COUNTOF(dst_offsets) > size_t(to_next - to_next_prev));
          dst_offsets[to_next - to_next_prev] = buf_dst_next - buf; // remember converted position (start)
          ++to_next;
          --to_used;
        }

        buf_dst_next += U_IS_SURROGATE(*buf_dst_next) ? 2 : 1; // +1 for 1 char at a time (+2 for surrogate)
      } while (buf_dst_next + 1 < buf_dst_end); // +1 for possible surrogate
    }

    assert(to_next >= to_next_prev && IRESEARCH_COUNTOF(dst_offsets) > size_t(to_next - to_next_prev));
    dst_offsets[to_next - to_next_prev] = buf_dst_next - buf; // remember past-end position

    auto buf_pos = dst_offsets[to_next - to_next_prev];

    assert(buf_pos >= 0 && IRESEARCH_COUNTOF(src_offsets) > size_t(buf_pos));
    from_next = from_next_prev + src_offsets[buf_pos]; // update successfully converted

    if (U_BUFFER_OVERFLOW_ERROR == dst_status
        || (U_BUFFER_OVERFLOW_ERROR == src_status && from_next >= from_end)) {
      return std::codecvt_base::partial; // destination buffer is not large enough
    }
  }

  return std::codecvt_base::ok;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @return the maximum character size reported by the ICU converter if it is
///         at most 2, otherwise twice that size,
///         -1 if no conversion context is available
////////////////////////////////////////////////////////////////////////////////
int codecvt32_facet::do_max_length() const NOEXCEPT {
  auto ctx = context();

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while computing maximum number of required input characters from encoding '%s' to produce a single output character",
      context_encoding().c_str()
    );

    return -1;
  }

  const auto utf16_bytes = ucnv_getMaxCharSize(ctx->converter_.get());

  if (utf16_bytes <= 2) {
    return utf16_bytes;
  }

  // *2 for UTF16->UTF32 conversion if can't fit each encoded character into a utf16 character
  return utf16_bytes * 2;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert UTF32 in [from, from_end) into the external encoding stored
///        in [to, to_end), going through an intermediary UTF16 buffer of
///        BUFFER_SIZE units per iteration
///        the source position of every intermediary unit is recorded so that
///        'from_next' can be set to match what ICU actually consumed
/// @return 'ok' once all input is consumed, 'partial' if the destination is
///         too small, 'error' on conversion failure or missing context
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvt32_facet::do_out(
    state_type& state,
    const intern_type* from,
    const intern_type* from_end,
    const intern_type*& from_next,
    extern_type* to,
    extern_type* to_end,
    extern_type*& to_next
) const {
  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting unicode system encoding to encoding '%s'",
      context_encoding().c_str()
    );

    return std::codecvt_base::error;
  }

  UChar utf16[BUFFER_SIZE];
  auto* utf16_end = utf16 + IRESEARCH_COUNTOF(utf16);
  size_t positions[IRESEARCH_COUNTOF(utf16) + 1]; // +1 for end

  ucnv_reset(ctx->converter_.get());

  // convert 'BUFFER_SIZE' at a time
  while (from_next < from_end) {
    UErrorCode src_status = U_ZERO_ERROR; // UTF32 -> UTF16 stage
    UErrorCode dst_status = U_ZERO_ERROR; // UTF16 -> external encoding stage
    auto* chunk_from = from_next; // restore points for a failed final conversion
    auto* chunk_to = to_next;
    auto* utf16_next = utf16; // write position of the intermediary buffer
    const UChar* utf16_from = utf16; // read position of the intermediary buffer

    // convert one char at a time to track source position to destination position
    do {
      int32_t used = 0;

      static_assert(sizeof(UChar32) == sizeof(intern_type), "sizeof(UChar32) != sizeof(intern_type)");
      u_strFromUTF32(
        utf16_next,
        utf16_end - utf16_next,
        &used, // set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow
        reinterpret_cast<const UChar32*>(from_next),
        1, // 1 char at a time to track source/destination position mapping
        &src_status
      );

      if (U_BUFFER_OVERFLOW_ERROR == src_status) {
        break; // conversion buffer not large enough to hold result
      }

      if (!U_SUCCESS(src_status) || used < 0) {
        if (U_SUCCESS(src_status)) {
          src_status = U_INTERNAL_PROGRAM_ERROR; // ICU internal error
        }

        IR_FRMT_WARN(
          "failure to convert from UTF32 to UTF16 while converting unicode system encoding to encoding '%s'",
          context_encoding().c_str()
        );

        break; // finish copying all successfully converted
      }

      assert(utf16_next >= utf16 && IRESEARCH_COUNTOF(positions) > size_t(utf16_next - utf16));
      positions[utf16_next - utf16] = from_next - from; // remember converted position
      utf16_next += used;
      ++from_next; // +1 for 1 char at a time
    } while (from_next < from_end);

    assert(utf16_next >= utf16 && IRESEARCH_COUNTOF(positions) > size_t(utf16_next - utf16));
    positions[utf16_next - utf16] = from_next - from; // remember past-end position

    // convert intermediary representation to the desired encoding
    ucnv_fromUnicode(
      ctx->converter_.get(),
      &to_next,
      to_end,
      &utf16_from,
      utf16_next,
      nullptr,
      true,
      &dst_status
    );

    const bool dst_overflow = U_BUFFER_OVERFLOW_ERROR == dst_status;

    if (!U_SUCCESS(dst_status) && !dst_overflow) {
      from_next = chunk_from;
      to_next = chunk_to;

      IR_FRMT_WARN(
        "failure to convert from UTF16 to locale encoding while converting unicode system encoding to encoding '%s'",
        context_encoding().c_str()
      );

      return std::codecvt_base::error; // error occurred during final conversion
    }

    assert(utf16_from >= utf16 && IRESEARCH_COUNTOF(positions) > size_t(utf16_from - utf16));
    from_next = from + positions[utf16_from - utf16]; // update successfully converted

    const bool src_overflow = U_BUFFER_OVERFLOW_ERROR == src_status;

    if (!U_SUCCESS(src_status) && !src_overflow) {
      return std::codecvt_base::error; // error occurred during intermediary conversion
    }

    if (dst_overflow || (src_overflow && from_next >= from_end)) {
      return std::codecvt_base::partial; // destination buffer is not large enough
    }
  }

  return std::codecvt_base::ok;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief converter between an 'internal' utf8 representation and
|
|
/// an 'external' user-specified encoding and an
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
class codecvt8u_facet: public codecvtu_base<char> {
|
|
public:
|
|
codecvt8u_facet(converter_pool& converters): codecvtu_base(converters) {}
|
|
|
|
bool append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const;
|
|
|
|
protected:
|
|
virtual int do_encoding() const NOEXCEPT override { return 0; } // only non-zero for ASCII
|
|
virtual std::codecvt_base::result do_in(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
const extern_type*& from_next,
|
|
intern_type* to,
|
|
intern_type* to_end,
|
|
intern_type*& to_next
|
|
) const override;
|
|
virtual int do_max_length() const NOEXCEPT override;
|
|
virtual std::codecvt_base::result do_out(
|
|
state_type& state,
|
|
const intern_type* from,
|
|
const intern_type* from_end,
|
|
const intern_type*& from_next,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const override;
|
|
};
|
|
|
|
bool codecvt8u_facet::append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const {
|
|
struct sink_t: public icu::ByteSink {
|
|
std::basic_string<intern_type>& buf_;
|
|
bool error_;
|
|
sink_t(std::basic_string<intern_type>& buf): buf_(buf), error_(false) {}
|
|
virtual void Append(const char* bytes, int32_t n) override {
|
|
if (n < 0 || error_) {
|
|
error_ = true;
|
|
return;
|
|
}
|
|
|
|
buf_.append(bytes, n);
|
|
}
|
|
} sink(buf);
|
|
auto start = buf.size();
|
|
|
|
value.toUTF8(sink);
|
|
|
|
if (!sink.error_) {
|
|
return true;
|
|
}
|
|
|
|
IR_FRMT_WARN(
|
|
"ICU returned invalid string size while converting unicode string to UTF8"
|
|
);
|
|
|
|
buf.resize(start);
|
|
|
|
return false;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert input in the external (user-specified) encoding into UTF8,
///        going through an intermediary UTF16 buffer 'BUFFER_SIZE' at a time
/// @param state unused by this implementation
/// @param from start of the external input
/// @param from_end end of the external input
/// @param from_next set to one past the last input char accounted for
/// @param to start of the UTF8 output buffer
/// @param to_end end of the UTF8 output buffer
/// @param to_next set to one past the last output char written
/// @return ok if the whole input was processed,
///         partial if the output buffer could not hold the result,
///         error on a conversion failure or if no context is available (for a
///         conversion failure 'from_next'/'to_next' are rewound to the start of
///         the chunk being processed)
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvt8u_facet::do_in(
    state_type& state,
    const extern_type* from,
    const extern_type* from_end,
    const extern_type*& from_next,
    intern_type* to,
    intern_type* to_end,
    intern_type*& to_next
) const {
  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting encoding '%s' to unicode system encoding",
      context_encoding().c_str()
    );

    return std::codecvt_base::error;
  }

  UChar buf[BUFFER_SIZE];
  auto* buf_end = buf + IRESEARCH_COUNTOF(buf);
  int32_t src_offsets[IRESEARCH_COUNTOF(buf) + 1]; // +1 for end
  int32_t dst_offsets[IRESEARCH_COUNTOF(buf) * U8_MAX_LENGTH + 1]; // +1 for end

  ucnv_reset(ctx->converter_.get());

  // convert 'BUFFER_SIZE' at a time
  while (from_next < from_end) {
    auto* buf_next = buf;
    auto* from_next_prev = from_next;
    auto* to_next_prev = to_next;
    UErrorCode src_status = U_ZERO_ERROR;
    UErrorCode dst_status = U_ZERO_ERROR;

    // convert from desired encoding to the intermediary representation
    ucnv_toUnicode(
      ctx->converter_.get(),
      &buf_next,
      buf_end,
      &from_next,
      from_end,
      src_offsets,
      true,
      &src_status
    );

    if (!U_SUCCESS(src_status) && U_BUFFER_OVERFLOW_ERROR != src_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from locale encoding to UTF16 while converting encoding '%s' unicode system encoding",
        context_encoding().c_str()
      );

      return std::codecvt_base::error; // error occurred during intermediary conversion
    }

    auto buf_pos = buf_next - buf;

    // bounds-check the actual index (previously the comparison result was cast)
    assert(buf_pos >= 0 && IRESEARCH_COUNTOF(src_offsets) > size_t(buf_pos));
    src_offsets[buf_pos] = from_next - from_next_prev; // remember past-end position

    auto* buf_dst_next = buf;
    auto* buf_dst_end = buf_next;

    // the loop below always dereferences 'buf_dst_next' at least once, so skip
    // it when the first stage produced no UTF16 chars (nothing initialized)
    if (buf_dst_next < buf_dst_end) {
      // convert one char at a time to track source position to destination position
      do {
        int32_t to_used = 0;

        u_strToUTF8(
          to_next,
          to_end - to_next,
          &to_used, // set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow
          buf_dst_next,
          U_IS_SURROGATE(*buf_dst_next) ? 2 : 1, // 1 char at a time to track source/destination position mapping
          &dst_status
        );

        if (U_BUFFER_OVERFLOW_ERROR == dst_status) {
          break; // conversion buffer not large enough to hold result
        }

        if (U_SUCCESS(dst_status) && to_used < 0) {
          dst_status = U_INTERNAL_PROGRAM_ERROR; // ICU internal error
        }

        if (!U_SUCCESS(dst_status) && U_BUFFER_OVERFLOW_ERROR != dst_status) {
          from_next = from_next_prev;
          to_next = to_next_prev;

          IR_FRMT_WARN(
            "failure to convert from UTF16 to UTF8 while converting encoding '%s' to unicode system encoding",
            context_encoding().c_str()
          );

          return std::codecvt_base::error; // error occurred during final conversion
        }

        // all of 'to_used' since if not enough space in 'to' buffer then would have had U_BUFFER_OVERFLOW_ERROR
        while (to_used) {
          assert(to_next >= to_next_prev && IRESEARCH_COUNTOF(dst_offsets) > size_t(to_next - to_next_prev));
          dst_offsets[to_next - to_next_prev] = buf_dst_next - buf; // remember converted position (start)
          ++to_next;
          --to_used;
        }

        buf_dst_next += U_IS_SURROGATE(*buf_dst_next) ? 2 : 1; // +1 for 1 char at a time (+2 for surrogate)
      } while (buf_dst_next + 1 < buf_dst_end); // +1 for possible surrogate
    }

    assert(to_next >= to_next_prev && IRESEARCH_COUNTOF(dst_offsets) > size_t(to_next - to_next_prev));
    dst_offsets[to_next - to_next_prev] = buf_dst_next - buf; // remember past-end position

    auto buf_dst_pos = dst_offsets[to_next - to_next_prev];

    assert(buf_dst_pos >= 0 && IRESEARCH_COUNTOF(src_offsets) > size_t(buf_dst_pos));
    from_next = from_next_prev + src_offsets[buf_dst_pos]; // update successfully converted

    if (U_BUFFER_OVERFLOW_ERROR == dst_status
        || (U_BUFFER_OVERFLOW_ERROR == src_status && from_next >= from_end)) {
      return std::codecvt_base::partial; // destination buffer is not large enough
    }
  }

  return std::codecvt_base::ok;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief report the maximum char size of the external converter
/// @return the value of ucnv_getMaxCharSize(...) for the context converter or
///         -1 if no conversion context could be obtained
////////////////////////////////////////////////////////////////////////////////
int codecvt8u_facet::do_max_length() const NOEXCEPT {
  auto conv_ctx = context();

  if (!conv_ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while computing maximum number of required input characters from encoding '%s' to produce a single output character",
      context_encoding().c_str()
    );

    return -1;
  }

  // for non-ASCII this will produce 2+ UTF8 encoded chars
  const int max_char_size = ucnv_getMaxCharSize(conv_ctx->converter_.get());

  return max_char_size;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert UTF8 input into the external (user-specified) encoding,
///        going through an intermediary UTF16 buffer 'BUFFER_SIZE' at a time
/// @param state unused by this implementation
/// @param from start of the UTF8 input
/// @param from_end end of the UTF8 input
/// @param from_next set to one past the last input char accounted for
/// @param to start of the external output buffer
/// @param to_end end of the external output buffer
/// @param to_next set to one past the last output char written
/// @return ok if the whole input was processed,
///         partial if the output buffer could not hold the result,
///         error on a conversion failure or if no context is available
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvt8u_facet::do_out(
    state_type& state,
    const intern_type* from,
    const intern_type* from_end,
    const intern_type*& from_next,
    extern_type* to,
    extern_type* to_end,
    extern_type*& to_next
) const {
  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting unicode system encoding to encoding '%s'",
      context_encoding().c_str()
    );

    return std::codecvt_base::error;
  }

  UChar buf[BUFFER_SIZE];
  auto* buf_end = buf + IRESEARCH_COUNTOF(buf);
  size_t offsets[IRESEARCH_COUNTOF(buf) + 1]; // +1 for end

  ucnv_reset(ctx->converter_.get());

  // convert 'BUFFER_SIZE' at a time
  while (from_next < from_end) {
    auto* buf_next = buf;
    auto* from_next_prev = from_next;
    auto* to_next_prev = to_next;
    UErrorCode src_status = U_ZERO_ERROR;
    UErrorCode dst_status = U_ZERO_ERROR;

    // convert one char at a time to track source position to destination position
    do {
      size_t from_size = 1;
      int32_t buf_used = 0;

      if (!U8_IS_SINGLE(*from_next)) {
        // find all the tail UTF8 chars if possible
        for (auto* from_tail = from_next + 1;
             from_tail < from_end && U8_IS_TRAIL(*from_tail);
             ++from_tail) {
          ++from_size;
        }
      }

      u_strFromUTF8(
        buf_next,
        buf_end - buf_next,
        &buf_used, // set to the number of output units corresponding to the transformation of all the input units, even in case of a buffer overflow
        from_next,
        from_size, // 1 char at a time to track source/destination position mapping
        &src_status
      );

      if (U_BUFFER_OVERFLOW_ERROR == src_status) {
        break; // conversion buffer not large enough to hold result
      }

      if (U_SUCCESS(src_status) && buf_used < 0) {
        src_status = U_INTERNAL_PROGRAM_ERROR; // ICU internal error
      }

      if (!U_SUCCESS(src_status)) {
        IR_FRMT_WARN(
          "failure to convert from UTF8 to UTF16 while converting unicode system encoding to encoding '%s'",
          context_encoding().c_str()
        );

        break; // finish copying all successfully converted
      }

      // all of 'buf_used' since if not enough space in 'buf' buffer then would have had U_BUFFER_OVERFLOW_ERROR
      while (buf_used) {
        assert(buf_next >= buf && IRESEARCH_COUNTOF(offsets) > size_t(buf_next - buf));
        // offsets are relative to the start of this chunk because they are
        // added to 'from_next_prev' below (they used to be relative to 'from',
        // which overshoots for every chunk after the first)
        offsets[buf_next - buf] = from_next - from_next_prev; // remember converted position
        ++buf_next;
        --buf_used;
      }

      from_next += from_size; // +1 for 1 char at a time
    } while (from_next + 3 < from_end); // +3 for possible surrogates

    auto buf_pos = buf_next - buf;

    assert(buf_pos >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(buf_pos));
    offsets[buf_pos] = from_next - from_next_prev; // remember past-end position

    const UChar* buf_dst_next = buf;
    auto* buf_dst_end = buf_next;

    // convert intermediary representation to the desired encoding
    ucnv_fromUnicode(
      ctx->converter_.get(),
      &to_next,
      to_end,
      &buf_dst_next,
      buf_dst_end,
      nullptr,
      true,
      &dst_status
    );

    if (!U_SUCCESS(dst_status) && U_BUFFER_OVERFLOW_ERROR != dst_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from UTF16 to locale encoding while converting unicode system encoding to encoding '%s'",
        context_encoding().c_str()
      );

      return std::codecvt_base::error; // error occurred during final conversion
    }

    auto buf_dst_pos = buf_dst_next - buf;

    assert(buf_dst_pos >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(buf_dst_pos));
    from_next = from_next_prev + offsets[buf_dst_pos]; // update successfully converted

    if (!U_SUCCESS(src_status) && U_BUFFER_OVERFLOW_ERROR != src_status) {
      return std::codecvt_base::error; // error occurred during intermediary conversion
    }

    if (U_BUFFER_OVERFLOW_ERROR == dst_status
        || (U_BUFFER_OVERFLOW_ERROR == src_status && from_next >= from_end)) {
      return std::codecvt_base::partial; // destination buffer is not large enough
    }
  }

  return std::codecvt_base::ok;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief converter between an 'internal' utf8/utf16/utf32 representation,
|
|
/// based on sizeof(wchar_t), and
|
|
/// an 'external' user-specified encoding
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
class codecvtwu_facet: public std::codecvt<wchar_t, char, mbstate_t> {
|
|
public:
|
|
codecvtwu_facet(converter_pool& pool): impl_(pool) {}
|
|
|
|
bool append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const {
|
|
static_assert(sizeof(impl_t::intern_type) == sizeof(intern_type), "sizeof(impl_t::intern_type) != sizeof(intern_type)");
|
|
return impl_.append(
|
|
reinterpret_cast<std::basic_string<impl_t::intern_type>&>(buf),
|
|
value
|
|
);
|
|
}
|
|
|
|
protected:
|
|
virtual bool do_always_noconv() const NOEXCEPT override {
|
|
return impl_.always_noconv();
|
|
}
|
|
|
|
virtual int do_encoding() const NOEXCEPT override {
|
|
return impl_.encoding();
|
|
}
|
|
|
|
virtual std::codecvt_base::result do_in(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
const extern_type*& from_next,
|
|
intern_type* to,
|
|
intern_type* to_end,
|
|
intern_type*& to_next
|
|
) const override {
|
|
static_assert(sizeof(impl_t::intern_type) == sizeof(intern_type), "sizeof(impl_t::intern_type) != sizeof(intern_type)");
|
|
return impl_.in(
|
|
state,
|
|
from,
|
|
from_end,
|
|
from_next,
|
|
reinterpret_cast<impl_t::intern_type*>(to),
|
|
reinterpret_cast<impl_t::intern_type*>(to_end),
|
|
reinterpret_cast<impl_t::intern_type*&>(to_next)
|
|
);
|
|
}
|
|
|
|
virtual int do_length(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
std::size_t max
|
|
) const override {
|
|
return impl_.length(state, from, from_end, max);
|
|
}
|
|
|
|
virtual int do_max_length() const NOEXCEPT override {
|
|
return impl_.max_length();
|
|
}
|
|
|
|
virtual std::codecvt_base::result do_out(
|
|
state_type& state,
|
|
const intern_type* from,
|
|
const intern_type* from_end,
|
|
const intern_type*& from_next,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const override {
|
|
static_assert(sizeof(impl_t::intern_type) == sizeof(intern_type), "sizeof(impl_t::intern_type) != sizeof(intern_type)");
|
|
return impl_.out(
|
|
state,
|
|
reinterpret_cast<const impl_t::intern_type*>(from),
|
|
reinterpret_cast<const impl_t::intern_type*>(from_end),
|
|
reinterpret_cast<const impl_t::intern_type*&>(from_next),
|
|
to,
|
|
to_end,
|
|
to_next
|
|
);
|
|
}
|
|
|
|
virtual std::codecvt_base::result do_unshift(
|
|
state_type& state,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const override {
|
|
return impl_.unshift(state, to, to_end, to_next);
|
|
}
|
|
|
|
private:
|
|
typedef std::conditional<
|
|
sizeof(char32_t) == sizeof(wchar_t),
|
|
codecvt32_facet,
|
|
std::conditional<
|
|
sizeof(char16_t) == sizeof(wchar_t),
|
|
codecvt16_facet,
|
|
std::conditional<
|
|
sizeof(char) == sizeof(wchar_t),
|
|
codecvt8u_facet,
|
|
void
|
|
>::type
|
|
>::type
|
|
>::type impl_t; // unicode implementation depends on sizeof(whcar_t)
|
|
|
|
impl_t impl_;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief base implementation for converters between 'internal' representation
|
|
/// and an 'external' user-specified encoding (custom internal)
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
template<typename InternType>
|
|
class codecvt_base: public std::codecvt<InternType, char, mbstate_t> {
|
|
public:
|
|
typedef std::codecvt<InternType, char, mbstate_t> parent_t;
|
|
typedef typename parent_t::extern_type extern_type;
|
|
typedef typename parent_t::intern_type intern_type;
|
|
typedef typename parent_t::state_type state_type;
|
|
|
|
codecvt_base(converter_pool& converters_int, converter_pool& converters_ext)
|
|
: contexts_(POOL_SIZE),
|
|
converters_ext_(converters_ext),
|
|
converters_int_(converters_int) {
|
|
}
|
|
|
|
protected:
|
|
struct context_t {
|
|
DECLARE_UNIQUE_PTR(context_t);
|
|
std::basic_string<typename parent_t::intern_type> buf_;
|
|
converter_pool::ptr converter_ext_;
|
|
converter_pool::ptr converter_int_;
|
|
|
|
static ptr make(converter_pool& pool_int, converter_pool& pool_ext) {
|
|
auto ctx = irs::memory::make_unique<context_t>();
|
|
|
|
if (!ctx) {
|
|
return nullptr;
|
|
}
|
|
|
|
ctx->converter_ext_ = pool_ext.get();
|
|
ctx->converter_int_ = pool_int.get();
|
|
|
|
return ctx->converter_ext_ && ctx->converter_int_
|
|
? std::move(ctx) : nullptr;
|
|
}
|
|
};
|
|
typedef irs::unbounded_object_pool<context_t> context_pool;
|
|
|
|
typename context_pool::ptr context() const {
|
|
return contexts_.emplace(converters_int_, converters_ext_);
|
|
}
|
|
|
|
const std::string& context_encoding_ext() const NOEXCEPT {
|
|
return converters_ext_.encoding();
|
|
}
|
|
|
|
const std::string& context_encoding_int() const NOEXCEPT {
|
|
return converters_int_.encoding();
|
|
}
|
|
|
|
virtual bool do_always_noconv() const NOEXCEPT final override {
|
|
return false; // not an identity conversion
|
|
}
|
|
|
|
virtual int do_encoding() const NOEXCEPT override = 0;
|
|
virtual std::codecvt_base::result do_in(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
const extern_type*& from_next,
|
|
intern_type* to,
|
|
intern_type* to_end,
|
|
intern_type*& to_next
|
|
) const override = 0;
|
|
virtual int do_length(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
std::size_t max
|
|
) const final override;
|
|
virtual int do_max_length() const NOEXCEPT override = 0;
|
|
virtual std::codecvt_base::result do_out(
|
|
state_type& state,
|
|
const intern_type* from,
|
|
const intern_type* from_end,
|
|
const intern_type*& from_next,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const override = 0;
|
|
virtual std::codecvt_base::result do_unshift(
|
|
state_type& state,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const final override;
|
|
|
|
private:
|
|
mutable context_pool contexts_;
|
|
converter_pool& converters_ext_;
|
|
converter_pool& converters_int_;
|
|
};
|
|
|
|
template<typename InternType>
|
|
int codecvt_base<InternType>::do_length(
|
|
state_type& state,
|
|
const extern_type* from,
|
|
const extern_type* from_end,
|
|
std::size_t max
|
|
) const {
|
|
auto ctx = context();
|
|
|
|
if (!ctx) {
|
|
IR_FRMT_WARN(
|
|
"failure to get conversion context while computing number of required input characters from encoding '%s' to produce at most '" IR_SIZE_T_SPECIFIER "' system encoding '%s' output characters",
|
|
context_encoding_ext().c_str(), max, context_encoding_int().c_str()
|
|
);
|
|
|
|
return std::codecvt_base::error;
|
|
}
|
|
|
|
ctx->buf_.resize(max);
|
|
|
|
auto* from_next = from;
|
|
auto* to = &(ctx->buf_[0]);
|
|
auto* to_end = to + max;
|
|
auto* to_next = to;
|
|
auto res = do_in(state, from, from_end, from_next, to, to_end, to_next);
|
|
|
|
return res == std::codecvt_base::ok ? std::distance(from, from_next) : 0;
|
|
}
|
|
|
|
template<typename InternType>
|
|
std::codecvt_base::result codecvt_base<InternType>::do_unshift(
|
|
state_type& state,
|
|
extern_type* to,
|
|
extern_type* to_end,
|
|
extern_type*& to_next
|
|
) const {
|
|
to_next = to;
|
|
|
|
return std::codecvt_base::ok;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief converter between an 'internal' 'system' encoding representation and
|
|
///        an 'external' user-specified encoding
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// char-based facet: both the internal and the external side are byte encodings
// handled by the converters taken from the two pools.
class codecvt8_facet final: public codecvt_base<char> {
 public:
  codecvt8_facet(converter_pool& pool_int, converter_pool& pool_ext)
    : codecvt_base<char>(pool_int, pool_ext) {
  }

  // append 'value' to 'buf', @return success
  bool append(
      std::basic_string<intern_type>& buf, const icu::UnicodeString& value
  ) const;

 protected:
  virtual int do_encoding() const NOEXCEPT override;

  virtual std::codecvt_base::result do_in(
      state_type& state,
      const extern_type* from,
      const extern_type* from_end,
      const extern_type*& from_next,
      intern_type* to,
      intern_type* to_end,
      intern_type*& to_next
  ) const override;

  virtual int do_max_length() const NOEXCEPT override;

  virtual std::codecvt_base::result do_out(
      state_type& state,
      const intern_type* from,
      const intern_type* from_end,
      const intern_type*& from_next,
      extern_type* to,
      extern_type* to_end,
      extern_type*& to_next
  ) const override;
};
|
|
|
|
|
|
bool codecvt8_facet::append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const {
|
|
auto ctx = context();
|
|
|
|
if (!ctx) {
|
|
IR_FRMT_WARN(
|
|
"failure to get conversion context while converting unicode string to encoding '%s'",
|
|
context_encoding_ext().c_str()
|
|
);
|
|
|
|
return false;
|
|
}
|
|
|
|
auto size = value.length();
|
|
|
|
if (size < 0) {
|
|
IR_FRMT_WARN(
|
|
"ICU returned invalid string size while converting unicode string to encoding '%s'",
|
|
context_encoding_ext().c_str()
|
|
);
|
|
|
|
return false;
|
|
}
|
|
|
|
auto* from_next = value.getBuffer();
|
|
auto* from_end = from_next + size;
|
|
auto start = buf.size();
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
ucnv_reset(ctx->converter_ext_.get());
|
|
|
|
// convert 'BUFFER_SIZE' at a time
|
|
do {
|
|
auto offset = buf.size();
|
|
|
|
buf.resize(buf.size() + BUFFER_SIZE);
|
|
|
|
auto* to_next = &buf[offset];
|
|
auto* to_end = to_next + BUFFER_SIZE;
|
|
|
|
status = U_ZERO_ERROR;
|
|
ucnv_fromUnicode(
|
|
ctx->converter_int_.get(),
|
|
&to_next,
|
|
to_end,
|
|
&from_next,
|
|
from_end,
|
|
nullptr,
|
|
true,
|
|
&status
|
|
);
|
|
|
|
if (U_SUCCESS(status)) {
|
|
buf.resize(to_next - &buf[0]); // truncate to actual data size
|
|
|
|
return true;
|
|
}
|
|
} while (status == U_BUFFER_OVERFLOW_ERROR);
|
|
|
|
IR_FRMT_WARN(
|
|
"failure while converting unicode string to encoding '%s'",
|
|
context_encoding_ext().c_str()
|
|
);
|
|
|
|
buf.resize(start); // truncate all appended data
|
|
|
|
return false;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @return -1 if no conversion context could be obtained,
///         0 if either converter is not reported as fixed-width,
///         otherwise the larger of the two converters' minimum char sizes
////////////////////////////////////////////////////////////////////////////////
int codecvt8_facet::do_encoding() const NOEXCEPT {
  auto conv_ctx = context();

  if (!conv_ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while computing number of required input characters from encoding '%s' to produce a single system encoding '%s' output character",
      context_encoding_ext().c_str(), context_encoding_int().c_str()
    );

    return -1;
  }

  UConverter* const cnv_ext = conv_ctx->converter_ext_.get();
  UConverter* const cnv_int = conv_ctx->converter_int_.get();
  UErrorCode status = U_ZERO_ERROR;

  // variable-width on either side means there is no constant ratio
  if (!ucnv_isFixedWidth(cnv_ext, &status)
      || !ucnv_isFixedWidth(cnv_int, &status)) {
    return 0;
  }

  // the exact number of externT characters that correspond to one internT character
  return (std::max)(
    ucnv_getMinCharSize(cnv_ext),
    ucnv_getMinCharSize(cnv_int)
  );
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert external-encoding input into the internal encoding in two
///        stages (external -> UTF16 pivot -> internal), one pivot buffer at a
///        time
/// @return ok once the whole input has been processed,
///         partial if the output buffer could not hold the result,
///         error on a conversion failure ('from_next'/'to_next' rewound to the
///         start of the failing chunk) or if no context is available
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvt8_facet::do_in(
    state_type& state,
    const extern_type* from,
    const extern_type* from_end,
    const extern_type*& from_next,
    intern_type* to,
    intern_type* to_end,
    intern_type*& to_next
) const {
  auto conv_ctx = context();

  from_next = from;
  to_next = to;

  if (!conv_ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting encoding '%s' to system encoding '%s'",
      context_encoding_ext().c_str(), context_encoding_int().c_str()
    );

    return std::codecvt_base::error;
  }

  UChar pivot[BUFFER_SIZE];
  auto* pivot_limit = pivot + IRESEARCH_COUNTOF(pivot);
  int32_t offsets[IRESEARCH_COUNTOF(pivot) + 1]; // +1 for end
  UConverter* const cnv_ext = conv_ctx->converter_ext_.get();
  UConverter* const cnv_int = conv_ctx->converter_int_.get();

  ucnv_reset(cnv_ext);
  ucnv_reset(cnv_int);

  // each pass fills the pivot buffer at most once
  while (from_next < from_end) {
    auto* const chunk_from = from_next; // rewind points for this pass
    auto* const chunk_to = to_next;
    auto* pivot_next = pivot;
    UErrorCode src_status = U_ZERO_ERROR;

    // stage 1: external encoding -> pivot, recording a source offset per UChar
    ucnv_toUnicode(
      cnv_ext, &pivot_next, pivot_limit, &from_next, from_end, offsets, true, &src_status
    );

    if (U_FAILURE(src_status) && U_BUFFER_OVERFLOW_ERROR != src_status) {
      from_next = chunk_from;
      to_next = chunk_to;

      IR_FRMT_WARN(
        "failure to convert from locale encoding to UTF16 while converting encoding '%s' to system encoding '%s'",
        context_encoding_ext().c_str(), context_encoding_int().c_str()
      );

      return std::codecvt_base::error; // stage 1 failed
    }

    auto pivot_used = pivot_next - pivot;

    assert(pivot_used >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(pivot_used));
    offsets[pivot_used] = from_next - chunk_from; // past-end source position

    const UChar* pivot_read = pivot;
    UErrorCode dst_status = U_ZERO_ERROR;

    // stage 2: pivot -> internal encoding
    ucnv_fromUnicode(
      cnv_int, &to_next, to_end, &pivot_read, pivot_next, nullptr, true, &dst_status
    );

    if (U_FAILURE(dst_status) && U_BUFFER_OVERFLOW_ERROR != dst_status) {
      from_next = chunk_from;
      to_next = chunk_to;

      IR_FRMT_WARN(
        "failure to convert from UTF16 to internal encoding while converting encoding '%s' to system encoding '%s'",
        context_encoding_ext().c_str(), context_encoding_int().c_str()
      );

      return std::codecvt_base::error; // stage 2 failed
    }

    auto pivot_done = pivot_read - pivot;

    // map the pivot read position back onto the source
    assert(pivot_done >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(pivot_done));
    from_next = chunk_from + offsets[pivot_done];

    const bool dst_overflow = U_BUFFER_OVERFLOW_ERROR == dst_status;
    const bool src_overflow = U_BUFFER_OVERFLOW_ERROR == src_status;

    if (dst_overflow || (src_overflow && from_next >= from_end)) {
      return std::codecvt_base::partial; // destination buffer is not large enough
    }
  }

  return std::codecvt_base::ok;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @return the larger of the two converters' maximum char sizes or
///         -1 if no conversion context could be obtained
////////////////////////////////////////////////////////////////////////////////
int codecvt8_facet::do_max_length() const NOEXCEPT {
  auto conv_ctx = context();

  if (!conv_ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while computing maximum number of required input characters from encoding '%s' to produce a single system encoding '%s' output character",
      context_encoding_ext().c_str(), context_encoding_int().c_str()
    );

    return -1;
  }

  const auto ext_max = ucnv_getMaxCharSize(conv_ctx->converter_ext_.get());
  const auto int_max = ucnv_getMaxCharSize(conv_ctx->converter_int_.get());

  return (std::max)(ext_max, int_max);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert internal-encoding input into the external encoding in two
///        stages (internal -> UTF16 intermediary -> external), 'BUFFER_SIZE'
///        intermediary chars at a time
/// @param state unused by this implementation
/// @param from start of the internal input
/// @param from_end end of the internal input
/// @param from_next set to one past the last input char accounted for
/// @param to start of the external output buffer
/// @param to_end end of the external output buffer
/// @param to_next set to one past the last output char written
/// @return ok once the whole input has been processed,
///         partial if the output buffer could not hold the result,
///         error on a conversion failure ('from_next'/'to_next' rewound to the
///         start of the failing chunk) or if no context is available
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvt8_facet::do_out(
    state_type& state,
    const intern_type* from,
    const intern_type* from_end,
    const intern_type*& from_next,
    extern_type* to,
    extern_type* to_end,
    extern_type*& to_next
) const {
  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting system encoding '%s' to encoding '%s'",
      context_encoding_int().c_str(), context_encoding_ext().c_str()
    );

    return std::codecvt_base::error;
  }

  UChar buf[BUFFER_SIZE];
  auto* buf_end = buf + IRESEARCH_COUNTOF(buf);
  int32_t offsets[IRESEARCH_COUNTOF(buf) + 1]; // +1 for end

  ucnv_reset(ctx->converter_ext_.get());
  ucnv_reset(ctx->converter_int_.get());

  // convert 'BUFFER_SIZE' at a time
  while (from_next < from_end) {
    auto* buf_next = buf;
    auto* from_next_prev = from_next;
    auto* to_next_prev = to_next;
    UErrorCode src_status = U_ZERO_ERROR;
    UErrorCode dst_status = U_ZERO_ERROR;

    // convert from the internal encoding to the intermediary representation
    ucnv_toUnicode(
      ctx->converter_int_.get(),
      &buf_next,
      buf_end,
      &from_next,
      from_end,
      offsets,
      true,
      &src_status
    );

    if (!U_SUCCESS(src_status) && U_BUFFER_OVERFLOW_ERROR != src_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from locale encoding to UTF16 while converting system encoding '%s' to encoding '%s'",
        context_encoding_int().c_str(), context_encoding_ext().c_str()
      );

      return std::codecvt_base::error; // error occurred during intermediary conversion
    }

    auto buf_pos = buf_next - buf;

    assert(buf_pos >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(buf_pos));
    offsets[buf_pos] = from_next - from_next_prev; // remember past-end position

    const UChar* buf_dst_next = buf;
    auto* buf_dst_end = buf_next;

    // convert intermediary representation to the desired encoding
    ucnv_fromUnicode(
      ctx->converter_ext_.get(),
      &to_next,
      to_end,
      &buf_dst_next,
      buf_dst_end,
      nullptr,
      true,
      &dst_status
    );

    if (!U_SUCCESS(dst_status) && U_BUFFER_OVERFLOW_ERROR != dst_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from UTF16 to locale encoding while converting system encoding '%s' to encoding '%s'",
        context_encoding_int().c_str(), context_encoding_ext().c_str()
      );

      return std::codecvt_base::error; // error occurred during final conversion
    }

    auto buf_dst_pos = buf_dst_next - buf;

    assert(buf_dst_pos >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(buf_dst_pos));
    from_next = from_next_prev + offsets[buf_dst_pos]; // update successfully converted

    // 'src_status' can only be a success or U_BUFFER_OVERFLOW_ERROR here since
    // any other value already returned above, hence no second error check
    if (U_BUFFER_OVERFLOW_ERROR == dst_status
        || (U_BUFFER_OVERFLOW_ERROR == src_status && from_next >= from_end)) {
      return std::codecvt_base::partial; // destination buffer is not large enough
    }
  }

  return std::codecvt_base::ok;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
/// @brief converter between an 'internal' 'system' encoding representation and
|
|
///        an 'external' user-specified encoding
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// wchar_t-based facet: the internal side is stored in wchar_t units, the
// external side is a byte encoding; converters come from the two pools.
class codecvtw_facet final: public codecvt_base<wchar_t> {
 public:
  codecvtw_facet(converter_pool& pool_int, converter_pool& pool_ext)
    : codecvt_base<wchar_t>(pool_int, pool_ext) {
  }

  // append 'value' to 'buf', @return success
  bool append(
      std::basic_string<intern_type>& buf, const icu::UnicodeString& value
  ) const;

 protected:
  virtual int do_encoding() const NOEXCEPT override;

  virtual std::codecvt_base::result do_in(
      state_type& state,
      const extern_type* from,
      const extern_type* from_end,
      const extern_type*& from_next,
      intern_type* to,
      intern_type* to_end,
      intern_type*& to_next
  ) const override;

  virtual int do_max_length() const NOEXCEPT override;

  virtual std::codecvt_base::result do_out(
      state_type& state,
      const intern_type* from,
      const intern_type* from_end,
      const intern_type*& from_next,
      extern_type* to,
      extern_type* to_end,
      extern_type*& to_next
  ) const override;
};
|
|
|
|
bool codecvtw_facet::append(
|
|
std::basic_string<intern_type>& buf, const icu::UnicodeString& value
|
|
) const {
|
|
auto ctx = context();
|
|
|
|
if (!ctx) {
|
|
IR_FRMT_WARN(
|
|
"failure to get conversion context while converting unicode string to encoding '%s'",
|
|
context_encoding_ext().c_str()
|
|
);
|
|
|
|
return false;
|
|
}
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
const auto char_size =
|
|
size_t((std::max)(int8_t(1), ucnv_getMinCharSize(ctx->converter_int_.get())));
|
|
|
|
// cannot support conversion to variable-width system encoding since no way to
|
|
// determine internal char size
|
|
// cannot support conversion of fixed-width system encoding where
|
|
// sizeof(intern_type) < sizeof(<internal char>)
|
|
if (!ucnv_isFixedWidth(ctx->converter_int_.get(), &status)
|
|
|| sizeof(intern_type) < char_size) {
|
|
IR_FRMT_WARN(
|
|
"unsupported encoding while converting unicode string encoding '%s'",
|
|
context_encoding_ext().c_str()
|
|
);
|
|
|
|
return false;
|
|
}
|
|
|
|
auto size = value.length();
|
|
|
|
if (size < 0) {
|
|
IR_FRMT_WARN(
|
|
"ICU returned invalid string size while converting unicode string to encoding '%s'",
|
|
context_encoding_ext().c_str()
|
|
);
|
|
|
|
return false;
|
|
}
|
|
|
|
auto* from_next = value.getBuffer();
|
|
auto* from_end = from_next + size;
|
|
auto start = buf.size();
|
|
|
|
ucnv_reset(ctx->converter_ext_.get());
|
|
|
|
// convert 'BUFFER_SIZE' at a time
|
|
do {
|
|
auto offset = buf.size();
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
buf.resize(buf.size() + BUFFER_SIZE);
|
|
|
|
auto* to_next = &buf[offset];
|
|
auto* to_end = to_next + BUFFER_SIZE;// * sizeof(intern_type);
|
|
static_assert(sizeof(char) == 1, "sizeof(char) != 1"); // otherwise have to divide lower
|
|
|
|
// convert from intermediary representation to the internal encoding
|
|
if (sizeof(intern_type) == char_size) {
|
|
auto* buf_to_next = reinterpret_cast<char*>(to_next);
|
|
auto* buf_to_end = reinterpret_cast<char*>(to_next + BUFFER_SIZE);
|
|
|
|
ucnv_fromUnicode(
|
|
ctx->converter_ext_.get(),
|
|
&buf_to_next,
|
|
buf_to_end,
|
|
&from_next,
|
|
from_end,
|
|
nullptr,
|
|
true,
|
|
&status
|
|
);
|
|
} else {
|
|
intern_type ch = 0;
|
|
auto* buf_to = reinterpret_cast<char*>(&ch) + (sizeof(intern_type) - char_size);
|
|
auto* buf_to_end = reinterpret_cast<char*>(&ch + 1); // +1 for char after buf
|
|
|
|
// convert one char at a time and left pad with 0's
|
|
while (to_next < to_end) {
|
|
auto* buf_to_next = buf_to;
|
|
|
|
status = U_ZERO_ERROR;
|
|
ucnv_fromUnicode(
|
|
ctx->converter_int_.get(),
|
|
&buf_to_next,
|
|
buf_to_end,
|
|
&from_next,
|
|
from_end,
|
|
nullptr,
|
|
true,
|
|
&status
|
|
);
|
|
|
|
if (!U_SUCCESS(status) && U_BUFFER_OVERFLOW_ERROR != status) {
|
|
break;
|
|
}
|
|
|
|
*to_next = ch; // copy over char
|
|
++to_next;
|
|
ch = 0;
|
|
|
|
if (U_SUCCESS(status)) {
|
|
break; // nothing was converted from source to destination
|
|
}
|
|
}
|
|
}
|
|
|
|
if (U_SUCCESS(status)) {
|
|
buf.resize(to_next - &buf[0]); // truncate to actual data size
|
|
|
|
return true;
|
|
}
|
|
} while (status == U_BUFFER_OVERFLOW_ERROR);
|
|
|
|
IR_FRMT_WARN(
|
|
"failure while converting unicode string to encoding '%s'",
|
|
context_encoding_ext().c_str()
|
|
);
|
|
|
|
buf.resize(start); // truncate all appended data
|
|
|
|
return false;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @return -1 if no conversion context could be obtained,
///         0 if the external converter is not reported as fixed-width,
///         otherwise the external converter's minimum char size
////////////////////////////////////////////////////////////////////////////////
int codecvtw_facet::do_encoding() const NOEXCEPT {
  auto conv_ctx = context();

  if (!conv_ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while computing number of required input characters from encoding '%s' to produce a single system encoding '%s' output character",
      context_encoding_ext().c_str(), context_encoding_int().c_str()
    );

    return -1;
  }

  UConverter* const cnv_ext = conv_ctx->converter_ext_.get();
  UErrorCode status = U_ZERO_ERROR;

  if (!ucnv_isFixedWidth(cnv_ext, &status)) {
    return 0; // no constant extern/intern ratio
  }

  // the exact number of externT characters that correspond to one internT character
  return ucnv_getMinCharSize(cnv_ext);
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert [from, from_end) from the external encoding into the
///        internal (system) encoding at [to, to_end), going through an
///        intermediary UTF16 buffer of 'BUFFER_SIZE' UChars at a time
/// @param state not used by this implementation
/// @param from_next set to the first source char that was not converted
/// @param to_next set to one past the last destination char produced
/// @return 'ok' if the whole input was converted,
///         'partial' if the destination was too small,
///         'error' if no context is available, the internal encoding is not
///         supported or ICU reported a conversion failure (in which case
///         'from_next'/'to_next' are rolled back to the start of the batch)
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvtw_facet::do_in(
    state_type& state,
    const extern_type* from,
    const extern_type* from_end,
    const extern_type*& from_next,
    intern_type* to,
    intern_type* to_end,
    intern_type*& to_next
) const {
  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting encoding '%s' to system encoding '%s'",
      context_encoding_ext().c_str(), context_encoding_int().c_str()
    );

    return std::codecvt_base::error;
  }

  UErrorCode status = U_ZERO_ERROR;
  const auto char_size =
    size_t((std::max)(int8_t(1), ucnv_getMinCharSize(ctx->converter_int_.get())));

  // cannot support conversion to variable-width system encoding since no way to
  // determine internal char size
  // cannot support conversion of fixed-width system encoding where
  // sizeof(intern_type) < sizeof(<internal char>)
  if (!ucnv_isFixedWidth(ctx->converter_int_.get(), &status)
      || sizeof(intern_type) < char_size) {
    IR_FRMT_WARN(
      "unsupported encoding while converting encoding '%s' to system encoding '%s'",
      context_encoding_ext().c_str(), context_encoding_int().c_str()
    );

    return std::codecvt_base::error;
  }

  UChar buf[BUFFER_SIZE];
  auto* buf_end = buf + IRESEARCH_COUNTOF(buf);
  int32_t offsets[IRESEARCH_COUNTOF(buf) + 1]; // +1 for end

  ucnv_reset(ctx->converter_ext_.get());
  ucnv_reset(ctx->converter_int_.get());

  // convert 'BUFFER_SIZE' at a time
  while (from_next < from_end) {
    auto* buf_next = buf;
    auto* from_next_prev = from_next; // rollback position for the source
    auto* to_next_prev = to_next; // rollback position for the destination
    UErrorCode src_status = U_ZERO_ERROR;
    UErrorCode dst_status = U_ZERO_ERROR;

    // convert from desired encoding to the intermediary representation
    // ('offsets' maps each produced UChar to its position in the source)
    ucnv_toUnicode(
      ctx->converter_ext_.get(),
      &buf_next,
      buf_end,
      &from_next,
      from_end,
      offsets,
      true,
      &src_status
    );

    if (!U_SUCCESS(src_status) && U_BUFFER_OVERFLOW_ERROR != src_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from locale encoding to UTF16 while converting encoding '%s' to system encoding '%s'",
        context_encoding_ext().c_str(), context_encoding_int().c_str()
      );

      return std::codecvt_base::error; // error occured during final conversion
    }

    auto buf_pos = buf_next - buf;

    assert(buf_pos >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(buf_pos));
    offsets[buf_pos] = from_next - from_next_prev; // remember past-end position

    const UChar* buf_dst_next = buf;
    auto* buf_dst_end = buf_next;
    static_assert(sizeof(char) == 1, "sizeof(char) != 1"); // otherwise have to divide lower

    // convert from intermediary representation to the internal encoding
    if (sizeof(intern_type) == char_size) {
      // internal chars have exactly the size of 'intern_type', hence ICU can
      // write directly into the destination buffer
      auto* buf_to_begin = reinterpret_cast<char*>(to_next);
      auto* buf_to_next = buf_to_begin;
      auto* buf_to_end =
        buf_to_begin + std::distance(to_next, to_end) * sizeof(intern_type);

      ucnv_fromUnicode(
        ctx->converter_int_.get(),
        &buf_to_next,
        buf_to_end,
        &buf_dst_next,
        buf_dst_end,
        nullptr,
        true,
        &dst_status
      );

      // ICU advanced only the local byte pointer, publish the number of
      // chars produced via 'to_next' (otherwise the caller would observe
      // that nothing was written); a trailing partial char is not counted
      to_next +=
        (buf_to_next - buf_to_begin) / std::ptrdiff_t(sizeof(intern_type));
    } else {
      // internal chars are narrower than 'intern_type', convert into the
      // trailing 'char_size' bytes of a zeroed temporary
      intern_type ch = 0;
      auto* buf_to = reinterpret_cast<char*>(&ch) + (sizeof(intern_type) - char_size);
      auto* buf_to_end = reinterpret_cast<char*>(&ch + 1); // +1 for char after buf

      // convert one char at a time and left pad with 0's
      while (to_next < to_end) {
        auto* buf_to_next = buf_to;

        dst_status = U_ZERO_ERROR;
        ucnv_fromUnicode(
          ctx->converter_int_.get(),
          &buf_to_next,
          buf_to_end,
          &buf_dst_next,
          buf_dst_end,
          nullptr,
          true,
          &dst_status
        );

        if (!U_SUCCESS(dst_status) && U_BUFFER_OVERFLOW_ERROR != dst_status) {
          break;
        }

        *to_next = ch; // copy over char
        ++to_next;
        ch = 0;

        if (U_SUCCESS(dst_status)) {
          break; // nothing was converted from source to destination
        }
      }
    }

    if (!U_SUCCESS(dst_status) && U_BUFFER_OVERFLOW_ERROR != dst_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from UTF16 to internal encoding while converting encoding '%s' to system encoding '%s'",
        context_encoding_ext().c_str(), context_encoding_int().c_str()
      );

      return std::codecvt_base::error; // error occured during final conversion
    }

    auto buf_dst_pos = buf_dst_next - buf;

    assert(buf_dst_pos >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(buf_dst_pos));
    from_next = from_next_prev + offsets[buf_dst_pos]; // update successfully converted

    if (U_BUFFER_OVERFLOW_ERROR == dst_status
        || (U_BUFFER_OVERFLOW_ERROR == src_status && from_next >= from_end)) {
      return std::codecvt_base::partial; // destination buffer is not large enough
    }
  }

  return std::codecvt_base::ok;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief maximum number of externT characters that may be required to produce
///        a single internT character
/// @return -1 if no conversion context is available or the internal (system)
///         encoding is not supported,
///         otherwise the maximum char size of the external converter
////////////////////////////////////////////////////////////////////////////////
int codecvtw_facet::do_max_length() const NOEXCEPT {
  auto ctx = context();

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while computing maximum number of required input characters from encoding '%s' to produce a single system encoding '%s' output character",
      context_encoding_ext().c_str(), context_encoding_int().c_str()
    );

    return -1;
  }

  UErrorCode status = U_ZERO_ERROR;
  const auto char_size =
    size_t((std::max)(int8_t(1), ucnv_getMinCharSize(ctx->converter_int_.get())));

  // supported only if the system encoding is fixed-width (otherwise there is no
  // way to determine the internal char size) and an internal char fits into
  // 'intern_type'
  const bool supported =
    ucnv_isFixedWidth(ctx->converter_int_.get(), &status)
    && !(sizeof(intern_type) < char_size);

  if (supported) {
    return ucnv_getMaxCharSize(ctx->converter_ext_.get());
  }

  IR_FRMT_WARN(
    "unsupported encoding while computing maximum number of required input characters from encoding '%s' to produce a single system encoding '%s' output character",
    context_encoding_ext().c_str(), context_encoding_int().c_str()
  );

  return -1;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
/// @brief convert [from, from_end) from the internal (system) encoding into
///        the external encoding at [to, to_end), going through an intermediary
///        UTF16 buffer of 'BUFFER_SIZE' UChars at a time
/// @param state not used by this implementation
/// @param from_next set to the first source char that was not converted
/// @param to_next set to one past the last destination char produced
/// @return 'ok' if the whole input was converted,
///         'partial' if the destination was too small,
///         'error' if no context is available, the internal encoding is not
///         supported or ICU reported a conversion failure (in which case
///         'from_next'/'to_next' are rolled back to the start of the batch)
/// @note all entries of 'offsets' are expressed in units of 'intern_type'
///       relative to the start of the current batch ('from_next_prev')
////////////////////////////////////////////////////////////////////////////////
std::codecvt_base::result codecvtw_facet::do_out(
    state_type& state,
    const intern_type* from,
    const intern_type* from_end,
    const intern_type*& from_next,
    extern_type* to,
    extern_type* to_end,
    extern_type*& to_next
) const {
  auto ctx = context();

  from_next = from;
  to_next = to;

  if (!ctx) {
    IR_FRMT_WARN(
      "failure to get conversion context while converting system encoding '%s' to encoding '%s'",
      context_encoding_int().c_str(), context_encoding_ext().c_str()
    );

    return std::codecvt_base::error;
  }

  UErrorCode status = U_ZERO_ERROR;
  const auto char_size =
    size_t((std::max)(int8_t(1), ucnv_getMinCharSize(ctx->converter_int_.get())));

  // cannot support conversion from variable-width system encoding since no way
  // to determine internal char size
  // cannot support conversion of fixed-width system encoding where
  // sizeof(intern_type) < sizeof(<internal char>)
  if (!ucnv_isFixedWidth(ctx->converter_int_.get(), &status)
      || sizeof(intern_type) < char_size) {
    IR_FRMT_WARN(
      "unsupported encoding while converting system encoding '%s' to encoding '%s'",
      context_encoding_int().c_str(), context_encoding_ext().c_str()
    );

    return std::codecvt_base::error;
  }

  UChar buf[BUFFER_SIZE];
  auto* buf_end = buf + IRESEARCH_COUNTOF(buf);
  int32_t offsets[IRESEARCH_COUNTOF(buf) + 1]; // +1 for end

  ucnv_reset(ctx->converter_ext_.get());
  ucnv_reset(ctx->converter_int_.get());

  // convert 'BUFFER_SIZE' at a time
  while (from_next < from_end) {
    auto* buf_next = buf;
    auto* from_next_prev = from_next; // rollback position for the source
    auto* to_next_prev = to_next; // rollback position for the destination
    UErrorCode src_status = U_ZERO_ERROR;
    UErrorCode dst_status = U_ZERO_ERROR;

    static_assert(sizeof(char) == 1, "sizeof(char) != 1"); // otherwise have to divide lower

    // convert from the internal encoding to the intermediary representation
    if (sizeof(intern_type) == char_size) {
      // internal chars have exactly the size of 'intern_type', hence ICU can
      // read directly from the source buffer
      auto* buf_from_begin = reinterpret_cast<const char*>(from_next);
      auto* buf_from_next = buf_from_begin;
      auto* buf_from_end =
        buf_from_begin + std::distance(from_next, from_end) * sizeof(intern_type);

      src_status = U_ZERO_ERROR;
      ucnv_toUnicode(
        ctx->converter_int_.get(),
        &buf_next,
        buf_end,
        &buf_from_next,
        buf_from_end,
        offsets,
        true,
        &src_status
      );

      // ICU indexed the source as bytes, the code below adds offsets to an
      // 'intern_type' pointer, hence convert them to 'intern_type' units
      // (non-positive values, e.g. 'no source index', are left untouched)
      for (auto* itr = buf; itr < buf_next; ++itr) {
        auto& offset = offsets[itr - buf];

        if (offset > 0) {
          offset /= int32_t(sizeof(intern_type));
        }
      }

      // ICU advanced only the local byte pointer, publish the consumed chars
      // (otherwise the past-end offset below is 0 and no progress is made)
      from_next +=
        (buf_from_next - buf_from_begin) / std::ptrdiff_t(sizeof(intern_type));
    } else {
      // convert one char at a time
      do {
        // internal chars occupy the trailing 'char_size' bytes of 'intern_type'
        auto* buf_from = reinterpret_cast<const char*>(from_next) + (sizeof(intern_type) - char_size);
        auto* buf_from_end = reinterpret_cast<const char*>(from_next + 1); // +1 for char after buf
        auto* buf_next_start = buf_next;
        auto* buf_from_next = buf_from;

        src_status = U_ZERO_ERROR;
        ucnv_toUnicode(
          ctx->converter_int_.get(),
          &buf_next,
          buf_end,
          &buf_from_next,
          buf_from_end,
          nullptr,
          true,
          &src_status
        );

        if (U_BUFFER_OVERFLOW_ERROR == src_status) {
          break; // conversion buffer not large enough to hold result
        }

        if (!U_SUCCESS(src_status)) {
          IR_FRMT_WARN(
            "failure to convert from system encoding to UTF16 while converting system encoding '%s' to encoding '%s'",
            context_encoding_int().c_str(), context_encoding_ext().c_str()
          );

          break; // finish copying all successfully converted
        }

        assert(buf_next >= buf && IRESEARCH_COUNTOF(offsets) > size_t(buf_next - buf));

        // offsets are later added to 'from_next_prev', hence must be relative
        // to the start of the current batch and not to 'from'
        while(buf_next_start < buf_next) {
          offsets[buf_next_start - buf] = from_next - from_next_prev; // remember converted position
          ++buf_next_start;
        }

        ++from_next; // +1 for 1 char at a time
      } while(from_next < from_end);
    }

    if (!U_SUCCESS(src_status) && U_BUFFER_OVERFLOW_ERROR != src_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from system encoding to UTF16 while converting system encoding '%s' to encoding '%s'",
        context_encoding_int().c_str(), context_encoding_ext().c_str()
      );

      return std::codecvt_base::error; // error occured during final conversion
    }

    auto buf_pos = buf_next - buf;

    assert(buf_pos >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(buf_pos));
    offsets[buf_pos] = from_next - from_next_prev; // remember past-end position

    const UChar* buf_dst_next = buf;
    auto* buf_dst_end = buf_next;

    // convert intermediary representation to the desired encoding
    ucnv_fromUnicode(
      ctx->converter_ext_.get(),
      &to_next,
      to_end,
      &buf_dst_next,
      buf_dst_end,
      nullptr,
      true,
      &dst_status
    );

    if (!U_SUCCESS(dst_status) && U_BUFFER_OVERFLOW_ERROR != dst_status) {
      from_next = from_next_prev;
      to_next = to_next_prev;

      IR_FRMT_WARN(
        "failure to convert from UTF16 to locale encoding while converting system encoding '%s' to encoding '%s'",
        context_encoding_int().c_str(), context_encoding_ext().c_str()
      );

      return std::codecvt_base::error; // error occured during final conversion
    }

    auto buf_dst_pos = buf_dst_next - buf;

    assert(buf_dst_pos >= 0 && IRESEARCH_COUNTOF(offsets) > size_t(buf_dst_pos));
    from_next = from_next_prev + offsets[buf_dst_pos]; // update successfully converted

    // 'src_status' can only be success or U_BUFFER_OVERFLOW_ERROR here since
    // any other failure has already returned 'error' above

    if (U_BUFFER_OVERFLOW_ERROR == dst_status
        || (U_BUFFER_OVERFLOW_ERROR == src_status && from_next >= from_end)) {
      return std::codecvt_base::partial; // destination buffer is not large enough
    }
  }

  return std::codecvt_base::ok;
}
|
|
|
|
// placeholder collation facet for 'char', adds nothing to std::collate<char>
// FIXME TODO implement
class collate_facet: public std::collate<char> {};
|
|
|
|
// placeholder collation facet for 'wchar_t', adds nothing to std::collate<wchar_t>
// FIXME TODO implement
class collatew_facet: public std::collate<wchar_t> {};
|
|
|
|
// placeholder character classification facet for 'char', adds nothing to std::ctype<char>
// FIXME TODO implement
class ctype_facet: public std::ctype<char> {};
|
|
|
|
// placeholder character classification facet for 'wchar_t', adds nothing to std::ctype<wchar_t>
// FIXME TODO implement
class ctypew_facet: public std::ctype<wchar_t> {};
|
|
|
|
// placeholder monetary parsing facet for 'char', adds nothing to std::money_get<char>
// FIXME TODO implement
class money_get_facet: public std::money_get<char> {};
|
|
|
|
// placeholder monetary parsing facet for 'wchar_t', adds nothing to std::money_get<wchar_t>
// FIXME TODO implement
class money_getw_facet: public std::money_get<wchar_t> {};
|
|
|
|
// placeholder monetary formatting facet for 'char', adds nothing to std::money_put<char>
// FIXME TODO implement
class money_put_facet: public std::money_put<char> {};
|
|
|
|
// placeholder monetary formatting facet for 'wchar_t', adds nothing to std::money_put<wchar_t>
// FIXME TODO implement
class money_putw_facet: public std::money_put<wchar_t> {};
|
|
|
|
// placeholder monetary punctuation facet for 'char', adds nothing to std::moneypunct<char>
// FIXME TODO implement
class moneypunct_facet: public std::moneypunct<char> {};
|
|
|
|
// placeholder monetary punctuation facet for 'char' with the second template
// argument of std::moneypunct set to 'true', adds nothing to the base
// FIXME TODO implement
class moneypunctintl_facet: public std::moneypunct<char, true> {};
|
|
|
|
// placeholder monetary punctuation facet for 'wchar_t', adds nothing to std::moneypunct<wchar_t>
// FIXME TODO implement
class moneypunctw_facet: public std::moneypunct<wchar_t> {};
|
|
|
|
// placeholder monetary punctuation facet for 'wchar_t' with the second template
// argument of std::moneypunct set to 'true', adds nothing to the base
// FIXME TODO implement
class moneypunctwintl_facet: public std::moneypunct<wchar_t, true> {};
|
|
|
|
// placeholder numeric parsing facet for 'char', adds nothing to std::num_get<char>
// FIXME TODO implement
class num_get_facet: public std::num_get<char> {};
|
|
|
|
// placeholder numeric parsing facet for 'wchar_t', adds nothing to std::num_get<wchar_t>
// FIXME TODO implement
class num_getw_facet: public std::num_get<wchar_t> {};
|
|
|
|
template<typename CharType, typename CvtType>
|
|
class num_put_facet: public std::num_put<CharType> {
|
|
public:
|
|
typedef typename std::num_put<CharType>::char_type char_type;
|
|
typedef typename std::num_put<CharType>::iter_type iter_type;
|
|
|
|
num_put_facet(const icu::Locale& locale, const CvtType& converter)
|
|
: contexts_(POOL_SIZE), converter_(converter), locale_(locale) {
|
|
}
|
|
|
|
protected:
|
|
virtual iter_type do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, bool value
|
|
) const override;
|
|
virtual iter_type do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, long value
|
|
) const override;
|
|
virtual iter_type do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, long long value
|
|
) const override;
|
|
virtual iter_type do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, unsigned long value
|
|
) const override;
|
|
virtual iter_type do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, unsigned long long value
|
|
) const override;
|
|
virtual iter_type do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, double value
|
|
) const override;
|
|
virtual iter_type do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, long double value
|
|
) const override;
|
|
virtual iter_type do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, const void* value
|
|
) const override;
|
|
|
|
private:
|
|
struct context_t {
|
|
DECLARE_UNIQUE_PTR(context_t);
|
|
std::basic_string<char_type> buf_;
|
|
UnicodeString icu_buf0_;
|
|
UnicodeString icu_buf1_;
|
|
std::unique_ptr<icu::NumberFormat> regular_;
|
|
std::unique_ptr<icu::NumberFormat> scientific_; // uppercase (instead of mixed case by default)
|
|
|
|
static ptr make(const icu::Locale& locale) {
|
|
auto ctx = irs::memory::make_unique<context_t>();
|
|
|
|
if (!ctx) {
|
|
return nullptr;
|
|
}
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
ctx->regular_.reset(icu::NumberFormat::createInstance(locale, status));
|
|
|
|
if (!U_SUCCESS(status) && !ctx->regular_) {
|
|
return nullptr;
|
|
}
|
|
|
|
// at least on ICU v55/v57/v59 createScientificInstance(...) will create different,
|
|
// (even on the same version but different hosts) and mostly incorrct formats,
|
|
// e.g. incorrect decimal or exponent precision
|
|
// hence use createInstance(...) and cast to DecimalFormat as per ICU documentation
|
|
ctx->scientific_.reset(icu::NumberFormat::createInstance(locale, status));
|
|
|
|
if (!U_SUCCESS(status) && !ctx->scientific_) {
|
|
return nullptr;
|
|
}
|
|
|
|
auto* decimal = dynamic_cast<icu::DecimalFormat*>(ctx->scientific_.get());
|
|
|
|
if (!decimal) {
|
|
return nullptr; // can't set to scientific
|
|
}
|
|
|
|
decimal->setScientificNotation(true);
|
|
|
|
// uppercase (instead of mixed case with UDisplayContext::UDISPCTX_CAPITALIZATION_NONE)
|
|
ctx->scientific_->setContext(UDisplayContext::UDISPCTX_CAPITALIZATION_FOR_STANDALONE, status);
|
|
|
|
if (!U_SUCCESS(status)) {
|
|
return nullptr;
|
|
}
|
|
|
|
return std::move(ctx);
|
|
}
|
|
|
|
void reset(const std::ios_base& str) {
|
|
auto grouping =
|
|
!std::use_facet<std::numpunct<char_type>>(str.getloc()).grouping().empty();
|
|
|
|
buf_.clear();
|
|
icu_buf0_.truncate(0);
|
|
icu_buf1_.truncate(0);
|
|
regular_->setGroupingUsed(grouping);
|
|
regular_->setMinimumFractionDigits(0);
|
|
regular_->setMaximumFractionDigits(0);
|
|
scientific_->setGroupingUsed(grouping);
|
|
scientific_->setMinimumFractionDigits(0);
|
|
scientific_->setMaximumFractionDigits(0);
|
|
}
|
|
};
|
|
|
|
typedef irs::unbounded_object_pool<context_t> context_pool;
|
|
|
|
mutable context_pool contexts_;
|
|
const CvtType& converter_;
|
|
icu::Locale locale_;
|
|
|
|
typename context_pool::ptr context() const {
|
|
return contexts_.emplace(locale_);
|
|
}
|
|
|
|
template<typename T>
|
|
static iter_type do_put_float_hex(
|
|
iter_type out, std::ios_base& str, char_type fill, T value
|
|
);
|
|
|
|
template<typename T>
|
|
static iter_type do_put_int_hex(
|
|
iter_type out, std::ios_base& str, char_type fill, T value, bool full_width
|
|
);
|
|
|
|
template<typename T>
|
|
static iter_type do_put_int_oct(
|
|
iter_type out, std::ios_base& str, char_type fill, T value
|
|
);
|
|
|
|
static iter_type do_put_int_zero(
|
|
iter_type out, std::ios_base& str, char_type fill
|
|
);
|
|
};
|
|
|
|
template<typename CharType, typename CvtType>
|
|
typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, bool value
|
|
) const {
|
|
if (!(str.flags() & std::ios_base::boolalpha)) {
|
|
return do_put(out, str, fill, long(value));
|
|
}
|
|
|
|
auto val = value
|
|
? std::use_facet<std::numpunct<char_type>>(str.getloc()).truename()
|
|
: std::use_facet<std::numpunct<char_type>>(str.getloc()).falsename()
|
|
;
|
|
auto rpad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::left
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto lpad = !rpad ? str.width() : size_t(0);
|
|
size_t size = 0;
|
|
|
|
str.width(0); // reset padding
|
|
|
|
for (size_t i = lpad < val.size() ? 0 : lpad - val.size(); i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = 0, count = val.size(); i < count; ++i) {
|
|
*out++ = val[i];
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, long value
|
|
) const {
|
|
if (str.flags() & std::ios_base::oct) {
|
|
return do_put_int_oct(out, str, fill, (unsigned long)value);
|
|
}
|
|
|
|
if (str.flags() & std::ios_base::hex) {
|
|
return do_put_int_hex(out, str, fill, (unsigned long)value, false);
|
|
}
|
|
|
|
// the ICU operations are identical
|
|
return do_put(out, str, fill, (long long)value);
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, long long value
|
|
) const {
|
|
if (str.flags() & std::ios_base::oct) {
|
|
return do_put_int_oct(out, str, fill, (unsigned long long)value);
|
|
}
|
|
|
|
if (str.flags() & std::ios_base::hex) {
|
|
static_assert(sizeof(uint64_t) == sizeof(unsigned long long), "sizeof(uint64_t) != sizeof(unsigned long long)");
|
|
return do_put_int_hex(out, str, fill, (uint64_t)value, false);
|
|
}
|
|
|
|
if (value >= 0) {
|
|
return do_put(out, str, fill, (unsigned long long)value);
|
|
}
|
|
|
|
auto ipad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::internal
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto rpad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::left
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto lpad = !ipad && !rpad ? str.width() : size_t(0);
|
|
size_t size = 0;
|
|
|
|
str.width(0); // reset padding
|
|
|
|
auto ctx = context();
|
|
|
|
if (!ctx) {
|
|
throw irs::detailed_io_error(
|
|
"failed to retrieve ICU formatter in num_put_facet::do_put(...)"
|
|
);
|
|
}
|
|
|
|
static_assert(sizeof(int64_t) == sizeof(long long), "sizeof(int64_t) != sizeof(long long)");
|
|
ctx->reset(str);
|
|
ctx->regular_->format(int64_t(0 - value), ctx->icu_buf0_);
|
|
|
|
if (!converter_.append(ctx->buf_, ctx->icu_buf0_)) {
|
|
throw irs::detailed_io_error(
|
|
"failed to convert data from UTF8 in num_put_facet::do_put(...)"
|
|
);
|
|
}
|
|
|
|
size_t len = ctx->buf_.size() + 1; // +1 for '-'
|
|
|
|
for (size_t i = lpad < len ? 0 : lpad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
*out++ = '-';
|
|
++size;
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = 0, count = ctx->buf_.size(); i < count; ++i) {
|
|
*out++ = ctx->buf_[i];
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, unsigned long value
|
|
) const {
|
|
if (str.flags() & std::ios_base::oct) {
|
|
return do_put_int_oct(out, str, fill, (unsigned long)value);
|
|
}
|
|
|
|
if (str.flags() & std::ios_base::hex) {
|
|
return do_put_int_hex(out, str, fill, (unsigned long)value, false);
|
|
}
|
|
|
|
// the ICU operations are identical
|
|
return do_put(out, str, fill, (unsigned long long)value);
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, unsigned long long value
|
|
) const {
|
|
if (str.flags() & std::ios_base::oct) {
|
|
return do_put_int_oct(out, str, fill, (unsigned long long)value);
|
|
}
|
|
|
|
if (str.flags() & std::ios_base::hex) {
|
|
static_assert(sizeof(uint64_t) == sizeof(unsigned long long), "sizeof(uint64_t) != sizeof(unsigned long long)");
|
|
return do_put_int_hex(out, str, fill, (uint64_t)value, false);
|
|
}
|
|
|
|
if (!value) {
|
|
return do_put_int_zero(out, str,fill); // optimization for '0'
|
|
}
|
|
|
|
if ((unsigned long long)irs::integer_traits<int64_t>::const_max < value) {
|
|
throw irs::detailed_io_error(
|
|
"value too large while converting data from UTF8 in num_put_facet::do_put(...)"
|
|
);
|
|
}
|
|
|
|
auto ipad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::internal
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto rpad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::left
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto lpad = !ipad && !rpad ? str.width() : size_t(0);
|
|
size_t size = 0;
|
|
|
|
str.width(0); // reset padding
|
|
|
|
auto ctx = context();
|
|
|
|
if (!ctx) {
|
|
throw irs::detailed_io_error(
|
|
"failed to retrieve ICU formatter in num_put_facet::do_put(...)"
|
|
);
|
|
}
|
|
|
|
static_assert(sizeof(int64_t) == sizeof(long long), "sizeof(int64_t) != sizeof(long long)");
|
|
ctx->reset(str);
|
|
ctx->regular_->format(int64_t(value), ctx->icu_buf0_);
|
|
|
|
if (!converter_.append(ctx->buf_, ctx->icu_buf0_)) {
|
|
throw irs::detailed_io_error(
|
|
"failed to convert data from UTF8 in num_put_facet::do_put(...)"
|
|
);
|
|
}
|
|
|
|
size_t len = ctx->buf_.size() + (str.flags() & std::ios_base::showpos ? 1 : 0);
|
|
|
|
for (size_t i = lpad < len ? 0 : lpad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
if (str.flags() & std::ios_base::showpos) {
|
|
*out++ = '+';
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = 0, count = ctx->buf_.size(); i < count; ++i) {
|
|
*out++ = ctx->buf_[i];
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, double value
|
|
) const {
|
|
if ((str.flags() & std::ios_base::floatfield) == (std::ios_base::fixed | std::ios_base::scientific)) {
|
|
return do_put_float_hex(out, str, fill, value);
|
|
}
|
|
|
|
auto ipad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::internal
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto rpad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::left
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto lpad = !ipad && !rpad ? str.width() : size_t(0);
|
|
size_t size = 0;
|
|
|
|
str.width(0); // reset padding
|
|
|
|
auto ctx = context();
|
|
|
|
if (!ctx) {
|
|
throw irs::detailed_io_error(
|
|
"failed to retrieve ICU formatter in num_put_facet::do_put(...)"
|
|
);
|
|
}
|
|
|
|
ctx->reset(str);
|
|
ctx->regular_->setMinimumFractionDigits(6); // default 6 as per specification
|
|
ctx->regular_->setMaximumFractionDigits(6); // default 6 as per specification
|
|
ctx->scientific_->setMinimumFractionDigits(6); // default 6 as per specification
|
|
ctx->scientific_->setMaximumFractionDigits(6); // default 6 as per specification
|
|
|
|
static const UnicodeString point(".");
|
|
icu::UnicodeString* icu_buf;
|
|
bool negative = false;
|
|
|
|
if (value < 0) {
|
|
value = 0 - value;
|
|
negative = true;
|
|
}
|
|
|
|
if ((str.flags() & std::ios_base::floatfield) == std::ios_base::fixed) {
|
|
icu::FieldPosition decimal(UNumberFormatFields::UNUM_DECIMAL_SEPARATOR_FIELD);
|
|
|
|
// Decimal floating point, lowercase
|
|
ctx->regular_->format(value, ctx->icu_buf0_, decimal);
|
|
icu_buf = &ctx->icu_buf0_;
|
|
|
|
if ((str.flags() & std::ios_base::showpoint)
|
|
&& !decimal.getBeginIndex() && !decimal.getEndIndex()) { // 0,0 indicates no decimal
|
|
icu_buf->append(point); // append at end
|
|
}
|
|
} else if ((str.flags() & std::ios_base::floatfield) == std::ios_base::scientific) {
|
|
icu::FieldPosition decimal(UNumberFormatFields::UNUM_DECIMAL_SEPARATOR_FIELD);
|
|
|
|
// Scientific notation (mantissa/exponent), uppercase/lowercase
|
|
ctx->scientific_->format(value, ctx->icu_buf0_, decimal);
|
|
icu_buf = &ctx->icu_buf0_;
|
|
|
|
if ((str.flags() & std::ios_base::showpoint)
|
|
&& !decimal.getBeginIndex() && !decimal.getEndIndex()) { // 0,0 indicates no decimal
|
|
icu_buf->insert(icu_buf->length() - 2, point); // -2 to insert before 'e0'
|
|
}
|
|
} else {
|
|
icu::FieldPosition decimal_r(UNumberFormatFields::UNUM_DECIMAL_SEPARATOR_FIELD);
|
|
icu::FieldPosition decimal_s(UNumberFormatFields::UNUM_DECIMAL_SEPARATOR_FIELD);
|
|
|
|
// set the maximum number of significant digits to be printed (as per spec)
|
|
ctx->regular_->setMinimumFractionDigits(0);
|
|
ctx->regular_->setMaximumFractionDigits(str.precision());
|
|
ctx->scientific_->setMinimumFractionDigits(0);
|
|
ctx->scientific_->setMaximumFractionDigits(str.precision());
|
|
|
|
// Use the shortest representation:
|
|
// Decimal floating point
|
|
// Scientific notation (mantissa/exponent), uppercase/lowercase
|
|
ctx->regular_->format(value, ctx->icu_buf0_, decimal_r);
|
|
ctx->scientific_->format(value, ctx->icu_buf1_, decimal_s);
|
|
|
|
if ((str.flags() & std::ios_base::showpoint)) {
|
|
if (!decimal_r.getBeginIndex() && !decimal_r.getEndIndex()) { // 0,0 indicates no decimal
|
|
ctx->icu_buf0_.append(point); // append at end
|
|
}
|
|
|
|
if (!decimal_s.getBeginIndex() && !decimal_s.getEndIndex()) { // 0,0 indicates no decimal
|
|
ctx->icu_buf1_.insert(ctx->icu_buf1_.length() - 2, point); // -2 to insert before 'e0'
|
|
}
|
|
}
|
|
|
|
icu_buf = ctx->icu_buf1_.length() < ctx->icu_buf1_.length()
|
|
? &ctx->icu_buf1_ : &ctx->icu_buf0_;
|
|
}
|
|
|
|
// ensure all letters are uppercased/lowercased
|
|
if (!(str.flags() & std::ios_base::uppercase)) {
|
|
icu_buf->toLower();
|
|
}
|
|
|
|
if (!converter_.append(ctx->buf_, *icu_buf)) {
|
|
throw irs::detailed_io_error(
|
|
"failed to convert data from UTF8 in num_put_facet::do_put(...)"
|
|
);
|
|
}
|
|
|
|
size_t len = ctx->buf_.size()
|
|
+ (negative || (str.flags() & std::ios_base::showpos) ? 1 : 0);
|
|
|
|
for (size_t i = lpad < len ? 0 : lpad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
if (negative) {
|
|
*out++ = '-';
|
|
++size;
|
|
} else if (str.flags() & std::ios_base::showpos) {
|
|
*out++ = '+';
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = 0, count = ctx->buf_.size(); i < count; ++i) {
|
|
*out++ = ctx->buf_[i];
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, long double value
|
|
) const {
|
|
if ((str.flags() & std::ios_base::floatfield) == (std::ios_base::fixed | std::ios_base::scientific)) {
|
|
return do_put_float_hex(out, str, fill, value);
|
|
}
|
|
|
|
// the ICU operations are identical (with lower precision)
|
|
return do_put(out, str, fill, (double)value);
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put(
|
|
iter_type out, std::ios_base& str, char_type fill, const void* value
|
|
) const {
|
|
return do_put_int_hex(out, str, fill, size_t(value), true);
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
template<typename T>
|
|
/*static*/ typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put_float_hex(
|
|
iter_type out, std::ios_base& str, char_type fill, T value
|
|
) {
|
|
typedef typename std::enable_if<std::is_floating_point<T>::value, T>::type type;
|
|
|
|
auto ipad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::internal
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto rpad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::left
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto lpad = !ipad && !rpad ? str.width() : size_t(0);
|
|
size_t size = 0;
|
|
|
|
str.width(0); // reset padding
|
|
|
|
static auto mantissa_bits = std::numeric_limits<type>::digits;
|
|
static const char lower[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
|
static const char upper[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
|
|
auto* table = str.flags() & std::ios_base::uppercase ? upper : lower;
|
|
bool negative = false;
|
|
|
|
if (value < 0) {
|
|
value = 0 - value;
|
|
negative = true;
|
|
}
|
|
|
|
// optimization for '0'
|
|
if (!value) {
|
|
size_t len = 6 // 0x0p+0
|
|
+ ((str.flags() & std::ios_base::showpos) ? 1 : 0)
|
|
+ ((str.flags() & std::ios_base::showpoint) ? 1 : 0)
|
|
;
|
|
|
|
for (size_t i = lpad < len ? 0 : lpad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
// if a sign character occurs in the representation, will pad after the sign
|
|
if (str.flags() & std::ios_base::showpos) {
|
|
*out++ = '+';
|
|
++size;
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++len; // subtract from 'ipad'
|
|
++size;
|
|
}
|
|
}
|
|
|
|
*out++ = '0'; // hexadecimal prefix
|
|
*out++ = str.flags() & std::ios_base::uppercase ? 'X' : 'x';
|
|
size += 2;
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
*out++ = '0';
|
|
++size;
|
|
|
|
if (str.flags() & std::ios_base::showpoint) {
|
|
*out++ = '.';
|
|
++size;
|
|
}
|
|
|
|
*out++ = str.flags() & std::ios_base::uppercase ? 'P' : 'p';
|
|
*out++ = '+';
|
|
*out++ = '0';
|
|
size += 3;
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
int exponent;
|
|
auto mantissa_f = std::frexp(value, &exponent);
|
|
auto mantissa_i = size_t(std::ldexp(mantissa_f, mantissa_bits));
|
|
int half_byte = sizeof(size_t) * 2;
|
|
|
|
// strip leading/trailing zero half-bytes
|
|
{
|
|
static_assert(std::numeric_limits<size_t>::digits < irs::integer_traits<int>::const_max, "std::numeric_limits<size_t>::digits >= std::numeric_limits<int>::max()");
|
|
auto clz = int(irs::math::math_traits<size_t>::clz(mantissa_i));
|
|
auto ctz = int(irs::math::math_traits<size_t>::ctz(mantissa_i));
|
|
|
|
exponent -= 4 - (clz % 4); // number of bits used in the first half-byte
|
|
half_byte -= clz / 4; // 4 for half-byte
|
|
half_byte -= ctz / 4; // 4 for half-byte
|
|
mantissa_i >>= ctz & ~size_t(0x3); // (ctz / 4) * 4
|
|
}
|
|
|
|
auto exp_str = std::to_string(exponent);
|
|
|
|
size_t len = half_byte
|
|
+ 4 // for 0x...p+
|
|
+ (negative || (str.flags() & std::ios_base::showpos) ? 1 : 0)
|
|
+ (!half_byte || (str.flags() & std::ios_base::showpoint) ? 1 : 0)
|
|
+ exp_str.size()
|
|
;
|
|
|
|
for (size_t i = lpad < len ? 0 : lpad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
// if a sign character occurs in the representation, will pad after the sign
|
|
if (negative || (str.flags() & std::ios_base::showpos)) {
|
|
*out++ = negative ? '-' : '+';
|
|
++size;
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++len; // subtract from 'ipad'
|
|
++size;
|
|
}
|
|
}
|
|
|
|
*out++ = '0'; // hexadecimal prefix
|
|
*out++ = str.flags() & std::ios_base::uppercase ? 'X' : 'x';
|
|
size += 2;
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
bool started = false;
|
|
|
|
while(half_byte) {
|
|
auto val = (mantissa_i >> (--half_byte * 4)) & 0xF;
|
|
|
|
*out++ = table[val];
|
|
++size;
|
|
|
|
if (!started) {
|
|
started = true;
|
|
|
|
if (half_byte || (str.flags() & std::ios_base::showpoint)) {
|
|
*out++ = '.';
|
|
++size;
|
|
}
|
|
}
|
|
}
|
|
|
|
*out++ = str.flags() & std::ios_base::uppercase ? 'P' : 'p'; // exponent suffix
|
|
*out++ = '+';
|
|
size += 2;
|
|
|
|
for (size_t i = 0, count = exp_str.size(); i < count; ++i) {
|
|
*out++ = exp_str[i];
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
template<typename T>
|
|
/*static*/ typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put_int_hex(
|
|
iter_type out, std::ios_base& str, char_type fill, T value, bool full_width
|
|
) {
|
|
typedef typename std::enable_if<std::is_unsigned<T>::value, T>::type type;
|
|
|
|
if (!value && !full_width) {
|
|
return do_put_int_zero(out, str, fill); // optimization for '0'
|
|
}
|
|
|
|
auto ipad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::internal
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto rpad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::left
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto lpad = !ipad && !rpad ? str.width() : size_t(0);
|
|
size_t size = 0;
|
|
|
|
str.width(0); // reset padding
|
|
|
|
static const char lower[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
|
static const char upper[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
|
|
auto* table = str.flags() & std::ios_base::uppercase ? upper : lower;
|
|
auto val = irs::numeric_utils::numeric_traits<type>::hton(value);
|
|
auto* v = reinterpret_cast<uint8_t*>(&val);
|
|
bool started = false;
|
|
size_t len = sizeof(val) * 2 // *2 for hi+lo
|
|
+ (str.flags() & std::ios_base::showpos ? 1 : 0)
|
|
+ (str.flags() & std::ios_base::showbase ? 2 : 0)
|
|
;
|
|
|
|
for (auto i = sizeof(type); i; --i, ++v) {
|
|
if (started) {
|
|
*out++ = table[*v >> 4];
|
|
*out++ = table[*v & 0xF];
|
|
size += 2;
|
|
continue;
|
|
}
|
|
|
|
if (!*v && !full_width) {
|
|
len -= 2; // 2 for hi+lo
|
|
continue;
|
|
}
|
|
|
|
auto hi = *v >> 4;
|
|
auto lo = *v & 0xF;
|
|
|
|
len -= hi || full_width ? 0 : 1;
|
|
|
|
for (size_t i = lpad < len ? 0 : lpad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
// if a sign character occurs in the representation, will pad after the sign
|
|
if (str.flags() & std::ios_base::showpos) {
|
|
*out++ = '+';
|
|
++size;
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++len; // subtract from 'ipad'
|
|
++size;
|
|
}
|
|
}
|
|
|
|
// else if representation began with 0x or 0X, will pad after the x or X
|
|
if (str.flags() & std::ios_base::showbase) {
|
|
*out++ = '0'; // hexadecimal prefix
|
|
*out++ = str.flags() & std::ios_base::uppercase ? 'X' : 'x';
|
|
size += 2;
|
|
}
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
if (hi || full_width) {
|
|
*out++ = table[hi];
|
|
++size;
|
|
}
|
|
|
|
*out++ = table[lo];
|
|
++size;
|
|
started = true;
|
|
}
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
template<typename T>
|
|
/*static*/ typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put_int_oct(
|
|
iter_type out, std::ios_base& str, char_type fill, T value
|
|
) {
|
|
typedef typename std::enable_if<std::is_unsigned<T>::value, T>::type type;
|
|
|
|
if (!value) {
|
|
return do_put_int_zero(out, str, fill); // optimization for '0'
|
|
}
|
|
|
|
auto ipad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::internal
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto rpad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::left
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto lpad = !ipad && !rpad ? str.width() : size_t(0);
|
|
size_t size = 0;
|
|
|
|
str.width(0); // reset padding
|
|
|
|
static const char table[] = { '0', '1', '2', '3', '4', '5', '6', '7' };
|
|
size_t shift = (sizeof(type) * 8 / 3) + 1; // shift in blocks of 3 bits, +1 for initial decrement
|
|
bool started = false;
|
|
size_t len = shift
|
|
+ (str.flags() & std::ios_base::showpos ? 1 : 0)
|
|
+ (str.flags() & std::ios_base::showbase ? 1 : 0)
|
|
;
|
|
|
|
do {
|
|
auto v = (value >> (--shift * 3)) & 0x7; // shift in blocks of 3 bits
|
|
|
|
if (started) {
|
|
*out++ = table[v];
|
|
++size;
|
|
continue;
|
|
}
|
|
|
|
if (!v) {
|
|
--len;
|
|
continue;
|
|
}
|
|
|
|
for (size_t i = lpad < len ? 0 : lpad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
if (str.flags() & std::ios_base::showpos) {
|
|
*out++ = '+';
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++len; // subtract from 'ipad'
|
|
++size;
|
|
}
|
|
|
|
if (str.flags() & std::ios_base::showbase) {
|
|
*out++ = '0'; // octal prefix
|
|
++size;
|
|
}
|
|
|
|
*out++ = table[v];
|
|
++size;
|
|
started = true;
|
|
} while (shift);
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
template<typename CharType, typename CvtType>
|
|
/*static*/ typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, CvtType>::do_put_int_zero(
|
|
iter_type out, std::ios_base& str, char_type fill
|
|
) {
|
|
auto ipad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::internal
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto rpad = (str.flags() & std::ios_base::adjustfield) == std::ios_base::left
|
|
? str.width() : size_t(0)
|
|
;
|
|
auto lpad = !ipad && !rpad ? str.width() : size_t(0);
|
|
size_t size = 0;
|
|
|
|
str.width(0); // reset padding
|
|
|
|
size_t len = strlen("0") + (str.flags() & std::ios_base::showpos ? 1 : 0);
|
|
|
|
for (size_t i = lpad < len ? 0 : lpad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
if (str.flags() & std::ios_base::showpos) {
|
|
*out++ = '+';
|
|
++size;
|
|
}
|
|
|
|
for (size_t i = ipad < len ? 0 : ipad - len; i; --i) {
|
|
*out++ = fill;
|
|
++size;
|
|
}
|
|
|
|
*out++ = '0';
|
|
++size;
|
|
|
|
for (size_t i = rpad < size ? 0 : rpad - size; i; --i) {
|
|
*out++ = fill;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
/// placeholder for a wide-character number formatting facet, for now it adds
/// nothing to std::num_put<wchar_t>
class num_putw_facet
  : public std::num_put<wchar_t> {
  // FIXME TODO implement
};
|
|
|
|
/// placeholder for a narrow-character numeric punctuation facet, for now it
/// adds nothing to std::numpunct<char>
class numpunct_facet
  : public std::numpunct<char> {
  // FIXME TODO implement
};
|
|
|
|
/// placeholder for a wide-character numeric punctuation facet, for now it
/// adds nothing to std::numpunct<wchar_t>
class numpunctw_facet
  : public std::numpunct<wchar_t> {
  // FIXME TODO implement
};
|
|
|
|
/// placeholder for a narrow-character time parsing facet, for now it adds
/// nothing to std::time_get<char>
class time_get_facet
  : public std::time_get<char> {
  // FIXME TODO implement
};
|
|
|
|
/// placeholder for a wide-character time parsing facet, for now it adds
/// nothing to std::time_get<wchar_t>
class time_getw_facet
  : public std::time_get<wchar_t> {
  // FIXME TODO implement
};
|
|
|
|
/// placeholder for a narrow-character time formatting facet, for now it adds
/// nothing to std::time_put<char>
class time_put_facet
  : public std::time_put<char> {
  // FIXME TODO implement
};
|
|
|
|
/// placeholder for a wide-character time formatting facet, for now it adds
/// nothing to std::time_put<wchar_t>
class time_putw_facet
  : public std::time_put<wchar_t> {
  // FIXME TODO implement
};
|
|
|
|
/// placeholder for a narrow-character message catalogue facet, for now it
/// adds nothing to std::messages<char>
class messages_facet
  : public std::messages<char> {
  // FIXME TODO implement
};
|
|
|
|
/// placeholder for a wide-character message catalogue facet, for now it adds
/// nothing to std::messages<wchar_t>
class messagesw_facet
  : public std::messages<wchar_t> {
  // FIXME TODO implement
};
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- custom std::locale facets
|
|
// -----------------------------------------------------------------------------
|
|
|
|
class locale_info_facet: public std::locale::facet {
|
|
public:
|
|
static std::locale::id id; // required for each class derived from std::locale::facet as per spec
|
|
|
|
locale_info_facet(const irs::string_ref& name);
|
|
locale_info_facet(locale_info_facet const& other) = delete; // because of string_ref
|
|
locale_info_facet(locale_info_facet&& other) NOEXCEPT { *this = std::move(other); }
|
|
locale_info_facet& operator=(const locale_info_facet& other) = delete; // because of string_ref
|
|
locale_info_facet& operator=(locale_info_facet&& other) NOEXCEPT;
|
|
bool operator<(const locale_info_facet& other) const NOEXCEPT { return name_ < other.name_; }
|
|
const irs::string_ref& country() const NOEXCEPT { return country_; }
|
|
const irs::string_ref& encoding() const NOEXCEPT { return encoding_; }
|
|
const irs::string_ref& language() const NOEXCEPT { return language_; }
|
|
const std::string& name() const NOEXCEPT { return name_; }
|
|
bool unicode() const NOEXCEPT { return unicode_t::NONE != unicode_; }
|
|
bool utf8() const NOEXCEPT { return unicode_t::UTF8 == unicode_; }
|
|
const irs::string_ref& variant() const NOEXCEPT { return variant_; }
|
|
|
|
private:
|
|
enum class unicode_t { NONE, UTF7, UTF8, UTF16, UTF32 };
|
|
std::string name_; // the normalized locale name: language[_COUNTRY][.encoding][@variant]
|
|
irs::string_ref country_;
|
|
irs::string_ref encoding_;
|
|
irs::string_ref language_;
|
|
irs::string_ref variant_;
|
|
unicode_t unicode_;
|
|
};
|
|
|
|
/*static*/ std::locale::id locale_info_facet::id;
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
/// The name has the following format: language[_COUNTRY][.encoding][@variant]
|
|
/// Where 'language' is ISO-639 language code like "en" or "ru",
|
|
/// 'COUNTRY' is ISO-3166 country identifier like "US" or "RU",
|
|
/// 'encoding' is a charracter set name like "UTF-8" or "ISO-8859-1",
|
|
/// 'variant' is backend specific variant like "euro" or "calendar=hebrew"
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
locale_info_facet::locale_info_facet(const irs::string_ref& name)
|
|
: name_(name),
|
|
country_(""),
|
|
encoding_("us-ascii"),
|
|
language_("C"),
|
|
variant_(""),
|
|
unicode_(unicode_t::NONE) { // us-ascii is not unicode
|
|
if (name_ == "C") {
|
|
return;
|
|
}
|
|
|
|
if (name_.empty() || name_ == "c") {
|
|
name_ = "C"; // uppercase 'classic' locale name
|
|
|
|
return;
|
|
}
|
|
|
|
auto data = &name_[0];
|
|
std::transform(data, data + name_.size(), data, ::tolower); // lowercase full string
|
|
auto length = ::strcspn(data, "-_.@");
|
|
|
|
language_ = irs::string_ref(data, length);
|
|
data += length;
|
|
|
|
// found country
|
|
if ('-' == data[0] || '_' == data[0]) {
|
|
++data;
|
|
length = ::strcspn(data, ".@");
|
|
country_ = irs::string_ref(data, length);
|
|
std::transform(data, data + length, data, ::toupper); // uppercase country
|
|
data += length;
|
|
}
|
|
|
|
// found encoding
|
|
if ('.' == data[0]) {
|
|
++data;
|
|
length = ::strcspn(data, "@");
|
|
encoding_ = irs::string_ref(data, length);
|
|
data += length;
|
|
|
|
// normalize encoding and compare to 'utf8' (data already in lower case)
|
|
std::string buf = encoding_;
|
|
auto* str = &buf[0];
|
|
auto end = std::remove_if(
|
|
str, str + buf.size(),
|
|
[](char x){ return !(('0' <= x && '9' >= x) || ('a' <= x && 'z' >= x)); }
|
|
);
|
|
irs::string_ref enc(str, std::distance(str, end));
|
|
|
|
if (enc == "utf7") {
|
|
unicode_ = unicode_t::UTF7;
|
|
} else if (enc == "utf8") {
|
|
unicode_ = unicode_t::UTF8;
|
|
} else if (enc == "utf16") {
|
|
unicode_ = unicode_t::UTF16;
|
|
} else if (enc == "utf32") {
|
|
unicode_ = unicode_t::UTF32;
|
|
}
|
|
}
|
|
|
|
// found variant
|
|
if ('@' == data[0]) {
|
|
++data;
|
|
variant_ = data;
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief move-assign the locale name and re-target every component that
///        referred to the storage of the source name
/// @param other the instance to move from, its components are reset to NIL
/// @return *this
//////////////////////////////////////////////////////////////////////////////
locale_info_facet& locale_info_facet::operator=(
    locale_info_facet&& other
) NOEXCEPT {
  if (this != &other) {
    // bounds of the source name, captured before the move because moving a
    // std::string may relocate its characters
    const char* start = &(other.name_[0]);
    const char* end = start + other.name_.size();

    name_ = std::move(other.name_); // move first since string_ref point into it

    const char* base = &name_[0];

    // translate a component of 'other' into one referring to 'name_';
    // components that never pointed into the source name are taken as-is.
    // A zero-length component positioned at the terminator (e.g. the empty
    // variant of "en@") counts as part of the name, otherwise it would be left
    // referring to the storage of the moved-from string
    const auto rebase = [start, end, base](
        const irs::string_ref& ref
    )->irs::string_ref {
      const char* ptr = ref.c_str();

      if (ptr < start || ptr + ref.size() > end) {
        return ref; // does not point into 'name_'
      }

      return irs::string_ref(base + std::distance(start, ptr), ref.size());
    };

    country_ = rebase(other.country_);
    encoding_ = rebase(other.encoding_);
    language_ = rebase(other.language_);
    variant_ = rebase(other.variant_);
    unicode_ = other.unicode_;

    // leave no references to the moved name behind in 'other'
    other.country_ = irs::string_ref::NIL;
    other.encoding_ = irs::string_ref::NIL;
    other.language_ = irs::string_ref::NIL;
    other.variant_ = irs::string_ref::NIL;
    other.unicode_ = unicode_t::NONE;
  }

  return *this;
}
|
|
|
|
const std::locale& get_locale(
|
|
const irs::string_ref& name, bool forceUnicodeSystem = true
|
|
) {
|
|
struct less_t {
|
|
bool operator()(
|
|
const locale_info_facet* lhs, const locale_info_facet* rhs
|
|
) const NOEXCEPT {
|
|
return (!lhs && rhs) || (lhs && rhs && *lhs < *rhs);
|
|
}
|
|
};
|
|
|
|
auto unicodeSystem =
|
|
forceUnicodeSystem || locale_info_facet(system_encoding()).unicode();
|
|
locale_info_facet info(name);
|
|
static std::map<locale_info_facet*, std::locale, less_t> locales_s;
|
|
static std::map<locale_info_facet*, std::locale, less_t> locales_u;
|
|
auto& locales = unicodeSystem ? locales_u : locales_s;
|
|
static std::mutex mutex;
|
|
SCOPED_LOCK(mutex);
|
|
auto itr = locales.find(&info);
|
|
|
|
if (itr != locales.end()) {
|
|
return itr->second;
|
|
}
|
|
|
|
// Boost locales always assume system is unicode
|
|
boost::locale::generator locale_genrator; // stateful object, cannot be static
|
|
icu::Locale icu_locale(
|
|
std::string(info.language()).c_str(),
|
|
std::string(info.country()).c_str(),
|
|
std::string(info.variant()).c_str()
|
|
);
|
|
|
|
if (icu_locale.isBogus()) {
|
|
IR_FRMT_WARN("locale '%s' is not supported by ICU", info.name().c_str());
|
|
}
|
|
|
|
std::locale boost_locale;
|
|
|
|
// FIXME TODO this is a workaround for boost throwning exceptions for
|
|
// unsupported encodings which are overriden below anyway
|
|
try {
|
|
boost_locale = locale_genrator.generate(info.name());
|
|
} catch(...) {
|
|
if (info.encoding().c_str() < info.name().c_str()
|
|
|| info.encoding().c_str() >= info.name().c_str() + info.name().size()) {
|
|
throw;
|
|
}
|
|
|
|
auto boost_locale_name = info.name();
|
|
|
|
boost_locale_name.erase(
|
|
info.encoding().c_str() - info.name().c_str() - 1, // -1 for '_'
|
|
info.encoding().size() + 1 // +1 for '_'
|
|
); // skip encoding
|
|
boost_locale = locale_genrator.generate(boost_locale_name);
|
|
}
|
|
|
|
auto locale_info =
|
|
irs::memory::make_unique<locale_info_facet>(std::move(info));
|
|
auto* locale_info_ptr = locale_info.get();
|
|
auto& converter = get_converter(locale_info->encoding());
|
|
auto locale = std::locale(boost_locale, locale_info.release());
|
|
|
|
locale = std::locale(
|
|
locale, irs::memory::make_unique<codecvt16_facet>(converter).release()
|
|
);
|
|
locale = std::locale(
|
|
locale, irs::memory::make_unique<codecvt32_facet>(converter).release()
|
|
);
|
|
|
|
if (unicodeSystem) {
|
|
auto cvt8 = irs::memory::make_unique<codecvt8u_facet>(converter);
|
|
auto cvtw = irs::memory::make_unique<codecvtwu_facet>(converter);
|
|
|
|
locale = std::locale(
|
|
locale,
|
|
irs::memory::make_unique<num_put_facet<char,codecvt8u_facet>>(icu_locale, *cvt8).release()
|
|
);
|
|
locale = std::locale(
|
|
locale,
|
|
irs::memory::make_unique<num_put_facet<wchar_t, codecvtwu_facet>>(icu_locale, *cvtw).release()
|
|
);
|
|
locale = std::locale(locale, cvt8.release());
|
|
locale = std::locale(locale, cvtw.release());
|
|
} else {
|
|
auto& converter_int = get_converter(system_encoding());
|
|
auto cvt8 = irs::memory::make_unique<codecvt8_facet>(converter_int, converter);
|
|
auto cvtw = irs::memory::make_unique<codecvtw_facet>(converter_int, converter);
|
|
|
|
locale = std::locale(
|
|
locale,
|
|
irs::memory::make_unique<num_put_facet<char, codecvt8_facet>>(icu_locale, *cvt8).release()
|
|
);
|
|
locale = std::locale(
|
|
locale,
|
|
irs::memory::make_unique<num_put_facet<wchar_t, codecvtw_facet>>(icu_locale, *cvtw).release()
|
|
);
|
|
locale = std::locale(locale, cvt8.release());
|
|
locale = std::locale(locale, cvtw.release());
|
|
}
|
|
|
|
return locales.emplace(locale_info_ptr, locale).first->second;
|
|
}
|
|
|
|
NS_END
|
|
|
|
NS_ROOT
|
|
NS_BEGIN( locale_utils )
|
|
|
|
#if defined(_MSC_VER) && _MSC_VER <= 1800 && defined(IRESEARCH_DLL) // MSVC2013
  // MSVC2013 does not properly export
  // std::codecvt<char32_t, char, mbstate_t>::id for shared libraries
  template<>
  const std::codecvt<char32_t, char, mbstate_t>& codecvt(
      std::locale const& locale
  ) {
    typedef std::codecvt<char32_t, char, mbstate_t> facet_t;
    const facet_t& facet = std::use_facet<facet_t>(locale);

    return facet;
  }
#elif defined(_MSC_VER) && _MSC_VER <= 1916 // MSVC2015/MSVC2017
  // MSVC2015/MSVC2017 implementations do not support char16_t/char32_t 'codecvt'
  // due to a missing export, as per their comment:
  //   This is an active bug in our database (VSO#143857), which we'll investigate
  //   for a future release, but we're currently working on higher priority things
  // hence the facets defined in this file are looked up instead
  template<>
  const std::codecvt<char16_t, char, mbstate_t>& codecvt(
      std::locale const& locale
  ) {
    const std::codecvt<char16_t, char, mbstate_t>& facet =
      std::use_facet<codecvt16_facet>(locale);

    return facet;
  }

  template<>
  const std::codecvt<char32_t, char, mbstate_t>& codecvt(
      std::locale const& locale
  ) {
    const std::codecvt<char32_t, char, mbstate_t>& facet =
      std::use_facet<codecvt32_facet>(locale);

    return facet;
  }
#endif
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief the country component of the locale
/// @note a locale lacking the locale_info_facet is substituted by the one
///       get_locale() returns for its name
//////////////////////////////////////////////////////////////////////////////
const irs::string_ref& country(std::locale const& locale) {
  const std::locale& source = std::has_facet<locale_info_facet>(locale)
    ? locale
    : get_locale(locale.name());

  return std::use_facet<locale_info_facet>(source).country();
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief the encoding component of the locale
/// @note a locale lacking the locale_info_facet is substituted by the one
///       get_locale() returns for its name
//////////////////////////////////////////////////////////////////////////////
const irs::string_ref& encoding(std::locale const& locale) {
  const std::locale& source = std::has_facet<locale_info_facet>(locale)
    ? locale
    : get_locale(locale.name());

  return std::use_facet<locale_info_facet>(source).encoding();
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief the language component of the locale
/// @note a locale lacking the locale_info_facet is substituted by the one
///       get_locale() returns for its name
//////////////////////////////////////////////////////////////////////////////
const irs::string_ref& language(std::locale const& locale) {
  const std::locale& source = std::has_facet<locale_info_facet>(locale)
    ? locale
    : get_locale(locale.name());

  return std::use_facet<locale_info_facet>(source).language();
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
/// @brief obtain the locale for a name, optionally replacing its encoding
/// @param name locale name: language[_COUNTRY][.encoding][@variant]
/// @param encodingOverride encoding to use in place of the one in 'name',
///        a null value keeps 'name' as-is, an empty non-null value produces
///        a name without any encoding
/// @param forceUnicodeSystem forwarded to get_locale()
/// @return a copy of the locale provided by get_locale()
//////////////////////////////////////////////////////////////////////////////
std::locale locale(
    irs::string_ref const& name,
    irs::string_ref const& encodingOverride /*= irs::string_ref::NIL*/,
    bool forceUnicodeSystem /*= true*/
) {
  if (!encodingOverride.null()) {
    // reassemble the name from its parsed components with the override
    // taking the place of the encoding
    locale_info_facet info(name);
    std::string locale_name = info.language();

    if (!info.country().empty()) {
      locale_name.append(1, '_');
      locale_name.append(info.country());
    }

    if (!encodingOverride.empty()) {
      locale_name.append(1, '.');
      locale_name.append(encodingOverride);
    }

    if (!info.variant().empty()) {
      locale_name.append(1, '@');
      locale_name.append(info.variant());
    }

    return get_locale(locale_name, forceUnicodeSystem);
  }

  return get_locale(name, forceUnicodeSystem);
}
|
|
|
|
const std::string& name(std::locale const& locale) {
|
|
auto* loc = &locale;
|
|
|
|
if (!std::has_facet<locale_info_facet>(*loc)) {
|
|
loc = &get_locale(loc->name());
|
|
}
|
|
|
|
return std::use_facet<locale_info_facet>(*loc).name();
|
|
}
|
|
|
|
bool utf8(std::locale const& locale) {
|
|
auto* loc = &locale;
|
|
|
|
if (!std::has_facet<locale_info_facet>(*loc)) {
|
|
loc = &get_locale(loc->name());
|
|
}
|
|
|
|
return std::use_facet<locale_info_facet>(*loc).utf8();
|
|
}
|
|
|
|
NS_END // locale_utils
|
|
NS_END
|
|
|
|
// -----------------------------------------------------------------------------
|
|
// --SECTION-- END-OF-FILE
|
|
// -----------------------------------------------------------------------------
|