mirror of https://gitee.com/bigwinds/arangodb
issue 539.1: backport from iresearch upstream: add support for text normalizing analyzer, make loading shared object optional in the registry (#8813)
This commit is contained in:
parent
0375218ca2
commit
2de9510051
|
@ -367,6 +367,7 @@ add_dependencies(${IResearch_TARGET_NAME}-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-delimited-static
|
${IResearch_TARGET_NAME}-analyzer-delimited-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-ngram-static
|
${IResearch_TARGET_NAME}-analyzer-ngram-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-text-static
|
${IResearch_TARGET_NAME}-analyzer-text-static
|
||||||
|
${IResearch_TARGET_NAME}-analyzer-text-normalizing-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-text-stemming-static
|
${IResearch_TARGET_NAME}-analyzer-text-stemming-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-token-masking-static
|
${IResearch_TARGET_NAME}-analyzer-token-masking-static
|
||||||
${IResearch_TARGET_NAME}-format-1_0-static
|
${IResearch_TARGET_NAME}-format-1_0-static
|
||||||
|
@ -387,6 +388,7 @@ if(MSVC)
|
||||||
${IResearch_TARGET_NAME}-analyzer-delimited-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-delimited-static-scrt
|
||||||
${IResearch_TARGET_NAME}-analyzer-ngram-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-ngram-static-scrt
|
||||||
${IResearch_TARGET_NAME}-analyzer-text-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-text-static-scrt
|
||||||
|
${IResearch_TARGET_NAME}-analyzer-text-normalizing-static-scrt
|
||||||
${IResearch_TARGET_NAME}-analyzer-text-stemming-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-text-stemming-static-scrt
|
||||||
${IResearch_TARGET_NAME}-analyzer-token-masking-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-token-masking-static-scrt
|
||||||
${IResearch_TARGET_NAME}-format-1_0-static-scrt
|
${IResearch_TARGET_NAME}-format-1_0-static-scrt
|
||||||
|
@ -505,6 +507,7 @@ target_link_libraries(${IResearch_TARGET_NAME}-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-delimited-static
|
${IResearch_TARGET_NAME}-analyzer-delimited-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-ngram-static
|
${IResearch_TARGET_NAME}-analyzer-ngram-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-text-static
|
${IResearch_TARGET_NAME}-analyzer-text-static
|
||||||
|
${IResearch_TARGET_NAME}-analyzer-text-normalizing-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-text-stemming-static
|
${IResearch_TARGET_NAME}-analyzer-text-stemming-static
|
||||||
${IResearch_TARGET_NAME}-analyzer-token-masking-static
|
${IResearch_TARGET_NAME}-analyzer-token-masking-static
|
||||||
${IResearch_TARGET_NAME}-format-1_0-static
|
${IResearch_TARGET_NAME}-format-1_0-static
|
||||||
|
@ -537,6 +540,7 @@ if(MSVC)
|
||||||
${IResearch_TARGET_NAME}-analyzer-delimited-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-delimited-static-scrt
|
||||||
${IResearch_TARGET_NAME}-analyzer-ngram-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-ngram-static-scrt
|
||||||
${IResearch_TARGET_NAME}-analyzer-text-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-text-static-scrt
|
||||||
|
${IResearch_TARGET_NAME}-analyzer-text-normalizing-static-scrt
|
||||||
${IResearch_TARGET_NAME}-analyzer-text-stemming-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-text-stemming-static-scrt
|
||||||
${IResearch_TARGET_NAME}-analyzer-token-masking-static-scrt
|
${IResearch_TARGET_NAME}-analyzer-token-masking-static-scrt
|
||||||
${IResearch_TARGET_NAME}-format-1_0-static-scrt
|
${IResearch_TARGET_NAME}-format-1_0-static-scrt
|
||||||
|
@ -566,6 +570,7 @@ if (MSVC)
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-delimited-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-delimited-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-ngram-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-ngram-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-static>"
|
||||||
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-normalizing-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-stemming-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-stemming-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-token-masking-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-token-masking-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-format-1_0-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-format-1_0-static>"
|
||||||
|
@ -624,6 +629,7 @@ if (MSVC)
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-delimited-static-scrt>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-delimited-static-scrt>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-ngram-static-scrt>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-ngram-static-scrt>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-static-scrt>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-static-scrt>"
|
||||||
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-normalizing-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-stemming-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-stemming-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-token-masking-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-token-masking-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-format-1_0-static-scrt>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-format-1_0-static-scrt>"
|
||||||
|
@ -683,6 +689,7 @@ elseif (APPLE)
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-delimited-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-delimited-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-ngram-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-ngram-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-static>"
|
||||||
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-normalizing-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-stemming-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-stemming-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-token-masking-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-token-masking-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-format-1_0-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-format-1_0-static>"
|
||||||
|
@ -741,6 +748,7 @@ else()
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-delimited-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-delimited-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-ngram-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-ngram-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-static>"
|
||||||
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-normalizing-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-stemming-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-text-stemming-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-token-masking-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-analyzer-token-masking-static>"
|
||||||
"$<TARGET_FILE:${IResearch_TARGET_NAME}-format-1_0-static>"
|
"$<TARGET_FILE:${IResearch_TARGET_NAME}-format-1_0-static>"
|
||||||
|
@ -1021,6 +1029,129 @@ if(MSVC)
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
### analysis plugin : text token normalizing
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
add_library(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared
|
||||||
|
SHARED
|
||||||
|
./analysis/text_token_normalizing_stream.cpp
|
||||||
|
./analysis/text_token_normalizing_stream.hpp
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static
|
||||||
|
STATIC
|
||||||
|
./analysis/text_token_normalizing_stream.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
# setup CRT
|
||||||
|
if(MSVC)
|
||||||
|
add_library(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared-scrt
|
||||||
|
SHARED
|
||||||
|
./analysis/text_token_normalizing_stream.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static-scrt
|
||||||
|
STATIC
|
||||||
|
./analysis/text_token_normalizing_stream.cpp
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
target_include_directories(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared
|
||||||
|
PRIVATE ${ICU_INCLUDE_DIR} # cmake on MSVC does not properly expand dependencies for linked libraries
|
||||||
|
)
|
||||||
|
|
||||||
|
target_include_directories(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static
|
||||||
|
PRIVATE ${ICU_INCLUDE_DIR} # cmake on MSVC does not properly expand dependencies for linked libraries
|
||||||
|
)
|
||||||
|
|
||||||
|
# setup CRT
|
||||||
|
if(MSVC)
|
||||||
|
target_include_directories(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared-scrt
|
||||||
|
PRIVATE ${ICU_INCLUDE_DIR} # cmake on MSVC does not properly expand dependencies for linked libraries
|
||||||
|
)
|
||||||
|
|
||||||
|
target_include_directories(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static-scrt
|
||||||
|
PRIVATE ${ICU_INCLUDE_DIR} # cmake on MSVC does not properly expand dependencies for linked libraries
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# setup CRT
|
||||||
|
if(MSVC)
|
||||||
|
target_compile_options(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared
|
||||||
|
PRIVATE "$<$<CONFIG:Debug>:/MDd>$<$<NOT:$<CONFIG:Debug>>:/MD>"
|
||||||
|
)
|
||||||
|
|
||||||
|
target_compile_options(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static
|
||||||
|
PRIVATE "$<$<CONFIG:Debug>:/MDd>$<$<NOT:$<CONFIG:Debug>>:/MD>"
|
||||||
|
)
|
||||||
|
|
||||||
|
target_compile_options(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared-scrt
|
||||||
|
PRIVATE "$<$<CONFIG:Debug>:/MTd>$<$<NOT:$<CONFIG:Debug>>:/MT>"
|
||||||
|
)
|
||||||
|
|
||||||
|
target_compile_options(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static-scrt
|
||||||
|
PRIVATE "$<$<CONFIG:Debug>:/MTd>$<$<NOT:$<CONFIG:Debug>>:/MT>"
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set_target_properties(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared
|
||||||
|
PROPERTIES
|
||||||
|
PREFIX lib
|
||||||
|
IMPORT_PREFIX lib
|
||||||
|
OUTPUT_NAME analyzer-text-token-normalize
|
||||||
|
DEBUG_POSTFIX "" # otherwise library names will not match expected dynamically loaded value
|
||||||
|
COMPILE_DEFINITIONS "$<$<CONFIG:Debug>:IRESEARCH_DEBUG>;IRESEARCH_DLL;IRESEARCH_DLL_EXPORTS;IRESEARCH_DLL_PLUGIN"
|
||||||
|
CXX_VISIBILITY_PRESET hidden
|
||||||
|
)
|
||||||
|
|
||||||
|
set_target_properties(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static
|
||||||
|
PROPERTIES
|
||||||
|
PREFIX lib
|
||||||
|
IMPORT_PREFIX lib
|
||||||
|
OUTPUT_NAME analyzer-text-token-normalize-s
|
||||||
|
COMPILE_DEFINITIONS "$<$<CONFIG:Debug>:IRESEARCH_DEBUG>"
|
||||||
|
)
|
||||||
|
|
||||||
|
# setup CRT
|
||||||
|
if(MSVC)
|
||||||
|
set_target_properties(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared-scrt
|
||||||
|
PROPERTIES
|
||||||
|
PREFIX lib
|
||||||
|
IMPORT_PREFIX lib
|
||||||
|
OUTPUT_NAME analyzer-text-token-normalize-scrt
|
||||||
|
COMPILE_DEFINITIONS "$<$<CONFIG:Debug>:IRESEARCH_DEBUG>;IRESEARCH_DLL;IRESEARCH_DLL_EXPORTS;IRESEARCH_DLL_PLUGIN"
|
||||||
|
CXX_VISIBILITY_PRESET hidden
|
||||||
|
)
|
||||||
|
|
||||||
|
set_target_properties(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static-scrt
|
||||||
|
PROPERTIES
|
||||||
|
PREFIX lib
|
||||||
|
IMPORT_PREFIX lib
|
||||||
|
OUTPUT_NAME analyzer-text-token-normalize-scrt-s
|
||||||
|
COMPILE_DEFINITIONS "$<$<CONFIG:Debug>:IRESEARCH_DEBUG>"
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
target_link_libraries(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared
|
||||||
|
${IResearch_TARGET_NAME}-shared
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static
|
||||||
|
${IResearch_TARGET_NAME}-static
|
||||||
|
)
|
||||||
|
|
||||||
|
# setup CRT
|
||||||
|
if(MSVC)
|
||||||
|
target_link_libraries(${IResearch_TARGET_NAME}-analyzer-text-normalizing-shared-scrt
|
||||||
|
${IResearch_TARGET_NAME}-shared-scrt
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(${IResearch_TARGET_NAME}-analyzer-text-normalizing-static-scrt
|
||||||
|
${IResearch_TARGET_NAME}-static-scrt
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
### analysis plugin : text token stemming
|
### analysis plugin : text token stemming
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#ifndef IRESEARCH_DLL
|
#ifndef IRESEARCH_DLL
|
||||||
#include "delimited_token_stream.hpp"
|
#include "delimited_token_stream.hpp"
|
||||||
#include "ngram_token_stream.hpp"
|
#include "ngram_token_stream.hpp"
|
||||||
|
#include "text_token_normalizing_stream.hpp"
|
||||||
#include "text_token_stemming_stream.hpp"
|
#include "text_token_stemming_stream.hpp"
|
||||||
#include "text_token_stream.hpp"
|
#include "text_token_stream.hpp"
|
||||||
#include "token_masking_stream.hpp"
|
#include "token_masking_stream.hpp"
|
||||||
|
@ -94,19 +95,22 @@ NS_BEGIN(analysis)
|
||||||
|
|
||||||
/*static*/ bool analyzers::exists(
|
/*static*/ bool analyzers::exists(
|
||||||
const string_ref& name,
|
const string_ref& name,
|
||||||
const irs::text_format::type_id& args_format
|
const irs::text_format::type_id& args_format,
|
||||||
|
bool load_library /*= true*/
|
||||||
) {
|
) {
|
||||||
return nullptr != analyzer_register::instance().get(entry_key_t(name, args_format));
|
return nullptr != analyzer_register::instance().get(entry_key_t(name, args_format), load_library);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static*/ analyzer::ptr analyzers::get(
|
/*static*/ analyzer::ptr analyzers::get(
|
||||||
const string_ref& name,
|
const string_ref& name,
|
||||||
const irs::text_format::type_id& args_format,
|
const irs::text_format::type_id& args_format,
|
||||||
const string_ref& args
|
const string_ref& args,
|
||||||
|
bool load_library /*= true*/
|
||||||
) NOEXCEPT {
|
) NOEXCEPT {
|
||||||
try {
|
try {
|
||||||
auto* factory =
|
auto* factory = analyzer_register::instance().get(
|
||||||
analyzer_register::instance().get(entry_key_t(name, args_format));
|
entry_key_t(name, args_format), load_library
|
||||||
|
);
|
||||||
|
|
||||||
return factory ? factory(args) : nullptr;
|
return factory ? factory(args) : nullptr;
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
@ -121,6 +125,7 @@ NS_BEGIN(analysis)
|
||||||
#ifndef IRESEARCH_DLL
|
#ifndef IRESEARCH_DLL
|
||||||
irs::analysis::delimited_token_stream::init();
|
irs::analysis::delimited_token_stream::init();
|
||||||
irs::analysis::ngram_token_stream::init();
|
irs::analysis::ngram_token_stream::init();
|
||||||
|
irs::analysis::text_token_normalizing_stream::init();
|
||||||
irs::analysis::text_token_stemming_stream::init();
|
irs::analysis::text_token_stemming_stream::init();
|
||||||
irs::analysis::text_token_stream::init();
|
irs::analysis::text_token_stream::init();
|
||||||
irs::analysis::token_masking_stream::init();
|
irs::analysis::token_masking_stream::init();
|
||||||
|
|
|
@ -78,7 +78,9 @@ class IRESEARCH_API analyzers {
|
||||||
/// @brief checks whether an analyzer with the specified name is registered
|
/// @brief checks whether an analyzer with the specified name is registered
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
static bool exists(
|
static bool exists(
|
||||||
const string_ref& name, const irs::text_format::type_id& args_format
|
const string_ref& name,
|
||||||
|
const irs::text_format::type_id& args_format,
|
||||||
|
bool load_library = true
|
||||||
);
|
);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -90,7 +92,8 @@ class IRESEARCH_API analyzers {
|
||||||
static analyzer::ptr get(
|
static analyzer::ptr get(
|
||||||
const string_ref& name,
|
const string_ref& name,
|
||||||
const irs::text_format::type_id& args_format,
|
const irs::text_format::type_id& args_format,
|
||||||
const string_ref& args
|
const string_ref& args,
|
||||||
|
bool load_library = true
|
||||||
) NOEXCEPT;
|
) NOEXCEPT;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -0,0 +1,355 @@
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// DISCLAIMER
|
||||||
|
///
|
||||||
|
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
|
||||||
|
///
|
||||||
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
/// you may not use this file except in compliance with the License.
|
||||||
|
/// You may obtain a copy of the License at
|
||||||
|
///
|
||||||
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
///
|
||||||
|
/// Unless required by applicable law or agreed to in writing, software
|
||||||
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
/// See the License for the specific language governing permissions and
|
||||||
|
/// limitations under the License.
|
||||||
|
///
|
||||||
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||||
|
///
|
||||||
|
/// @author Andrey Abramov
|
||||||
|
/// @author Vasiliy Nabatchikov
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#include <rapidjson/rapidjson/document.h> // for rapidjson::Document
|
||||||
|
#include <unicode/locid.h> // for icu::Locale
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#pragma warning(disable: 4512)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <unicode/normalizer2.h> // for icu::Normalizer2
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#pragma warning(default: 4512)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <unicode/translit.h> // for icu::Transliterator
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#pragma warning(disable: 4229)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <unicode/uclean.h> // for u_cleanup
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#pragma warning(default: 4229)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "utils/locale_utils.hpp"
|
||||||
|
|
||||||
|
#include "text_token_normalizing_stream.hpp"
|
||||||
|
|
||||||
|
NS_ROOT
|
||||||
|
NS_BEGIN(analysis)
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
// --SECTION-- private types
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
/// @brief mutable working state of a text_token_normalizing_stream instance,
///        most members are lazily initialized by reset(...)
////////////////////////////////////////////////////////////////////////////////
struct text_token_normalizing_stream::state_t {
  icu::UnicodeString data; // ICU representation of the value last passed to reset(...)
  icu::Locale icu_locale; // bogus until reset(...) derives it from 'locale'
  std::locale locale; // assigned by reset(...) from 'options.locale'
  std::shared_ptr<const icu::Normalizer2> normalizer; // set by reset(...), held with a no-op deleter
  const options_t options; // copy of the options supplied at construction
  std::string term_buf; // used by reset()
  std::shared_ptr<icu::Transliterator> transliterator; // created by reset(...) only if 'options.no_accent'

  // 'explicit' to prevent accidental implicit conversions options_t -> state_t
  explicit state_t(const options_t& opts): icu_locale("C"), options(opts) {
    // NOTE: use of the default constructor for Locale() or
    //       use of Locale::createFromName(nullptr)
    //       causes a memory leak with Boost 1.58, as detected by valgrind
    icu_locale.setToBogus(); // set to uninitialized
  }
};
|
||||||
|
|
||||||
|
NS_END // analysis
|
||||||
|
NS_END // ROOT
|
||||||
|
|
||||||
|
NS_LOCAL
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief args is a jSON encoded object with the following attributes:
|
||||||
|
/// "locale"(string): the locale to use for stemming <required>
|
||||||
|
/// "case_convert"(string enum): modify token case using "locale"
|
||||||
|
/// "no_accent"(bool): remove accents
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
irs::analysis::analyzer::ptr make_json(const irs::string_ref& args) {
|
||||||
|
rapidjson::Document json;
|
||||||
|
|
||||||
|
if (json.Parse(args.c_str(), args.size()).HasParseError()) {
|
||||||
|
IR_FRMT_ERROR(
|
||||||
|
"Invalid jSON arguments passed while constructing text_token_normalizing_stream, arguments: %s",
|
||||||
|
args.c_str()
|
||||||
|
);
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
typedef irs::analysis::text_token_normalizing_stream::options_t options_t;
|
||||||
|
options_t options;
|
||||||
|
|
||||||
|
switch (json.GetType()) {
|
||||||
|
case rapidjson::kStringType:
|
||||||
|
options.locale = json.GetString(); // required
|
||||||
|
|
||||||
|
return irs::memory::make_shared<irs::analysis::text_token_normalizing_stream>(
|
||||||
|
std::move(options)
|
||||||
|
);
|
||||||
|
case rapidjson::kObjectType:
|
||||||
|
if (json.HasMember("locale") && json["locale"].IsString()) {
|
||||||
|
options.locale = json["locale"].GetString(); // required
|
||||||
|
|
||||||
|
if (json.HasMember("case_convert")) {
|
||||||
|
auto& case_convert = json["case_convert"]; // optional string enum
|
||||||
|
|
||||||
|
if (!case_convert.IsString()) {
|
||||||
|
IR_FRMT_WARN("Non-string value in 'case_convert' while constructing text_token_normalizing_stream from jSON arguments: %s", args.c_str());
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const std::unordered_map<std::string, options_t::case_convert_t> case_convert_map = {
|
||||||
|
{ "lower", options_t::case_convert_t::LOWER },
|
||||||
|
{ "none", options_t::case_convert_t::NONE },
|
||||||
|
{ "upper", options_t::case_convert_t::UPPER },
|
||||||
|
};
|
||||||
|
auto itr = case_convert_map.find(case_convert.GetString());
|
||||||
|
|
||||||
|
if (itr == case_convert_map.end()) {
|
||||||
|
IR_FRMT_WARN("Invalid value in 'case_convert' while constructing text_token_normalizing_stream from jSON arguments: %s", args.c_str());
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
options.case_convert = itr->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (json.HasMember("no_accent")) {
|
||||||
|
auto& no_accent = json["no_accent"]; // optional bool
|
||||||
|
|
||||||
|
if (!no_accent.IsBool()) {
|
||||||
|
IR_FRMT_WARN("Non-boolean value in 'no_accent' while constructing text_token_normalizing_stream from jSON arguments: %s", args.c_str());
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
options.no_accent = no_accent.GetBool();
|
||||||
|
}
|
||||||
|
|
||||||
|
return irs::memory::make_shared<irs::analysis::text_token_normalizing_stream>(
|
||||||
|
std::move(options)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
default: // fall through
|
||||||
|
IR_FRMT_ERROR(
|
||||||
|
"Missing 'locale' while constructing text_token_normalizing_stream from jSON arguments: %s",
|
||||||
|
args.c_str()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} catch (...) {
|
||||||
|
IR_FRMT_ERROR(
|
||||||
|
"Caught error while constructing text_token_normalizing_stream from jSON arguments: %s",
|
||||||
|
args.c_str()
|
||||||
|
);
|
||||||
|
IR_LOG_EXCEPTION();
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief args is a language to use for normalizing
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
irs::analysis::analyzer::ptr make_text(const irs::string_ref& args) {
|
||||||
|
try {
|
||||||
|
irs::analysis::text_token_normalizing_stream::options_t options;
|
||||||
|
|
||||||
|
options.locale = args; // interpret 'args' as a locale name
|
||||||
|
|
||||||
|
return irs::memory::make_shared<irs::analysis::text_token_normalizing_stream>(
|
||||||
|
std::move(options)
|
||||||
|
);
|
||||||
|
} catch (...) {
|
||||||
|
IR_FRMT_ERROR(
|
||||||
|
"Caught error while constructing text_token_normalizing_stream TEXT arguments: %s",
|
||||||
|
args.c_str()
|
||||||
|
);
|
||||||
|
IR_LOG_EXCEPTION();
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
REGISTER_ANALYZER_JSON(irs::analysis::text_token_normalizing_stream, make_json);
|
||||||
|
REGISTER_ANALYZER_TEXT(irs::analysis::text_token_normalizing_stream, make_text);
|
||||||
|
|
||||||
|
NS_END
|
||||||
|
|
||||||
|
NS_ROOT
|
||||||
|
NS_BEGIN(analysis)
|
||||||
|
|
||||||
|
DEFINE_ANALYZER_TYPE_NAMED(text_token_normalizing_stream, "text-token-normalize")
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
/// @brief construct a stream using a copy of 'options', the stream yields no
///        term (next() returns false) until reset(...) succeeds
////////////////////////////////////////////////////////////////////////////////
text_token_normalizing_stream::text_token_normalizing_stream(
    const options_t& options
) : analyzer(text_token_normalizing_stream::type()),
    attrs_(4), // increment + offset + payload + term
    state_(memory::make_unique<state_t>(options)),
    term_eof_(true) {
  // publish the attributes populated by reset(...)
  attrs_.emplace(inc_);
  attrs_.emplace(offset_);
  attrs_.emplace(payload_);
  attrs_.emplace(term_);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
/// @brief explicitly register the JSON and TEXT factories of this analyzer,
///        repeats the file-scope REGISTER_ANALYZER_* statements of this file
////////////////////////////////////////////////////////////////////////////////
/*static*/ void text_token_normalizing_stream::init() {
  // match registration above
  REGISTER_ANALYZER_JSON(text_token_normalizing_stream, make_json);
  // match registration above
  REGISTER_ANALYZER_TEXT(text_token_normalizing_stream, make_text);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
/// @brief create an analyzer for the specified locale name
/// @return the analyzer or nullptr on failure (delegates to make_text(...))
////////////////////////////////////////////////////////////////////////////////
/*static*/ analyzer::ptr text_token_normalizing_stream::make(
    const string_ref& locale
) {
  // a plain locale name is exactly the TEXT argument format
  return make_text(locale);
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
/// @brief advance to the single term prepared by reset(...)
/// @return true exactly once after each successful reset(...), false otherwise
////////////////////////////////////////////////////////////////////////////////
bool text_token_normalizing_stream::next() {
  const bool has_term = !term_eof_;

  term_eof_ = true; // the one and only term is consumed by this call

  return has_term;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
/// @brief normalize 'data' as one single token and make it available through
///        a subsequent call to next()
///        processing order: NFC normalization -> optional case conversion ->
///        optional accent removal, the result is exposed as UTF8 via the term
///        attribute, the offset attribute spans [0, data.size()) and the
///        payload attribute references the original 'data'
/// @return false if the configured locale cannot be converted to an ICU locale,
///         if the ICU normalizer/transliterator cannot be obtained, if 'data'
///         cannot be converted to UTF8 or if it exceeds INT32_MAX bytes
////////////////////////////////////////////////////////////////////////////////
bool text_token_normalizing_stream::reset(const irs::string_ref& data) {
  // lazily resolve the locale on first use (or retry after a previous failure)
  if (state_->icu_locale.isBogus()) {
    state_->locale = irs::locale_utils::locale(
      state_->options.locale, irs::string_ref::NIL, true // true == convert to unicode, required for ICU
    );
    state_->icu_locale = icu::Locale(
      std::string(irs::locale_utils::language(state_->locale)).c_str(),
      std::string(irs::locale_utils::country(state_->locale)).c_str()
    );

    if (state_->icu_locale.isBogus()) {
      return false;
    }
  }

  auto err = UErrorCode::U_ZERO_ERROR; // a value that passes the U_SUCCESS() test

  if (!state_->normalizer) {
    // reusable object owned by ICU, hence the no-op deleter
    state_->normalizer.reset(
      icu::Normalizer2::getNFCInstance(err), [](const icu::Normalizer2*)->void{}
    );

    if (!U_SUCCESS(err) || !state_->normalizer) {
      state_->normalizer.reset();

      return false;
    }
  }

  if (state_->options.no_accent && !state_->transliterator) {
    // transliteration rule taken verbatim from: http://userguide.icu-project.org/transforms/general
    icu::UnicodeString collationRule("NFD; [:Nonspacing Mark:] Remove; NFC"); // do not allocate statically since it causes memory leaks in ICU

    // reusable object owned by *this
    state_->transliterator.reset(icu::Transliterator::createInstance(
      collationRule, UTransDirection::UTRANS_FORWARD, err
    ));

    if (!U_SUCCESS(err) || !state_->transliterator) {
      state_->transliterator.reset();

      return false;
    }
  }

  // ...........................................................................
  // convert encoding to UTF8 for use with ICU
  // ...........................................................................
  std::string data_utf8;

  // valid conversion since 'state_->locale' was created with internal unicode encoding
  if (!irs::locale_utils::append_internal(data_utf8, data, state_->locale)) {
    return false; // UTF8 conversion failure
  }

  if (data_utf8.size() > irs::integer_traits<int32_t>::const_max) {
    return false; // ICU UnicodeString signatures can handle at most INT32_MAX
  }

  state_->data = icu::UnicodeString::fromUTF8(
    icu::StringPiece(data_utf8.c_str(), static_cast<int32_t>(data_utf8.size())) // safe cast, size checked above
  );

  // ...........................................................................
  // normalize unicode
  // ...........................................................................
  icu::UnicodeString term_icu;

  state_->normalizer->normalize(state_->data, term_icu, err);

  if (!U_SUCCESS(err)) {
    term_icu = state_->data; // use non-normalized value if normalization failure
  }

  // ...........................................................................
  // case-convert unicode
  // ...........................................................................
  switch (state_->options.case_convert) {
   case options_t::case_convert_t::LOWER:
    term_icu.toLower(state_->icu_locale); // inplace case-conversion
    break;
   case options_t::case_convert_t::UPPER:
    term_icu.toUpper(state_->icu_locale); // inplace case-conversion
    break;
   default:
    {} // NOOP
  }

  // ...........................................................................
  // collate value, e.g. remove accents
  // ...........................................................................
  if (state_->transliterator) {
    state_->transliterator->transliterate(term_icu); // inplace transliteration
  }

  state_->term_buf.clear();
  term_icu.toUTF8String(state_->term_buf);

  // ...........................................................................
  // use the normalized value
  // ...........................................................................
  static_assert(sizeof(irs::byte_type) == sizeof(char), "sizeof(irs::byte_type) != sizeof(char)");
  term_.value(irs::ref_cast<irs::byte_type>(irs::string_ref(state_->term_buf)));
  offset_.start = 0;
  offset_.end = data.size();
  payload_.value = ref_cast<uint8_t>(data);
  term_eof_ = false; // exactly one term is now available via next()

  return true;
}
|
||||||
|
|
||||||
|
NS_END // analysis
|
||||||
|
NS_END // ROOT
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
// --SECTION-- END-OF-FILE
|
||||||
|
// -----------------------------------------------------------------------------
|
|
@ -0,0 +1,80 @@
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// DISCLAIMER
|
||||||
|
///
|
||||||
|
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
|
||||||
|
///
|
||||||
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
/// you may not use this file except in compliance with the License.
|
||||||
|
/// You may obtain a copy of the License at
|
||||||
|
///
|
||||||
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
///
|
||||||
|
/// Unless required by applicable law or agreed to in writing, software
|
||||||
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
/// See the License for the specific language governing permissions and
|
||||||
|
/// limitations under the License.
|
||||||
|
///
|
||||||
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||||
|
///
|
||||||
|
/// @author Andrey Abramov
|
||||||
|
/// @author Vasiliy Nabatchikov
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef IRESEARCH_TEXT_TOKEN_NORMALIZING_STREAM_H
|
||||||
|
#define IRESEARCH_TEXT_TOKEN_NORMALIZING_STREAM_H
|
||||||
|
|
||||||
|
#include "analyzers.hpp"
|
||||||
|
#include "token_attributes.hpp"
|
||||||
|
|
||||||
|
NS_ROOT
|
||||||
|
NS_BEGIN(analysis)
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief an analyser capable of normalizing the text, treated as a single
|
||||||
|
/// token, i.e. case conversion and accent removal
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
class text_token_normalizing_stream: public analyzer, util::noncopyable { // analyzer that emits the whole input as one normalized token
|
||||||
|
public:
|
||||||
|
struct options_t { // normalization settings supplied at construction
|
||||||
|
enum case_convert_t { LOWER, NONE, UPPER }; // LOWER/UPPER select toLower()/toUpper(); NONE leaves case untouched
|
||||||
|
case_convert_t case_convert{case_convert_t::NONE}; // default: no case conversion (no extra normalization)
|
||||||
|
std::string locale; // locale name; presumably the source of the ICU locale used for case conversion -- TODO confirm in the .cpp
|
||||||
|
bool no_accent{false}; // default: no extra normalization; presumably enables accent removal when true -- TODO confirm
|
||||||
|
};
|
||||||
|
|
||||||
|
struct state_t; // forward declaration only; the definition is not part of this header
|
||||||
|
|
||||||
|
DECLARE_ANALYZER_TYPE();
|
||||||
|
|
||||||
|
// for use with irs::order::add<T>() and default args (static build)
|
||||||
|
DECLARE_FACTORY(const string_ref& locale);
|
||||||
|
|
||||||
|
text_token_normalizing_stream(const options_t& options); // construct from explicit normalization options
|
||||||
|
virtual const irs::attribute_view& attributes() const NOEXCEPT override { // read-only access to the attribute view held in attrs_
|
||||||
|
return attrs_;
|
||||||
|
}
|
||||||
|
static void init(); // for triggering registration in a static build
|
||||||
|
virtual bool next() override; // analyzer interface override; implemented outside this header
|
||||||
|
virtual bool reset(const irs::string_ref& data) override; // analyzer interface override taking the new input; implemented outside this header
|
||||||
|
|
||||||
|
private:
|
||||||
|
class term_attribute final: public irs::term_attribute { // term attribute that additionally exposes a setter
|
||||||
|
public:
|
||||||
|
using irs::term_attribute::value; // keep the inherited value() overload(s) visible next to the setter below
|
||||||
|
void value(const irs::bytes_ref& value) { value_ = value; } // setter: stores the given bytes reference in value_
|
||||||
|
};
|
||||||
|
|
||||||
|
irs::attribute_view attrs_; // view returned by attributes()
|
||||||
|
irs::increment inc_;
|
||||||
|
irs::offset offset_;
|
||||||
|
irs::payload payload_; // raw token value
|
||||||
|
std::shared_ptr<state_t> state_; // shared handle to the state_t declared above
|
||||||
|
term_attribute term_; // token value; NOTE(review): original comment said "with evaluated quotes", which looks copied from another analyzer -- presumably the normalized value, confirm
|
||||||
|
bool term_eof_;
|
||||||
|
};
|
||||||
|
|
||||||
|
NS_END // analysis
|
||||||
|
NS_END // ROOT
|
||||||
|
|
||||||
|
#endif
|
|
@ -90,13 +90,19 @@ DEFINE_ATTRIBUTE_TYPE(iresearch::term_meta)
|
||||||
meta.segments_ = std::move(segments);
|
meta.segments_ = std::move(segments);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static*/ bool formats::exists(const string_ref& name) {
|
/*static*/ bool formats::exists(
|
||||||
return nullptr != format_register::instance().get(name);
|
const string_ref& name,
|
||||||
|
bool load_library /*= true*/
|
||||||
|
) {
|
||||||
|
return nullptr != format_register::instance().get(name, load_library);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static*/ format::ptr formats::get(const string_ref& name) NOEXCEPT {
|
/*static*/ format::ptr formats::get(
|
||||||
|
const string_ref& name,
|
||||||
|
bool load_library /*= true*/
|
||||||
|
) NOEXCEPT {
|
||||||
try {
|
try {
|
||||||
auto* factory = format_register::instance().get(name);
|
auto* factory = format_register::instance().get(name, load_library);
|
||||||
|
|
||||||
return factory ? factory() : nullptr;
|
return factory ? factory() : nullptr;
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
|
|
@ -525,7 +525,7 @@ class IRESEARCH_API formats {
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief checks whether a format with the specified name is registered
|
/// @brief checks whether a format with the specified name is registered
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
static bool exists(const string_ref& name);
|
static bool exists(const string_ref& name, bool load_library = true);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief find a format by name, or nullptr if not found
|
/// @brief find a format by name, or nullptr if not found
|
||||||
|
@ -533,7 +533,10 @@ class IRESEARCH_API formats {
|
||||||
/// requires use of DECLARE_FACTORY() in class definition
|
/// requires use of DECLARE_FACTORY() in class definition
|
||||||
/// NOTE: make(...) MUST be defined in CPP to ensure proper code scope
|
/// NOTE: make(...) MUST be defined in CPP to ensure proper code scope
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
static format::ptr get(const string_ref& name) NOEXCEPT;
|
static format::ptr get(
|
||||||
|
const string_ref& name,
|
||||||
|
bool load_library = true
|
||||||
|
) NOEXCEPT;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief for static lib reference all known formats in lib
|
/// @brief for static lib reference all known formats in lib
|
||||||
|
|
|
@ -89,19 +89,22 @@ NS_ROOT
|
||||||
|
|
||||||
/*static*/ bool scorers::exists(
|
/*static*/ bool scorers::exists(
|
||||||
const string_ref& name,
|
const string_ref& name,
|
||||||
const irs::text_format::type_id& args_format
|
const irs::text_format::type_id& args_format,
|
||||||
|
bool load_library /*= true*/
|
||||||
) {
|
) {
|
||||||
return nullptr != scorer_register::instance().get(entry_key_t(name, args_format));
|
return nullptr != scorer_register::instance().get(entry_key_t(name, args_format),load_library);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static*/ sort::ptr scorers::get(
|
/*static*/ sort::ptr scorers::get(
|
||||||
const string_ref& name,
|
const string_ref& name,
|
||||||
const irs::text_format::type_id& args_format,
|
const irs::text_format::type_id& args_format,
|
||||||
const string_ref& args
|
const string_ref& args,
|
||||||
|
bool load_library /*= true*/
|
||||||
) NOEXCEPT {
|
) NOEXCEPT {
|
||||||
try {
|
try {
|
||||||
auto* factory =
|
auto* factory = scorer_register::instance().get(
|
||||||
scorer_register::instance().get(entry_key_t(name, args_format));
|
entry_key_t(name, args_format), load_library
|
||||||
|
);
|
||||||
|
|
||||||
return factory ? factory(args) : nullptr;
|
return factory ? factory(args) : nullptr;
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
|
|
@ -77,7 +77,9 @@ class IRESEARCH_API scorers {
|
||||||
/// @brief checks whether scorer with a specified name is registered
|
/// @brief checks whether scorer with a specified name is registered
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
static bool exists(
|
static bool exists(
|
||||||
const string_ref& name, const irs::text_format::type_id& args_format
|
const string_ref& name,
|
||||||
|
const irs::text_format::type_id& args_format,
|
||||||
|
bool load_library = true
|
||||||
);
|
);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -89,7 +91,8 @@ class IRESEARCH_API scorers {
|
||||||
static sort::ptr get(
|
static sort::ptr get(
|
||||||
const string_ref& name,
|
const string_ref& name,
|
||||||
const irs::text_format::type_id& args_format,
|
const irs::text_format::type_id& args_format,
|
||||||
const string_ref& args
|
const string_ref& args,
|
||||||
|
bool load_library = true
|
||||||
) NOEXCEPT;
|
) NOEXCEPT;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -44,15 +44,19 @@ NS_ROOT
|
||||||
// --SECTION-- attribute::type_id
|
// --SECTION-- attribute::type_id
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
/*static*/ bool attribute::type_id::exists(const string_ref& name) {
|
/*static*/ bool attribute::type_id::exists(
|
||||||
return nullptr != attribute_register::instance().get(name);
|
const string_ref& name,
|
||||||
|
bool load_library /*= true*/
|
||||||
|
) {
|
||||||
|
return nullptr != attribute_register::instance().get(name, load_library);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static*/ const attribute::type_id* attribute::type_id::get(
|
/*static*/ const attribute::type_id* attribute::type_id::get(
|
||||||
const string_ref& name
|
const string_ref& name,
|
||||||
|
bool load_library /*= true*/
|
||||||
) NOEXCEPT {
|
) NOEXCEPT {
|
||||||
try {
|
try {
|
||||||
return attribute_register::instance().get(name);
|
return attribute_register::instance().get(name, load_library);
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
IR_FRMT_ERROR("Caught exception while getting an attribute instance");
|
IR_FRMT_ERROR("Caught exception while getting an attribute instance");
|
||||||
IR_LOG_EXCEPTION();
|
IR_LOG_EXCEPTION();
|
||||||
|
|
|
@ -56,8 +56,11 @@ struct IRESEARCH_API attribute {
|
||||||
public:
|
public:
|
||||||
type_id(const string_ref& name): name_(name) {}
|
type_id(const string_ref& name): name_(name) {}
|
||||||
operator const type_id*() const { return this; }
|
operator const type_id*() const { return this; }
|
||||||
static bool exists(const string_ref& name);
|
static bool exists(const string_ref& name, bool load_library = true);
|
||||||
static const type_id* get(const string_ref& name) NOEXCEPT;
|
static const type_id* get(
|
||||||
|
const string_ref& name,
|
||||||
|
bool load_library = true
|
||||||
|
) NOEXCEPT;
|
||||||
const string_ref& name() const { return name_; }
|
const string_ref& name() const { return name_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -74,9 +74,20 @@ class generic_register: public singleton<RegisterType> {
|
||||||
return std::make_pair(itr.first->second, itr.second);
|
return std::make_pair(itr.first->second, itr.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
entry_type get(const key_type& key) const {
|
entry_type get(const key_type& key, bool load_library) const {
|
||||||
const entry_type* entry = lookup(key);
|
const entry_type* entry = lookup(key);
|
||||||
return entry ? *entry : load_entry_from_so(key);
|
|
||||||
|
if (entry) {
|
||||||
|
return *entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (load_library) {
|
||||||
|
return load_entry_from_so(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
IR_FRMT_ERROR("%s : key not found", __FUNCTION__);
|
||||||
|
|
||||||
|
return entry_type();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool visit(const visitor_t& visitor) {
|
bool visit(const visitor_t& visitor) {
|
||||||
|
|
|
@ -682,7 +682,9 @@ IResearchAnalyzerFeature::AnalyzerPool::Builder::make(irs::string_ref const& typ
|
||||||
|
|
||||||
// ArangoDB, for API consistency, only supports analyzers configurable via
|
// ArangoDB, for API consistency, only supports analyzers configurable via
|
||||||
// jSON
|
// jSON
|
||||||
return irs::analysis::analyzers::get(type, irs::text_format::json, properties);
|
return irs::analysis::analyzers::get( // get analyzer
|
||||||
|
type, irs::text_format::json, properties, false // args
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
IResearchAnalyzerFeature::AnalyzerPool::AnalyzerPool(irs::string_ref const& name)
|
IResearchAnalyzerFeature::AnalyzerPool::AnalyzerPool(irs::string_ref const& name)
|
||||||
|
|
|
@ -90,11 +90,13 @@ bool makeScorer(irs::sort::ptr& scorer, irs::string_ref const& name,
|
||||||
break;
|
break;
|
||||||
case 1: {
|
case 1: {
|
||||||
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
||||||
scorer = irs::scorers::get(name, irs::text_format::json, irs::string_ref::NIL);
|
scorer = irs::scorers::get( // get scorer
|
||||||
|
name, irs::text_format::json, irs::string_ref::NIL, false // args
|
||||||
|
);
|
||||||
|
|
||||||
if (!scorer) {
|
if (!scorer) {
|
||||||
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
||||||
scorer = irs::scorers::get(name, irs::text_format::json, "[]"); // pass arg as json array
|
scorer = irs::scorers::get(name, irs::text_format::json, "[]", false); // pass arg as json array
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
default: { // fall through
|
default: { // fall through
|
||||||
|
@ -123,7 +125,9 @@ bool makeScorer(irs::sort::ptr& scorer, irs::string_ref const& name,
|
||||||
builder.close();
|
builder.close();
|
||||||
|
|
||||||
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
||||||
scorer = irs::scorers::get(name, irs::text_format::json, builder.toJson()); // pass arg as json
|
scorer = irs::scorers::get( // get scorer
|
||||||
|
name, irs::text_format::json, builder.toJson(), false // pass arg as json
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,7 +147,7 @@ bool fromFCall(irs::sort::ptr* scorer, irs::string_ref const& scorerName,
|
||||||
if (!scorer) {
|
if (!scorer) {
|
||||||
// cheap shallow check
|
// cheap shallow check
|
||||||
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
||||||
return irs::scorers::exists(scorerName, irs::text_format::json);
|
return irs::scorers::exists(scorerName, irs::text_format::json, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// we don't support non-constant arguments for scorers now, if it
|
// we don't support non-constant arguments for scorers now, if it
|
||||||
|
@ -330,12 +334,14 @@ void ScorerReplacer::extract(aql::Variable const& var, std::vector<Scorer>& scor
|
||||||
if (!comparer) {
|
if (!comparer) {
|
||||||
// cheap shallow check
|
// cheap shallow check
|
||||||
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
||||||
return irs::scorers::exists(scorerName, irs::text_format::json);
|
return irs::scorers::exists(scorerName, irs::text_format::json, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// create scorer with default arguments
|
// create scorer with default arguments
|
||||||
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
// ArangoDB, for API consistency, only supports scorers configurable via jSON
|
||||||
*comparer = irs::scorers::get(scorerName, irs::text_format::json, irs::string_ref::NIL);
|
*comparer = irs::scorers::get( // get scorer
|
||||||
|
scorerName, irs::text_format::json, irs::string_ref::NIL, false // args
|
||||||
|
);
|
||||||
|
|
||||||
return bool(*comparer);
|
return bool(*comparer);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue