// Source: arangodb/3rdParty/iresearch/tests/index/index_tests.hpp
// (467 lines, 13 KiB, C++)

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 by EMC Corporation, All Rights Reserved
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is EMC Corporation
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#ifndef IRESEARCH_INDEX_TESTS_H
#define IRESEARCH_INDEX_TESTS_H
#include "tests_shared.hpp"
#include "tests_param.hpp"
#include "assert_format.hpp"
#include "analysis/analyzers.hpp"
#include "analysis/token_streams.hpp"
#include "analysis/token_attributes.hpp"
#include "index/directory_reader.hpp"
#include "index/index_writer.hpp"
#include "doc_generator.hpp"
#include "utils/locale_utils.hpp"
#include "utils/timer_utils.hpp"
NS_ROOT
struct term_attribute;
NS_END // NS_ROOT
NS_BEGIN(tests)
//////////////////////////////////////////////////////////////////////////////
/// @class directory_mock
/// @brief pass-through directory: every operation is forwarded unchanged to
///        the wrapped implementation, so derived test doubles only need to
///        override the calls they want to intercept
/// @note the wrapped directory is held by reference and must outlive the mock
//////////////////////////////////////////////////////////////////////////////
class directory_mock: public irs::directory {
 public:
  directory_mock(irs::directory& impl): impl_(impl) {}

  // keep the base-class 'attributes' overloads visible next to the override
  using directory::attributes;

  irs::attribute_store& attributes() NOEXCEPT override {
    return impl_.attributes();
  }

  irs::index_output::ptr create(const std::string& name) NOEXCEPT override {
    return impl_.create(name);
  }

  bool exists(bool& result, const std::string& name) const NOEXCEPT override {
    return impl_.exists(result, name);
  }

  bool length(uint64_t& result, const std::string& name) const NOEXCEPT override {
    return impl_.length(result, name);
  }

  irs::index_lock::ptr make_lock(const std::string& name) NOEXCEPT override {
    return impl_.make_lock(name);
  }

  bool mtime(std::time_t& result, const std::string& name) const NOEXCEPT override {
    return impl_.mtime(result, name);
  }

  irs::index_input::ptr open(
      const std::string& name, irs::IOAdvice advice
  ) const NOEXCEPT override {
    return impl_.open(name, advice);
  }

  bool remove(const std::string& name) NOEXCEPT override {
    return impl_.remove(name);
  }

  bool rename(const std::string& src, const std::string& dst) NOEXCEPT override {
    return impl_.rename(src, dst);
  }

  bool sync(const std::string& name) NOEXCEPT override {
    return impl_.sync(name);
  }

  bool visit(const irs::directory::visitor_f& visitor) const override {
    return impl_.visit(visitor);
  }

 private:
  irs::directory& impl_; // forwarding target (not owned)
}; // directory_mock
//////////////////////////////////////////////////////////////////////////////
/// @struct blocking_directory
/// @brief directory_mock whose create() parks the calling thread when asked
///        to create the file named `blocker`
///
/// create(blocker) first creates the file through the wrapped directory, then
/// signals `policy_applied`, and finally acquires `intermediate_commits_lock`.
/// A thread that already holds `intermediate_commits_lock` therefore keeps the
/// creating thread parked inside create() until it releases that mutex.
/// wait_for_blocker() lets such a thread wait until the blocker file exists.
//////////////////////////////////////////////////////////////////////////////
struct blocking_directory : directory_mock {
  /// @param impl directory that performs the actual work (held by reference)
  /// @param blocker name of the file whose creation should block
  explicit blocking_directory(irs::directory& impl, const std::string& blocker)
    : tests::directory_mock(impl), blocker(blocker) {
  }

  /// @brief create `name` via the wrapped directory; for the blocker file
  ///        additionally notify waiters and wait on intermediate_commits_lock
  /// @return the output stream produced by the wrapped directory
  irs::index_output::ptr create(const std::string& name) NOEXCEPT override {
    auto stream = tests::directory_mock::create(name);

    if (name == blocker) {
      {
        // notify under policy_lock so that a waiter which has checked for the
        // file but not yet started waiting cannot miss the signal
        SCOPED_LOCK_NAMED(policy_lock, guard);
        policy_applied.notify_all();
      }

      // wait for intermediate commits to be applied
      SCOPED_LOCK_NAMED(intermediate_commits_lock, guard);
    }

    return stream;
  }

  /// @brief block until the blocker file exists in the directory
  /// @note the existence check is made while holding policy_lock and the wait
  ///       is skipped once the file is there; previously the loop always
  ///       waited (up to one second) after the check, even if the file had
  ///       just appeared
  void wait_for_blocker() {
    bool has = false;
    exists(has, blocker); // fast path: nothing to wait for

    while (!has) {
      SCOPED_LOCK_NAMED(policy_lock, policy_guard);
      exists(has, blocker);

      if (!has) {
        // timed wait keeps the loop polling even if a notification is lost
        policy_applied.wait_for(policy_guard, std::chrono::milliseconds(1000));
      }
    }
  }

  std::string blocker;                       // file name that triggers blocking
  std::mutex policy_lock;                    // guards policy_applied
  std::condition_variable policy_applied;    // signalled once blocker is created
  std::mutex intermediate_commits_lock;      // create(blocker) waits on this
}; // blocking_directory
// Parameter of the index test fixtures: <directory factory, codec name>
using index_test_context = std::tuple<dir_factory_f, const char*>;

// Textual form of a test parameter (defined out of line; presumably used to
// name parameterized test instances - confirm at the definition)
std::string to_string(const testing::TestParamInfo<index_test_context>& info);
//////////////////////////////////////////////////////////////////////////////
/// @class index_test_base
/// @brief fixture parameterized by <directory factory, codec name>; owns the
///        directory and codec under test plus the expected index model that
///        assert_index() compares against what is stored in the directory
//////////////////////////////////////////////////////////////////////////////
class index_test_base : public virtual test_param_base<index_test_context> {
 protected:
  /// @brief build a directory using the factory from the test parameter
  std::shared_ptr<irs::directory> get_directory(const test_base& ctx) const {
    dir_factory_f make_dir = std::get<0>(GetParam());

    return (*make_dir)(&ctx).first;
  }

  /// @brief look up the codec named by the test parameter
  irs::format::ptr get_codec() const {
    const char* name = std::get<1>(GetParam());

    return irs::formats::get(name);
  }

  // accessors for the state prepared in SetUp()
  irs::directory& dir() const { return *dir_; }
  irs::format::ptr codec() { return codec_; }
  const index_t& index() const { return index_; }
  index_t& index() { return index_; }

  /// @brief sort every expected segment with the given comparator
  void sort(const irs::comparer& comparator) {
    for (auto& expected : index_) {
      expected.sort(comparator);
    }
  }

  /// @brief open a writer over an explicitly supplied directory
  irs::index_writer::ptr open_writer(
      irs::directory& dir,
      irs::OpenMode mode = irs::OM_CREATE,
      const irs::index_writer::init_options& options = {}) {
    return irs::index_writer::make(dir, codec_, mode, options);
  }

  /// @brief open a writer over the fixture's own directory
  irs::index_writer::ptr open_writer(
      irs::OpenMode mode = irs::OM_CREATE,
      const irs::index_writer::init_options& options = {}) {
    return irs::index_writer::make(*dir_, codec_, mode, options);
  }

  /// @brief open a reader over the fixture's own directory
  irs::directory_reader open_reader() {
    return irs::directory_reader::open(*dir_, codec_);
  }

  /// @brief compare the stored index with the expected model
  void assert_index(const irs::flags& features, size_t skip = 0) const {
    tests::assert_index(dir(), codec_, index(), features, skip);
  }

  virtual void SetUp() {
    test_base::SetUp();

    // workaround for error: EMFILE - Too many open files
    MSVC_ONLY(_setmaxstdio(2048));

    // directory under test
    dir_ = get_directory(*this);
    ASSERT_NE(nullptr, dir_);

    // codec under test
    codec_ = get_codec();
    ASSERT_NE(nullptr, codec_);
  }

  virtual void TearDown() {
    // drop directory and codec before the base fixture is torn down
    dir_ = nullptr;
    codec_ = nullptr;

    test_base::TearDown();

    iresearch::timer_utils::init_stats(); // disable profile state tracking
  }

  /// @brief feed every document produced by 'gen' into both the expected
  ///        'segment' and the 'writer' (no commit is issued here)
  void write_segment(
      irs::index_writer& writer,
      tests::index_segment& segment,
      tests::doc_generator_base& gen
  ) {
    for (const document* doc = gen.next(); doc; doc = gen.next()) {
      // expected model first ...
      segment.add(doc->indexed.begin(), doc->indexed.end(), doc->sorted);

      // ... then the real index
      ASSERT_TRUE(insert(
        writer,
        doc->indexed.begin(), doc->indexed.end(),
        doc->stored.begin(), doc->stored.end(),
        doc->sorted
      ));
    }

    // keep the expected segment in the order used by a sorting writer
    if (writer.comparator()) {
      segment.sort(*writer.comparator());
    }
  }

  /// @brief write one new segment from 'gen' and commit it
  void add_segment(irs::index_writer& writer, tests::doc_generator_base& gen) {
    index_.emplace_back();
    write_segment(writer, index_.back(), gen);
    writer.commit();
  }

  /// @brief write one expected segment per generator, then commit once
  void add_segments(
      irs::index_writer& writer, std::vector<doc_generator_base::ptr>& gens
  ) {
    for (auto& generator : gens) {
      index_.emplace_back();
      write_segment(writer, index_.back(), *generator);
    }

    writer.commit();
  }

  /// @brief open a writer on the fixture's directory and add one segment
  void add_segment(
      tests::doc_generator_base& gen,
      irs::OpenMode mode = irs::OM_CREATE,
      const irs::index_writer::init_options& opts = {}
  ) {
    auto segment_writer = open_writer(mode, opts);

    add_segment(*segment_writer, gen);
  }

 private:
  index_t index_;                        // expected index contents
  std::shared_ptr<irs::directory> dir_;  // directory under test
  irs::format::ptr codec_;               // codec under test
}; // index_test_base
NS_BEGIN(templates)
//////////////////////////////////////////////////////////////////////////////
/// @class token_stream_payload
/// @brief token stream wrapper which sets payload equal to term value
//////////////////////////////////////////////////////////////////////////////
class token_stream_payload: public irs::token_stream {
public:
// Wraps 'impl'. Only the raw pointer is kept and this class declares no
// destructor, so the wrapped stream is not released here; the caller must
// keep it alive. Defined out of line.
explicit token_stream_payload(irs::token_stream* impl);
// Advances the stream; defined out of line.
bool next();
// Attributes are those of the wrapped stream.
const irs::attribute_view& attributes() const NOEXCEPT {
return impl_->attributes();
}
private:
// NOTE(review): presumably the term attribute of the wrapped stream, looked
// up in the out-of-line constructor - confirm there.
const irs::term_attribute* term_;
// Payload attribute owned by this wrapper.
irs::payload pay_;
// Wrapped stream (not owned).
irs::token_stream* impl_;
}; // token_stream_payload
//////////////////////////////////////////////////////////////////////////////
/// @class text_field
/// @brief field which uses text analyzer for tokenization and stemming
//////////////////////////////////////////////////////////////////////////////
template<typename T>
class text_field : public tests::field_base {
 public:
  /// @brief field with a default-constructed value
  /// @param name field name
  /// @param payload when true, tokens are served through a
  ///        token_stream_payload wrapper
  /// @throws irs::illegal_state if the analyzer rejects the initial value
  text_field(
      const irs::string_ref& name, bool payload = false
  ): text_field(name, T(), payload) {
  }

  /// @brief field with an explicit initial value
  /// @param name field name
  /// @param value initial value (copied)
  /// @param payload when true, tokens are served through a
  ///        token_stream_payload wrapper
  /// @throws irs::illegal_state if the analyzer rejects the initial value
  text_field(
      const irs::string_ref& name, const T& value, bool payload = false
  ): token_stream_(irs::analysis::analyzers::get("text", irs::text_format::json, "{\"locale\":\"C\", \"stopwords\":[]}")),
     value_(value) {
    if (payload) {
      // the analyzer is reset before the payload wrapper is built around it
      if (!token_stream_->reset(value_)) {
        throw irs::illegal_state();
      }

      pay_stream_.reset(new token_stream_payload(token_stream_.get()));
    }

    this->name(name);
  }

  /// @brief move constructor
  /// @note the field_base sub-object is moved as well; previously it was
  ///       default-constructed, so the name assigned in the constructor was
  ///       lost on move
  text_field(text_field&& other) NOEXCEPT
    : tests::field_base(std::move(other)), // only the base part is consumed here
      pay_stream_(std::move(other.pay_stream_)),
      token_stream_(std::move(other.token_stream_)),
      value_(std::move(other.value_)) {
  }

  irs::string_ref value() const { return value_; }
  void value(const T& value) { value_ = value; }
  void value(T&& value) { value_ = std::move(value); }

  /// @return frequency, position, offset and payload
  const irs::flags& features() const override {
    static const irs::flags features{
      iresearch::frequency::type(), iresearch::position::type(),
      iresearch::offset::type(), iresearch::payload::type()
    };

    return features;
  }

  /// @brief reset the analyzer to the current value and return the stream to
  ///        consume: the payload wrapper if one was requested, otherwise the
  ///        analyzer itself
  irs::token_stream& get_tokens() const override {
    token_stream_->reset(value_);

    return pay_stream_
      ? static_cast<irs::token_stream&>(*pay_stream_)
      : *token_stream_;
  }

 private:
  // nothing is written for this field type
  bool write(irs::data_output&) const override { return false; }

  // declaration order matters: the constructors initialize in this order
  std::unique_ptr<token_stream_payload> pay_stream_; // set only if payload was requested
  irs::analysis::analyzer::ptr token_stream_;        // "text" analyzer
  T value_;                                          // value to tokenize
}; // text_field
//////////////////////////////////////////////////////////////////////////////
/// @class string_field
/// @brief field which uses simple analyzer without tokenization
//////////////////////////////////////////////////////////////////////////////
// Field whose value is kept in an owned std::string. All non-inline members
// are defined out of line.
class string_field : public tests::field_base {
public:
// Creates a field called 'name' without an explicit value; 'extra_features'
// defaults to the empty flag set.
string_field(
const irs::string_ref& name,
const irs::flags& extra_features = irs::flags::empty_instance()
);
// Creates a field called 'name' with an initial 'value'.
string_field(
const irs::string_ref& name,
const irs::string_ref& value,
const irs::flags& extra_features = irs::flags::empty_instance()
);
// Replaces the field value.
void value(const irs::string_ref& str);
// Returns a view of the stored value.
irs::string_ref value() const { return value_; }
// field_base interface
virtual const irs::flags& features() const override;
virtual irs::token_stream& get_tokens() const override;
virtual bool write(irs::data_output& out) const override;
private:
irs::flags features_;
// mutable: get_tokens() is const but hands out a non-const stream reference
mutable irs::string_token_stream stream_;
std::string value_;
}; // string_field
//////////////////////////////////////////////////////////////////////////////
/// @class string_ref_field
/// @brief field which uses simple analyzer without tokenization and keeps its
///        value as an irs::string_ref instead of a std::string
//////////////////////////////////////////////////////////////////////////////
// Field whose value is kept as an irs::string_ref rather than an owned
// std::string. All non-inline members are defined out of line.
class string_ref_field : public tests::field_base {
public:
// Creates a field called 'name' without an explicit value; 'extra_features'
// defaults to the empty flag set.
string_ref_field(
const irs::string_ref& name,
const irs::flags& extra_features = irs::flags::empty_instance()
);
// Creates a field called 'name' with an initial 'value'.
string_ref_field(
const irs::string_ref& name,
const irs::string_ref& value,
const irs::flags& extra_features = irs::flags::empty_instance()
);
// Replaces the field value.
void value(const irs::string_ref& str);
// Returns the stored value.
irs::string_ref value() const { return value_; }
// field_base interface
virtual const irs::flags& features() const override;
virtual irs::token_stream& get_tokens() const override;
virtual bool write(irs::data_output& out) const override;
private:
irs::flags features_;
// mutable: get_tokens() is const but hands out a non-const stream reference
mutable irs::string_token_stream stream_;
// NOTE(review): presumably a non-owning view, so the referenced characters
// must outlive the field - confirm against irs::string_ref
irs::string_ref value_;
}; // string_ref_field
//////////////////////////////////////////////////////////////////////////////
/// @class europarl_doc_template
/// @brief document template for europarl.subset.text
//////////////////////////////////////////////////////////////////////////////
// Document template plugged into delim_doc_generator. The four virtual
// members below are defined out of line.
class europarl_doc_template: public delim_doc_generator::doc_template {
public:
// text fields of this template hold irs::string_ref values
typedef templates::text_field<irs::string_ref> text_field;
// NOTE(review): presumably called once to set up the template's fields -
// confirm against delim_doc_generator::doc_template
virtual void init();
// Receives the value 'value' for position 'idx'.
// NOTE(review): 'idx' is presumably the column index within a delimited
// line - confirm against the generator
virtual void value(size_t idx, const std::string& value);
// NOTE(review): presumably called after the last value of a document
virtual void end();
// NOTE(review): presumably restores the initial state (e.g. idval_)
virtual void reset();
private:
std::string title_; // current title
std::string body_; // current body
// starts at 0; NOTE(review): presumably a running document id - confirm in
// the out-of-line definitions
irs::doc_id_t idval_ = 0;
}; // europarl_doc_template
NS_END // templates
// Field factory for JSON-generated documents (defined out of line).
// Takes the document being built, the name of the current JSON member and its
// value. NOTE(review): presumably appends a matching field to 'doc' - confirm
// at the definition.
void generic_json_field_factory(
tests::document& doc,
const std::string& name,
const json_doc_generator::json_value& data
);
// Same signature as generic_json_field_factory (defined out of line).
// NOTE(review): judging by the name, the produced fields presumably carry
// payloads - confirm at the definition.
void payloaded_json_field_factory(
tests::document& doc,
const std::string& name,
const json_doc_generator::json_value& data
);
NS_END // tests
#endif // IRESEARCH_INDEX_TESTS_H