1
0
Fork 0
arangodb/3rdParty/iresearch/tests/index/assert_format.hpp

422 lines
11 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 by EMC Corporation, All Rights Reserved
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is EMC Corporation
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#ifndef IRESEARCH_ASSERT_FORMAT_H
#define IRESEARCH_ASSERT_FORMAT_H
#include <set>
#include "doc_generator.hpp"
#include "index/field_meta.hpp"
#include "index/comparer.hpp"
#include "formats/formats.hpp"
namespace tests {
struct position {
position(uint32_t pos, uint32_t start,
uint32_t end, const irs::bytes_ref& pay);
bool operator<(const position& rhs) const {
return pos < rhs.pos;
}
uint32_t pos;
uint32_t start;
uint32_t end;
irs::bstring payload;
};
class posting {
public:
posting(irs::doc_id_t id);
posting(irs::doc_id_t id, std::set<position>&& positions)
: positions_(std::move(positions)), id_(id) {
}
posting(posting&& rhs) NOEXCEPT
: positions_(std::move(rhs.positions_)),
id_(rhs.id_) {
}
posting& operator=(posting&& rhs) NOEXCEPT {
if (this != &rhs) {
positions_ = std::move(rhs.positions_);
id_ = rhs.id_;
}
return *this;
}
void add(uint32_t pos, uint32_t offs_start, const irs::attribute_view& attrs);
bool operator<(const posting& rhs) const {
return id_ < rhs.id_;
}
const std::set<position>& positions() const { return positions_; }
irs::doc_id_t id() const { return id_; }
size_t size() const { return positions_.size(); }
private:
friend struct term;
std::set<position> positions_;
irs::doc_id_t id_;
};
struct term {
term(const irs::bytes_ref& data);
posting& add(irs::doc_id_t id);
bool operator<(const term& rhs) const;
uint64_t docs_count() const { return postings.size(); }
void sort(const std::map<irs::doc_id_t, irs::doc_id_t>& docs) {
std::set<posting> resorted_postings;
for (auto& posting : postings) {
resorted_postings.emplace(
docs.at(posting.id_),
std::move(const_cast<tests::posting&>(posting).positions_)
);
}
postings = std::move(resorted_postings);
}
std::set<posting> postings;
irs::bstring value;
};
class field : public irs::field_meta {
public:
field(
const irs::string_ref& name,
const irs::flags& features
);
field(field&& rhs) NOEXCEPT;
field& operator=(field&& rhs) NOEXCEPT;
term& add(const irs::bytes_ref& term);
term* find(const irs::bytes_ref& term);
size_t remove(const irs::bytes_ref& t);
void sort(const std::map<irs::doc_id_t, irs::doc_id_t>& docs) {
for (auto& term : terms) {
const_cast<tests::term&>(term).sort(docs);
}
}
std::set<term> terms;
std::unordered_set<irs::doc_id_t> docs;
uint32_t pos;
uint32_t offs;
};
class index_segment: irs::util::noncopyable {
public:
typedef std::map<irs::string_ref, field> field_map_t;
typedef field_map_t::const_iterator iterator;
index_segment();
index_segment(index_segment&& rhs) NOEXCEPT;
index_segment& operator=(index_segment&& rhs) NOEXCEPT;
size_t doc_count() const { return count_; }
size_t size() const { return fields_.size(); }
const irs::document_mask& doc_mask() const { return doc_mask_; }
const field_map_t& fields() const { return fields_; }
bool find(const irs::string_ref& name, const irs::bytes_ref& term) {
field* fld = find( name );
return fld && fld->find(term);
}
const field* find(size_t id) const {
return id_to_field_.at(id);
}
field* find(const irs::string_ref& name) {
auto it = fields_.find( name );
return it == fields_.end()?nullptr:&it->second;
}
const field* find(const irs::string_ref& name) const {
auto it = fields_.find( name );
return it == fields_.end()?nullptr:&it->second;
}
template<typename Iterator>
void add(Iterator begin, Iterator end, ifield::ptr sorted = nullptr) {
// reset field per-document state
for (auto it = begin; it != end; ++it) {
auto* field_data = find((*it).name());
if (!field_data) {
continue;
}
field_data->pos = 0;
field_data->offs = 0;
}
for (; begin != end; ++begin) {
add(*begin);
}
if (sorted) {
add_sorted(*sorted);
}
++count_;
}
void sort(const irs::comparer& comparator) {
if (sort_.empty()) {
return;
}
std::sort(
sort_.begin(), sort_.end(),
[&comparator](
const std::pair<irs::bstring, irs::doc_id_t>& lhs,
const std::pair<irs::bstring, irs::doc_id_t>& rhs) {
return comparator(lhs.first, rhs.first);
});
irs::doc_id_t new_doc_id = irs::doc_limits::min();
std::map<irs::doc_id_t, irs::doc_id_t> order;
for (auto& entry : sort_) {
order[entry.second] = new_doc_id++;
}
for (auto& field : fields_) {
field.second.sort(order);
}
}
void clear() {
fields_.clear();
count_ = 0;
}
private:
void add(const ifield& field);
void add_sorted(const ifield& field);
std::vector<std::pair<irs::bstring, irs::doc_id_t>> sort_;
std::vector<const field*> id_to_field_;
field_map_t fields_;
size_t count_;
irs::document_mask doc_mask_;
};
namespace detail {
class term_reader : public irs::term_reader {
public:
term_reader(const tests::field& data):
data_(data), min_(data_.terms.begin()->value), max_(data_.terms.rbegin()->value) {
}
virtual irs::seek_term_iterator::ptr iterator() const override;
virtual const irs::field_meta& meta() const override;
virtual size_t size() const override;
virtual uint64_t docs_count() const override;
virtual const irs::bytes_ref& (min)() const override;
virtual const irs::bytes_ref& (max)() const override;
virtual const irs::attribute_view& attributes() const NOEXCEPT override;
private:
const tests::field& data_;
irs::bytes_ref max_;
irs::bytes_ref min_;
};
} // detail
struct index_meta_writer: public irs::index_meta_writer {
virtual std::string filename(
const irs::index_meta& meta
) const override;
virtual bool prepare(
irs::directory& dir,
irs::index_meta& meta
) override;
virtual bool commit() override;
virtual void rollback() NOEXCEPT override;
};
struct index_meta_reader : public irs::index_meta_reader {
virtual bool last_segments_file(
const irs::directory& dir, std::string& out
) const override;
virtual void read(
const irs::directory& dir,
irs::index_meta& meta,
const irs::string_ref& filename = irs::string_ref::NIL
) override;
};
struct segment_meta_writer : public irs::segment_meta_writer {
virtual void write(
irs::directory& dir,
std::string& filename,
const irs::segment_meta& meta
) override;
};
struct segment_meta_reader : public irs::segment_meta_reader {
virtual void read(
const irs::directory& dir,
irs::segment_meta& meta,
const irs::string_ref& filename = irs::string_ref::NIL
) override;
};
class document_mask_writer: public irs::document_mask_writer {
public:
document_mask_writer(const index_segment& data);
virtual std::string filename(
const irs::segment_meta& meta
) const override;
void write(
irs::directory& dir,
const irs::segment_meta& meta,
const irs::document_mask& docs_mask
) override;
private:
const index_segment& data_;
};
class field_reader : public irs::field_reader {
public:
field_reader( const index_segment& data );
field_reader(field_reader&& other) NOEXCEPT;
virtual void prepare(const irs::directory& dir, const irs::segment_meta& meta, const irs::document_mask& mask) override;
virtual const irs::term_reader* field(const irs::string_ref& field) const override;
virtual irs::field_iterator::ptr iterator() const override;
virtual size_t size() const override;
const index_segment& data() const {
return data_;
}
private:
std::vector<irs::term_reader::ptr> readers_;
const index_segment& data_;
};
class field_writer : public irs::field_writer {
public:
field_writer(const index_segment& data, const irs::flags& features = irs::flags());
/* returns features which should be checked
* in "write" method */
irs::flags features() const { return features_; }
/* sets features which should be checked
* in "write" method */
void features(const irs::flags& features) { features_ = features; }
virtual void prepare(const irs::flush_state& state) override;
virtual void write(const std::string& name, irs::field_id norm, const irs::flags& expected_field, irs::term_iterator& actual_term) override;
virtual void end() override;
private:
field_reader readers_;
irs::flags features_;
};
class format : public irs::format {
public:
DECLARE_FORMAT_TYPE();
DECLARE_FACTORY();
format();
format(const index_segment& data);
virtual irs::index_meta_writer::ptr get_index_meta_writer() const override;
virtual irs::index_meta_reader::ptr get_index_meta_reader() const override;
virtual irs::segment_meta_writer::ptr get_segment_meta_writer() const override;
virtual irs::segment_meta_reader::ptr get_segment_meta_reader() const override;
virtual document_mask_writer::ptr get_document_mask_writer() const override;
virtual irs::document_mask_reader::ptr get_document_mask_reader() const override;
virtual irs::field_writer::ptr get_field_writer(bool volatile_attributes) const override;
virtual irs::field_reader::ptr get_field_reader() const override;
virtual irs::column_meta_writer::ptr get_column_meta_writer() const override;
virtual irs::column_meta_reader::ptr get_column_meta_reader() const override;
virtual irs::columnstore_writer::ptr get_columnstore_writer() const override;
virtual irs::columnstore_reader::ptr get_columnstore_reader() const override;
private:
static const index_segment DEFAULT_SEGMENT;
const index_segment& data_;
};
typedef std::vector<index_segment> index_t;
void assert_term(
const irs::term_iterator& expected_term,
const irs::term_iterator& actual_term,
const irs::flags& features);
void assert_terms_next(
const irs::term_reader& expected_term_reader,
const irs::term_reader& actual_term_reader,
const irs::flags& features);
void assert_terms_seek(
const irs::term_reader& expected_term_reader,
const irs::term_reader& actual_term_reader,
const irs::flags& features,
size_t lookahead = 10); // number of steps to iterate after the seek
void assert_index(
const index_t& expected_index,
const irs::index_reader& actual_index,
const irs::flags& features,
size_t skip = 0 // do not validate the first 'skip' segments
);
void assert_index(
const irs::directory& dir,
irs::format::ptr codec,
const index_t& index,
const irs::flags& features,
size_t skip = 0 // no not validate the first 'skip' segments
);
} // tests
#endif