mirror of https://gitee.com/bigwinds/arangodb
* update iresearch
* update iresearch
* fix compilation errors
* update iresearch
parent 73932a784b
commit 35e2ebc729

@@ -695,7 +695,7 @@ install:
   - |
     ICU_DIR=${DEPS_DIR}/icu
     if [[ -z "$(ls -A ${ICU_DIR})" ]]; then
-      ICU_URL="https://kent.dl.sourceforge.net/project/icu/ICU4C/57.1/icu4c-57_1-src.tgz"
+      ICU_URL="https://netix.dl.sourceforge.net/project/icu/ICU4C/57.1/icu4c-57_1-src.tgz"
       cd ${DEPS_DIR}
       wget -P icu ${ICU_URL}
       cd ${ICU_DIR}
@@ -721,7 +721,8 @@ install:
       travis_retry git clone --depth 1 --recursive --quiet ${BOOST_URL} ${BOOST_DIR} || exit 1
     else
       #BOOST_URL="https://mirrors.netix.net/sourceforge/b/bo/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
-      BOOST_URL="https://kent.dl.sourceforge.net/project/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
+      #BOOST_URL="https://kent.dl.sourceforge.net/project/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
+      BOOST_URL="https://netix.dl.sourceforge.net/project/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
       mkdir -p ${BOOST_DIR}
       { travis_retry wget --quiet -O - ${BOOST_URL} | tar --strip-components=1 -xz -C ${BOOST_DIR}; } || exit 1
     fi

@@ -187,7 +187,7 @@ install:
   ############################################################################
   - set ICU_DIR=%DEPS_DIR%\icu
   - mkdir %ICU_DIR% && cd %ICU_DIR%
-  - set ICU_URL="https://iweb.dl.sourceforge.net/project/icu/ICU4C/57.1/icu4c-57_1-Win64-msvc10.zip"
+  - set ICU_URL="https://ayera.dl.sourceforge.net/project/icu/ICU4C/57.1/icu4c-57_1-Win64-msvc10.zip"
   - appveyor DownloadFile %ICU_URL% -FileName icu4c-57_1-win64-msvc10.zip
   - 7z x icu4c-57_1-win64-msvc10.zip -o. > nul
   - set ICU_ROOT=%ICU_DIR%\icu

@@ -144,6 +144,8 @@ set(IResearch_core_sources
   ./utils/encryption.cpp
+  ./utils/ctr_encryption.cpp
   ./utils/compression.cpp
   ./utils/delta_compression.cpp
+  ./utils/lz4compression.cpp
   ./utils/directory_utils.cpp
   ./utils/file_utils.cpp
   ./utils/mmap_utils.cpp
@@ -220,6 +222,7 @@ set(IResearch_core_headers
   ./utils/bit_utils.hpp
   ./utils/block_pool.hpp
   ./utils/compression.hpp
+  ./utils/lz4compression.hpp
   ./utils/file_utils.hpp
   ./utils/fst.hpp
   ./utils/fst_decl.hpp

@@ -254,13 +254,5 @@ analyzer_registrar::analyzer_registrar(
   }
 }
 
-analyzer_registrar::operator bool() const NOEXCEPT {
-  return registered_;
-}
-
-NS_END // NS_BEGIN(analysis)
+NS_END // analysis
 NS_END
-
-// -----------------------------------------------------------------------------
-// --SECTION--                                                       END-OF-FILE
-// -----------------------------------------------------------------------------

@@ -58,7 +58,11 @@ class IRESEARCH_API analyzer_registrar {
     normalizer_f normalizer,
     const char* source = nullptr
   );
-  operator bool() const NOEXCEPT;
+
+  operator bool() const NOEXCEPT {
+    return registered_;
+  }
+
  private:
   bool registered_;
 };

@@ -80,7 +80,7 @@ file_not_found::file_not_found(
     error_ += ".";
   } else {
     error_ += ": ";
-    error_ + filename.c_str();
+    error_.append(filename.c_str(), filename.size());
   }
 }

@@ -29,6 +29,7 @@
 #include "store/directory.hpp"
 
 #include "index/index_meta.hpp"
+#include "index/column_info.hpp"
 #include "index/iterators.hpp"
 
 #include "utils/io_utils.hpp"
@@ -247,7 +248,7 @@ struct IRESEARCH_API columnstore_writer {
   virtual ~columnstore_writer() = default;
 
   virtual void prepare(directory& dir, const segment_meta& meta) = 0;
-  virtual column_t push_column() = 0;
+  virtual column_t push_column(const column_info& info) = 0;
   virtual void rollback() NOEXCEPT = 0;
   virtual bool commit() = 0; // @return was anything actually flushed
 }; // columnstore_writer

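Note: push_column() now takes a column_info fixing the compression codec and encryption flag for the whole column; as before it returns the column id together with a functor mapping a doc_id_t to the column_output used for writing values. A hedged usage sketch; 'writer', 'doc', 'payload' and 'payload_size' are hypothetical stand-ins for a prepared columnstore_writer and caller-owned data:

// sketch: write one value into a new LZ4-compressed, encrypted column
auto handle = writer.push_column(irs::column_info(
  irs::compression::lz4::type(), // codec for this column
  irs::compression::options(),   // codec-specific options
  /*encryption*/ true));         // request encrypted data blocks

const auto column_id = handle.first; // persisted in the column meta
auto& value_writer = handle.second;  // doc_id_t -> column_output&

auto& out = value_writer(doc);           // begin value for document 'doc'
out.write_bytes(payload, payload_size);  // payload bytes for this document
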
@@ -51,6 +51,7 @@
 #include "utils/bit_packing.hpp"
 #include "utils/bit_utils.hpp"
 #include "utils/bitset.hpp"
+#include "utils/lz4compression.hpp"
 #include "utils/encryption.hpp"
 #include "utils/compression.hpp"
 #include "utils/directory_utils.hpp"
@@ -141,9 +142,27 @@ NS_END
 
 NS_LOCAL
 
-irs::bytes_ref DUMMY; // placeholder for visiting logic in columnstore
-using namespace irs;
+using namespace iresearch;
+bytes_ref DUMMY; // placeholder for visiting logic in columnstore
 
+class noop_compressor final : compression::compressor {
+ public:
+  static compression::compressor::ptr make() {
+    typedef compression::compressor::ptr ptr;
+    static noop_compressor INSTANCE;
+    return ptr(ptr(), &INSTANCE);
+  }
+
+  virtual bytes_ref compress(byte_type* in, size_t size, bstring& /*buf*/) {
+    return bytes_ref(in, size);
+  }
+
+  virtual void flush(data_output& /*out*/) { }
+
+ private:
+  noop_compressor() = default;
+}; // noop_compressor
+
 // ----------------------------------------------------------------------------
 // --SECTION--                                                         features

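Note: noop_compressor::make() hands out its static instance through shared_ptr's aliasing constructor, so callers receive a compression::compressor::ptr that never owns or deletes the singleton; format12::make() further down uses the same idiom. A standalone sketch of the trick:

#include <cassert>
#include <memory>

struct widget { }; // stand-in for the singleton type

std::shared_ptr<widget> make() {
  static widget INSTANCE;
  // aliasing constructor: empty owner + raw pointer, so the control block
  // is empty, use_count() == 0, and no deleter ever runs on INSTANCE
  return std::shared_ptr<widget>(std::shared_ptr<widget>(), &INSTANCE);
}

int main() {
  auto p = make();
  assert(p.use_count() == 0); // non-owning handle to the static instance
  return 0;
}
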
@@ -2833,47 +2852,61 @@ bool meta_reader::read(column_meta& column) {
 //   |Compressed block #1|
 //   |Compressed block #2|
 //   ...
+//   |Bloom Filter| <- not implemented yet
 //   |Last block #0 key|Block #0 offset|
 //   |Last block #1 key|Block #1 offset| <-- Columnstore blocks index
 //   |Last block #2 key|Block #2 offset|
 //   ...
+//   |Bloom filter offset| <- not implemented yet
 //   |Footer|
 
 const uint32_t INDEX_BLOCK_SIZE = 1024;
 const size_t MAX_DATA_BLOCK_SIZE = 8192;
 
-// By default we treat columns as variable length sparse columns
+/// @brief Column flags
+/// @note by default we treat columns as variable length sparse columns
 enum ColumnProperty : uint32_t {
   CP_SPARSE = 0,
-  CP_DENSE = 1, // keys can be presented as array indices
-  CP_FIXED = 2, // fixed length columns
-  CP_MASK = 4,  // column contains no data
+  CP_DENSE = 1,              // keys can be presented as array indices
+  CP_FIXED = 1 << 1,         // fixed length columns
+  CP_MASK = 1 << 2,          // column contains no data
+  CP_COLUMN_DENSE = 1 << 3,  // column index is dense
+  CP_COLUMN_ENCRYPT = 1 << 4 // column contains encrypted data
 }; // ColumnProperty
 
+ENABLE_BITMASK_ENUM(ColumnProperty);
+
+bool is_good_compression_ratio(size_t raw_size, size_t compressed_size) NOEXCEPT {
+  // require the compressed form to save at least 12.5%
+  return compressed_size < raw_size - (raw_size / 8U);
+}
+
 ColumnProperty write_compact(
-    irs::index_output& out,
-    irs::compressor& compressor,
-    const irs::bytes_ref& data) {
+    index_output& out,
+    bstring& encode_buf,
+    encryption::stream* cipher,
+    compression::compressor& compressor,
+    bstring& data) {
   if (data.empty()) {
     out.write_byte(0); // zig_zag_encode32(0) == 0
     return CP_MASK;
   }
 
   // compressor can only handle size of int32_t, so can use the negative flag as a compression flag
-  compressor.compress(reinterpret_cast<const char*>(data.c_str()), data.size());
+  const bytes_ref compressed = compressor.compress(&data[0], data.size(), encode_buf);
 
-  if (compressor.size() < data.size()) {
-    assert(compressor.size() <= irs::integer_traits<int32_t>::const_max);
-    irs::write_zvint(out, int32_t(compressor.size())); // compressed size
-    out.write_bytes(compressor.c_str(), compressor.size());
+  if (is_good_compression_ratio(data.size(), compressed.size())) {
+    assert(compressed.size() <= irs::integer_traits<int32_t>::const_max);
+    irs::write_zvint(out, int32_t(compressed.size())); // compressed size
+    if (cipher) {
+      cipher->encrypt(out.file_pointer(), const_cast<irs::byte_type*>(compressed.c_str()), compressed.size());
+    }
+    out.write_bytes(compressed.c_str(), compressed.size());
     irs::write_zvlong(out, data.size() - MAX_DATA_BLOCK_SIZE); // original size
   } else {
     assert(data.size() <= irs::integer_traits<int32_t>::const_max);
     irs::write_zvint(out, int32_t(0) - int32_t(data.size())); // -ve to mark uncompressed
+    if (cipher) {
+      cipher->encrypt(out.file_pointer(), const_cast<irs::byte_type*>(data.c_str()), data.size());
+    }
    out.write_bytes(data.c_str(), data.size());
   }
 
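Note: the block header convention above packs three cases into one zig-zag-encoded int32: zero marks an empty mask block, a positive value is the compressed size (the original size follows the payload, stored relative to MAX_DATA_BLOCK_SIZE), and a negative value flags raw data that failed the is_good_compression_ratio test, i.e. compression is kept only when it saves at least 12.5% (for an 8192-byte block the compressed form must be under 7168 bytes). A standalone sketch of the dispatch a reader performs:

#include <cstddef>
#include <cstdint>

enum class block_kind { empty, raw, compressed };

// classify a decoded block header per the write_compact() convention
block_kind classify(int32_t header, size_t& payload_size) {
  if (header == 0) {                    // zig_zag_encode32(0) == 0: mask block
    payload_size = 0;
    return block_kind::empty;
  }
  if (header < 0) {                     // negative size marks uncompressed bytes
    payload_size = size_t(-int64_t(header));
    return block_kind::raw;
  }
  payload_size = size_t(header);        // positive size marks compressed bytes
  return block_kind::compressed;
}
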
@@ -2882,7 +2915,8 @@ ColumnProperty write_compact(
 
 void read_compact(
     irs::index_input& in,
-    const irs::decompressor& decompressor,
+    irs::encryption::stream* cipher,
+    irs::compression::decompressor* decompressor,
     irs::bstring& encode_buf,
     irs::bstring& decode_buf) {
   const auto size = irs::read_zvint(in);
@@ -2904,9 +2938,21 @@ void read_compact(
 #else
     in.read_bytes(&(decode_buf[0]), buf_size);
 #endif // IRESEARCH_DEBUG
 
+    if (cipher) {
+      cipher->decrypt(in.file_pointer() - buf_size, &(decode_buf[0]), buf_size);
+    }
+
     return;
   }
 
+  if (IRS_UNLIKELY(!decompressor)) {
+    throw irs::index_error(string_utils::to_string(
+      "while reading compact, error: can't decompress block of size %d without decompressor",
+      size
+    ));
+  }
+
   irs::string_utils::oversize(encode_buf, buf_size);
 
 #ifdef IRESEARCH_DEBUG
@@ -2917,21 +2963,20 @@ void read_compact(
   in.read_bytes(&(encode_buf[0]), buf_size);
 #endif // IRESEARCH_DEBUG
 
+  if (cipher) {
+    cipher->decrypt(in.file_pointer() - buf_size, &(encode_buf[0]), buf_size);
+  }
+
   // ensure that we have enough space to store decompressed data
   decode_buf.resize(irs::read_zvlong(in) + MAX_DATA_BLOCK_SIZE);
 
-  buf_size = decompressor.deflate(
-    reinterpret_cast<const char*>(encode_buf.c_str()),
-    buf_size,
-    reinterpret_cast<char*>(&decode_buf[0]),
-    decode_buf.size()
+  const auto decoded = decompressor->decompress(
+    &encode_buf[0], buf_size,
+    &decode_buf[0], decode_buf.size()
   );
 
-  if (!irs::type_limits<irs::type_t::address_t>::valid(buf_size)) {
-    throw irs::index_error(string_utils::to_string(
-      "while reading compact, error: invalid buffer size '" IR_SIZE_T_SPECIFIER "'",
-      buf_size
-    ));
+  if (decoded.null()) {
+    throw irs::index_error("error while reading compact");
   }
 }

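Note the ordering the two read_compact hunks establish: the bytes are read first, decrypted in place when a cipher is present, and only then handed to the decompressor, mirroring compress-then-encrypt on the write side. A minimal standalone sketch of that pipeline, with stand-in cipher and decompressor types (both hypothetical, not the iresearch interfaces):

#include <cstdint>
#include <string>

using bstring = std::basic_string<uint8_t>;

struct cipher_t {
  // stand-in block cipher: decrypts 'size' bytes in place at file 'offset'
  void decrypt(uint64_t /*offset*/, uint8_t* /*data*/, size_t /*size*/) {}
};

struct decompressor_t {
  // stand-in codec: inflates 'src' into a fresh buffer
  bstring decompress(const uint8_t* src, size_t size) { return bstring(src, size); }
};

// decrypt before decompress: data was compressed first, encrypted second
bstring read_block(bstring encoded, uint64_t offset,
                   cipher_t* cipher, decompressor_t& decomp) {
  if (cipher) {
    cipher->decrypt(offset, &encoded[0], encoded.size());
  }
  return decomp.decompress(encoded.data(), encoded.size());
}
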
@@ -3013,6 +3058,7 @@ class index_block {
     const auto block_size = math::ceil32(size, packed::BLOCK_SIZE_32);
     assert(block_size >= size);
 
+    assert(std::is_sorted(keys_, key_));
     const auto stats = encode::avg::encode(keys_, key_);
     const auto bits = encode::avg::write_block(
       out, stats.first, stats.second,
@@ -3031,6 +3077,7 @@ class index_block {
     const auto block_size = math::ceil64(size, packed::BLOCK_SIZE_64);
     assert(block_size >= size);
 
+    assert(std::is_sorted(offsets_, offset_));
     const auto stats = encode::avg::encode(offsets_, offset_);
     const auto bits = encode::avg::write_block(
       out, stats.first, stats.second,
@@ -3068,22 +3115,41 @@ class index_block {
 class writer final : public irs::columnstore_writer {
  public:
   static const int32_t FORMAT_MIN = 0;
-  static const int32_t FORMAT_MAX = FORMAT_MIN;
+  static const int32_t FORMAT_MAX = 1;
 
   static const string_ref FORMAT_NAME;
   static const string_ref FORMAT_EXT;
 
+  explicit writer(int32_t version) NOEXCEPT
+    : buf_(2*MAX_DATA_BLOCK_SIZE, 0),
+      version_(version) {
+    static_assert(
+      2*MAX_DATA_BLOCK_SIZE >= INDEX_BLOCK_SIZE*sizeof(uint64_t),
+      "buffer is not big enough"
+    );
+
+    assert(version >= FORMAT_MIN && version <= FORMAT_MAX);
+  }
+
   virtual void prepare(directory& dir, const segment_meta& meta) override;
-  virtual column_t push_column() override;
+  virtual column_t push_column(const column_info& info) override;
   virtual bool commit() override;
   virtual void rollback() NOEXCEPT override;
 
  private:
   class column final : public irs::columnstore_writer::column_output {
    public:
-    explicit column(writer& ctx)
-      : ctx_(&ctx),
-        blocks_index_(*ctx.alloc_) {
+    explicit column(writer& ctx, const compression::type_id& type,
+                    const compression::compressor::ptr& compressor,
+                    encryption::stream* cipher)
+      : ctx_(&ctx),
+        comp_type_(&type),
+        comp_(compressor),
+        cipher_(cipher),
+        blocks_index_(*ctx.alloc_),
+        block_buf_(2*MAX_DATA_BLOCK_SIZE, 0) {
+      assert(comp_); // ensured by `push_column'
+      block_buf_.clear(); // reset size to '0'
     }
 
     void prepare(doc_id_t key) {
@@ -3109,7 +3175,17 @@ class writer final : public irs::columnstore_writer {
 
     void finish() {
       auto& out = *ctx_->data_out_;
-      write_enum(out, ColumnProperty(((column_props_ & CP_DENSE) << 3) | blocks_props_)); // column properties
+
+      // evaluate overall column properties
+      auto column_props = blocks_props_;
+      if (0 != (column_props_ & CP_DENSE)) { column_props |= CP_COLUMN_DENSE; }
+      if (cipher_) { column_props |= CP_COLUMN_ENCRYPT; }
+
+      write_enum(out, column_props);
+      if (ctx_->version_ > FORMAT_MIN) {
+        write_string(out, comp_type_->name());
+        comp_->flush(out); // flush compression dependent data
+      }
       out.write_vint(block_index_.total()); // total number of items
       out.write_vint(max_); // max column key
       out.write_vint(avg_block_size_); // avg data block size

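Note: finish() now widens the two column-level facts into dedicated bits instead of shifting the whole dense flag, and versions above FORMAT_MIN append the compression name plus any codec state. A standalone sketch of the aggregation, using the bit values from this diff:

#include <cstdint>

// bit values as defined in the ColumnProperty enum above
const uint32_t CP_DENSE = 1, CP_COLUMN_DENSE = 1u << 3, CP_COLUMN_ENCRYPT = 1u << 4;

uint32_t column_properties(uint32_t blocks_props, // AND of per-block flags
                           uint32_t index_props,  // column block-index flags
                           bool encrypted) {
  uint32_t props = blocks_props;
  if (index_props & CP_DENSE) props |= CP_COLUMN_DENSE; // dense column index
  if (encrypted)              props |= CP_COLUMN_ENCRYPT;
  return props;
}
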
@@ -3128,7 +3204,9 @@ class writer final : public irs::columnstore_writer {
       flush_block();
 
       // finish column blocks index
-      column_index_.flush(blocks_index_.stream, ctx_->buf_);
+      assert(ctx_->buf_.size() >= INDEX_BLOCK_SIZE*sizeof(uint64_t));
+      auto* buf = reinterpret_cast<uint64_t*>(&ctx_->buf_[0]);
+      column_index_.flush(blocks_index_.stream, buf);
       blocks_index_.stream.flush();
     }
@@ -3137,11 +3215,11 @@ class writer final : public irs::columnstore_writer {
     }
 
     virtual void write_byte(byte_type b) override {
-      block_buf_.write_byte(b);
+      block_buf_ += b;
     }
 
     virtual void write_bytes(const byte_type* b, size_t size) override {
-      block_buf_.write_bytes(b, size);
+      block_buf_.append(b, size);
     }
 
     virtual void reset() override {
@@ -3151,7 +3229,7 @@ class writer final : public irs::columnstore_writer {
       }
 
       // reset to previous offset
-      block_buf_.reset(block_index_.max_offset());
+      block_buf_.resize(block_index_.max_offset());
       block_index_.pop_back();
     }
 
@@ -3172,11 +3250,13 @@ class writer final : public irs::columnstore_writer {
       max_ = block_index_.max_key();
 
       auto& out = *ctx_->data_out_;
-      auto* buf = ctx_->buf_;
 
       // write first block key & where block starts
       column_index_.push_back(block_index_.min_key(), out.file_pointer());
 
+      assert(ctx_->buf_.size() >= INDEX_BLOCK_SIZE*sizeof(uint64_t));
+      auto* buf = reinterpret_cast<uint64_t*>(&ctx_->buf_[0]);
+
       if (column_index_.full()) {
         column_index_.flush(blocks_index_.stream, buf);
       }
@@ -3192,13 +3272,14 @@ class writer final : public irs::columnstore_writer {
       // const auto res = expr0() | expr1();
       // otherwise it would violate format layout
       auto block_props = block_index_.flush(out, buf);
-      block_props |= write_compact(out, ctx_->comp_, static_cast<bytes_ref>(block_buf_));
+      block_props |= write_compact(out, ctx_->buf_, cipher_, *comp_, block_buf_);
 
       length_ += block_buf_.size();
 
       // refresh blocks properties
       blocks_props_ &= block_props;
       // reset buffer stream after flush
-      block_buf_.reset();
+      block_buf_.clear();
 
       // refresh column properties
       // column is dense IFF
@@ -3208,11 +3289,14 @@ class writer final : public irs::columnstore_writer {
     }
 
     writer* ctx_; // writer context
+    const compression::type_id* comp_type_;
+    compression::compressor::ptr comp_; // compressor used for column
+    encryption::stream* cipher_;
     uint64_t length_{}; // size of all data blocks in the column
     index_block<INDEX_BLOCK_SIZE> block_index_; // current block index (per document key/offset)
     index_block<INDEX_BLOCK_SIZE> column_index_; // column block index (per block key/offset)
     memory_output blocks_index_; // blocks index
-    bytes_output block_buf_{ 2*MAX_DATA_BLOCK_SIZE }; // data buffer
+    bstring block_buf_; // data buffer
     doc_id_t max_{ doc_limits::invalid() }; // max key (among flushed blocks)
     ColumnProperty blocks_props_{ CP_DENSE | CP_FIXED | CP_MASK }; // aggregated column blocks properties
     ColumnProperty column_props_{ CP_DENSE }; // aggregated column block index properties
@@ -3221,18 +3305,17 @@ class writer final : public irs::columnstore_writer {
   }; // column
 
   memory_allocator* alloc_{ &memory_allocator::global() };
-  uint64_t buf_[INDEX_BLOCK_SIZE]; // reusable temporary buffer for packing
   std::deque<column> columns_; // pointers remain valid
-  compressor comp_{ 2*MAX_DATA_BLOCK_SIZE };
+  bstring buf_; // reusable temporary buffer for packing/compression
   index_output::ptr data_out_;
   std::string filename_;
   directory* dir_;
+  encryption::stream::ptr data_out_cipher_;
+  int32_t version_;
 }; // writer
 
 template<>
-std::string file_name<columnstore_writer, segment_meta>(
-    const segment_meta& meta
-) {
+std::string file_name<columnstore_writer, segment_meta>(const segment_meta& meta) {
   return file_name(meta.name, columns::writer::FORMAT_EXT);
 };
 
@@ -3255,19 +3338,50 @@ void writer::prepare(directory& dir, const segment_meta& meta) {
     ));
   }
 
-  format_utils::write_header(*data_out, FORMAT_NAME, FORMAT_MAX);
+  format_utils::write_header(*data_out, FORMAT_NAME, version_);
+
+  encryption::stream::ptr data_out_cipher;
+
+  if (version_ > FORMAT_MIN) {
+    bstring enc_header;
+    auto* enc = get_encryption(dir.attributes());
+
+    const auto encrypt = irs::encrypt(filename, *data_out, enc, enc_header, data_out_cipher);
+    assert(!encrypt || (data_out_cipher && data_out_cipher->block_size()));
+    UNUSED(encrypt);
+  }
 
   alloc_ = &directory_utils::get_allocator(dir);
 
   // noexcept block
   dir_ = &dir;
   data_out_ = std::move(data_out);
+  data_out_cipher_ = std::move(data_out_cipher);
   filename_ = std::move(filename);
 }
 
-columnstore_writer::column_t writer::push_column() {
+columnstore_writer::column_t writer::push_column(const column_info& info) {
+  encryption::stream* cipher;
+  const compression::type_id* compression;
+
+  if (version_ > FORMAT_MIN) {
+    compression = &info.compression();
+    cipher = info.encryption() ? data_out_cipher_.get() : nullptr;
+  } else {
+    // we don't support encryption and custom
+    // compression for 'FORMAT_MIN' version
+    compression = &compression::lz4::type();
+    cipher = nullptr;
+  }
+
+  auto compressor = compression::get_compressor(*compression, info.options());
+
+  if (!compressor) {
+    compressor = noop_compressor::make();
+  }
+
   const auto id = columns_.size();
-  columns_.emplace_back(*this);
+  columns_.emplace_back(*this, info.compression(), compressor, cipher);
   auto& column = columns_.back();
 
   return std::make_pair(id, [&column] (doc_id_t doc) -> column_output& {

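Note: push_column() gates the per-column choices on the format version: FORMAT_MIN columns stay LZ4-compressed and unencrypted, while newer columns honor the supplied column_info and fall back to the pass-through noop_compressor when no codec is registered for the requested type. A sketch of that fallback, reusing the names from this diff:

// sketch of the codec fallback performed by push_column() above
compression::compressor::ptr pick_compressor(
    const compression::type_id& type,
    const compression::options& opts) {
  auto compressor = compression::get_compressor(type, opts); // may be null
  return compressor ? compressor
                    : noop_compressor::make(); // raw pass-through singleton
}
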
@@ -3445,7 +3559,10 @@ class sparse_block : util::noncopyable {
     const bstring* data_{};
   }; // iterator
 
-  void load(index_input& in, decompressor& decomp, bstring& buf) {
+  void load(index_input& in,
+            compression::decompressor* decomp,
+            encryption::stream* cipher,
+            bstring& buf) {
     const uint32_t size = in.read_vint(); // total number of entries in a block
 
     if (!size) {
@@ -3471,7 +3588,7 @@ class sparse_block : util::noncopyable {
     });
 
     // read data
-    read_compact(in, decomp, buf, data_);
+    read_compact(in, cipher, decomp, buf, data_);
     end_ = index_ + size;
   }
 
@@ -3628,7 +3745,10 @@ class dense_block : util::noncopyable {
     doc_id_t base_{};
   }; // iterator
 
-  void load(index_input& in, decompressor& decomp, bstring& buf) {
+  void load(index_input& in,
+            compression::decompressor* decomp,
+            encryption::stream* cipher,
+            bstring& buf) {
     const uint32_t size = in.read_vint(); // total number of entries in a block
 
     if (!size) {
@@ -3655,7 +3775,7 @@ class dense_block : util::noncopyable {
     });
 
     // read data
-    read_compact(in, decomp, buf, data_);
+    read_compact(in, cipher, decomp, buf, data_);
     end_ = index_ + size;
   }
 
@@ -3805,7 +3925,10 @@ class dense_fixed_offset_block : util::noncopyable {
     doc_id_t value_back_{}; // last valid doc id
   }; // iterator
 
-  void load(index_input& in, decompressor& decomp, bstring& buf) {
+  void load(index_input& in,
+            compression::decompressor* decomp,
+            encryption::stream* cipher,
+            bstring& buf) {
     size_ = in.read_vint(); // total number of entries in a block
 
     if (!size_) {
@@ -3830,7 +3953,7 @@ class dense_fixed_offset_block : util::noncopyable {
     }
 
     // read data
-    read_compact(in, decomp, buf, data_);
+    read_compact(in, cipher, decomp, buf, data_);
   }
 
   bool value(doc_id_t key, bytes_ref& out) const {
@@ -3945,7 +4068,10 @@ class sparse_mask_block : util::noncopyable {
     );
   }
 
-  void load(index_input& in, decompressor& /*decomp*/, bstring& buf) {
+  void load(index_input& in,
+            compression::decompressor* /*decomp*/,
+            encryption::stream* /*cipher*/,
+            bstring& buf) {
     size_ = in.read_vint(); // total number of entries in a block
 
     if (!size_) {
@@ -4062,7 +4188,10 @@ class dense_mask_block {
     max_(doc_limits::invalid()) {
   }
 
-  void load(index_input& in, decompressor& /*decomp*/, bstring& /*buf*/) {
+  void load(index_input& in,
+            compression::decompressor* /*decomp*/,
+            encryption::stream* /*cipher*/,
+            bstring& /*buf*/) {
     const auto size = in.read_vint(); // total number of entries in a block
 
     if (!size) {
@@ -4115,7 +4244,7 @@ class read_context
  public:
   DECLARE_SHARED_PTR(read_context);
 
-  static ptr make(const index_input& stream) {
+  static ptr make(const index_input& stream, encryption::stream* cipher) {
     auto clone = stream.reopen(); // reopen thread-safe stream
 
     if (!clone) {
@@ -4125,28 +4254,32 @@ class read_context
       throw io_error("Failed to reopen columnstore input in");
     }
 
-    return memory::make_shared<read_context>(std::move(clone));
+    return memory::make_shared<read_context>(std::move(clone), cipher);
   }
 
-  read_context(index_input::ptr&& in = index_input::ptr(), const Allocator& alloc = Allocator())
+  read_context(
+      index_input::ptr&& in,
+      encryption::stream* cipher,
+      const Allocator& alloc = Allocator())
     : block_cache_traits<sparse_block, Allocator>::cache_t(typename block_cache_traits<sparse_block, Allocator>::allocator_t(alloc)),
       block_cache_traits<dense_block, Allocator>::cache_t(typename block_cache_traits<dense_block, Allocator>::allocator_t(alloc)),
      block_cache_traits<dense_fixed_offset_block, Allocator>::cache_t(typename block_cache_traits<dense_fixed_offset_block, Allocator>::allocator_t(alloc)),
      block_cache_traits<sparse_mask_block, Allocator>::cache_t(typename block_cache_traits<sparse_mask_block, Allocator>::allocator_t(alloc)),
      block_cache_traits<dense_mask_block, Allocator>::cache_t(typename block_cache_traits<dense_mask_block, Allocator>::allocator_t(alloc)),
      buf_(INDEX_BLOCK_SIZE*sizeof(uint32_t), 0),
-      stream_(std::move(in)) {
+      stream_(std::move(in)),
+      cipher_(cipher) {
   }
 
   template<typename Block, typename... Args>
-  Block& emplace_back(uint64_t offset, Args&&... args) {
+  Block& emplace_back(uint64_t offset, compression::decompressor* decomp, bool decrypt, Args&&... args) {
     typename block_cache_traits<Block, Allocator>::cache_t& cache = *this;
 
     // add cache entry
     auto& block = cache.emplace_back(std::forward<Args>(args)...);
 
     try {
-      load(block, offset);
+      load(block, decomp, decrypt, offset);
     } catch (...) {
       // failed to load block
       pop_back<Block>();
@@ -4158,9 +4291,9 @@ class read_context
   }
 
   template<typename Block>
-  void load(Block& block, uint64_t offset) {
+  void load(Block& block, compression::decompressor* decomp, bool decrypt, uint64_t offset) {
     stream_->seek(offset); // seek to the offset
-    block.load(*stream_, decomp_, buf_);
+    block.load(*stream_, decomp, decrypt ? cipher_ : nullptr, buf_);
   }
 
   template<typename Block>
@@ -4170,9 +4303,9 @@ class read_context
   }
 
  private:
-  decompressor decomp_; // decompressor
   bstring buf_; // temporary buffer for decoding/unpacking
   index_input::ptr stream_;
+  encryption::stream* cipher_; // optional cipher stream
 }; // read_context
 
 typedef read_context<> read_context_t;
@@ -4183,16 +4316,20 @@ class context_provider: private util::noncopyable {
     : pool_(std::max(size_t(1), max_pool_size)) {
   }
 
-  void prepare(index_input::ptr&& stream) NOEXCEPT {
+  void prepare(index_input::ptr&& stream, encryption::stream::ptr&& cipher) NOEXCEPT {
     assert(stream);
 
     stream_ = std::move(stream);
+    cipher_ = std::move(cipher);
   }
 
   bounded_object_pool<read_context_t>::ptr get_context() const {
-    return pool_.emplace(*stream_);
+    return pool_.emplace(*stream_, cipher_.get());
   }
 
  private:
   mutable bounded_object_pool<read_context_t> pool_;
+  encryption::stream::ptr cipher_;
   index_input::ptr stream_;
 }; // context_provider
 
@@ -4201,6 +4338,8 @@ class context_provider: private util::noncopyable {
 template<typename BlockRef>
 const typename BlockRef::block_t& load_block(
     const context_provider& ctxs,
+    compression::decompressor* decomp,
+    bool decrypt,
     BlockRef& ref) {
   typedef typename BlockRef::block_t block_t;
 
@@ -4211,7 +4350,7 @@ const typename BlockRef::block_t& load_block(
     assert(ctx);
 
     // load block
-    const auto& block = ctx->template emplace_back<block_t>(ref.offset);
+    const auto& block = ctx->template emplace_back<block_t>(ref.offset, decomp, decrypt);
 
     // mark block as loaded
     if (ref.pblock.compare_exchange_strong(cached, &block)) {
@@ -4232,6 +4371,8 @@ const typename BlockRef::block_t& load_block(
 template<typename BlockRef>
 const typename BlockRef::block_t& load_block(
     const context_provider& ctxs,
+    compression::decompressor* decomp,
+    bool decrypt,
     const BlockRef& ref,
     typename BlockRef::block_t& block) {
   const auto* cached = ref.pblock.load();
@@ -4240,7 +4381,7 @@ const typename BlockRef::block_t& load_block(
     auto ctx = ctxs.get_context();
     assert(ctx);
 
-    ctx->load(block, ref.offset);
+    ctx->load(block, decomp, decrypt, ref.offset);
 
     cached = &block;
   }
@@ -4258,12 +4399,13 @@ class column
   DECLARE_UNIQUE_PTR(column);
 
   explicit column(ColumnProperty props) NOEXCEPT
-    : props_(props) {
+    : props_(props),
+      encrypted_(0 != (props & CP_COLUMN_ENCRYPT)) {
   }
 
-  virtual ~column() { }
+  virtual ~column() = default;
 
-  virtual void read(data_input& in, uint64_t* /*buf*/) {
+  virtual void read(data_input& in, uint64_t* /*buf*/, compression::decompressor::ptr decomp) {
     count_ = in.read_vint();
     max_ = in.read_vint();
     avg_block_size_ = in.read_vint();
@@ -4271,25 +4413,30 @@ class column
     if (!avg_block_count_) {
       avg_block_count_ = count_;
     }
+    decomp_ = decomp;
   }
 
+  bool encrypted() const NOEXCEPT { return encrypted_; }
   doc_id_t max() const NOEXCEPT { return max_; }
   virtual size_t size() const NOEXCEPT override { return count_; }
   bool empty() const NOEXCEPT { return 0 == size(); }
   uint32_t avg_block_size() const NOEXCEPT { return avg_block_size_; }
   uint32_t avg_block_count() const NOEXCEPT { return avg_block_count_; }
   ColumnProperty props() const NOEXCEPT { return props_; }
+  compression::decompressor* decompressor() const NOEXCEPT { return decomp_.get(); }
 
  protected:
   // same as size() but returns uint32_t to avoid type conversions
   uint32_t count() const NOEXCEPT { return count_; }
 
 private:
+  compression::decompressor::ptr decomp_;
   doc_id_t max_{ doc_limits::eof() };
   uint32_t count_{};
   uint32_t avg_block_size_{};
   uint32_t avg_block_count_{};
   ColumnProperty props_{ CP_SPARSE };
+  bool encrypted_{ false }; // cached encryption mark
 }; // column
 
 template<typename Column>
@@ -4363,7 +4510,7 @@ class column_iterator final: public irs::doc_iterator {
   }
 
   try {
-    const auto& cached = load_block(*column_->ctxs_, *begin_);
+    const auto& cached = load_block(*column_->ctxs_, column_->decompressor(), column_->encrypted(), *begin_);
 
     if (block_ != cached) {
       block_.reset(cached, payload_);
@@ -4425,8 +4572,8 @@ class sparse_column final : public column {
     : column(props), ctxs_(&ctxs) {
   }
 
-  virtual void read(data_input& in, uint64_t* buf) override {
-    column::read(in, buf); // read common header
+  virtual void read(data_input& in, uint64_t* buf, compression::decompressor::ptr decomp) override {
+    column::read(in, buf, decomp); // read common header
 
     uint32_t blocks_count = in.read_vint(); // total number of column index blocks
 
@@ -4496,7 +4643,7 @@ class sparse_column final : public column {
       return false;
     }
 
-    const auto& cached = load_block(*ctxs_, *it);
+    const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), *it);
 
     return cached.value(key, value);
   };
@@ -4506,7 +4653,7 @@ class sparse_column final : public column {
   ) const override {
     block_t block; // don't cache new blocks
     for (auto begin = refs_.begin(), end = refs_.end()-1; begin != end; ++begin) { // -1 for upper bound
-      const auto& cached = load_block(*ctxs_, *begin, block);
+      const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), *begin, block);
 
       if (!cached.visit(visitor)) {
         return false;
@@ -4617,8 +4764,8 @@ class dense_fixed_offset_column final : public column {
     : column(prop), ctxs_(&ctxs) {
   }
 
-  virtual void read(data_input& in, uint64_t* buf) override {
-    column::read(in, buf); // read common header
+  virtual void read(data_input& in, uint64_t* buf, compression::decompressor::ptr decomp) override {
+    column::read(in, buf, decomp); // read common header
 
     size_t blocks_count = in.read_vint(); // total number of column index blocks
 
@@ -4677,17 +4824,15 @@ class dense_fixed_offset_column final : public column {
 
     auto& ref = const_cast<block_ref&>(refs_[block_idx]);
 
-    const auto& cached = load_block(*ctxs_, ref);
+    const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), ref);
 
     return cached.value(key, value);
   }
 
-  virtual bool visit(
-    const columnstore_reader::values_visitor_f& visitor
-  ) const override {
+  virtual bool visit(const columnstore_reader::values_visitor_f& visitor) const override {
     block_t block; // don't cache new blocks
     for (auto& ref : refs_) {
-      const auto& cached = load_block(*ctxs_, ref, block);
+      const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), ref, block);
 
       if (!cached.visit(visitor)) {
         return false;
@@ -4786,12 +4931,12 @@ class dense_fixed_offset_column<dense_mask_block> final : public column {
     : column(prop) {
   }
 
-  virtual void read(data_input& in, uint64_t* buf) override {
+  virtual void read(data_input& in, uint64_t* buf, compression::decompressor::ptr decomp) override {
     // we treat data in blocks as "garbage" which could be
     // potentially removed on merge, so we don't validate
     // column properties using such blocks
 
-    column::read(in, buf); // read common header
+    column::read(in, buf, decomp); // read common header
 
     uint32_t blocks_count = in.read_vint(); // total number of column index blocks
 
@@ -4924,25 +5069,25 @@ irs::doc_iterator::ptr dense_fixed_offset_column<dense_mask_block>::iterator() c
 typedef std::function<
   column::ptr(const context_provider& ctxs, ColumnProperty prop)
 > column_factory_f;
-//                                                       Column     |          Blocks
-const column_factory_f g_column_factories[] {  // CP_DENSE          | CP_MASK CP_FIXED CP_DENSE
+//                                                       Column     |          Blocks
+const column_factory_f COLUMN_FACTORIES[] {    // CP_COLUMN_DENSE   | CP_MASK CP_FIXED CP_DENSE
   &sparse_column<sparse_block>::make,                    //  0      |    0       0        0
   &sparse_column<dense_block>::make,                     //  0      |    0       0        1
   &sparse_column<sparse_block>::make,                    //  0      |    0       1        0
   &sparse_column<dense_fixed_offset_block>::make,        //  0      |    0       1        1
   nullptr, /* invalid properties, should never happen */ //  0      |    1       0        0
   nullptr, /* invalid properties, should never happen */ //  0      |    1       0        1
   &sparse_column<sparse_mask_block>::make,               //  0      |    1       1        0
   &sparse_column<dense_mask_block>::make,                //  0      |    1       1        1
 
   &sparse_column<sparse_block>::make,                    //  1      |    0       0        0
   &sparse_column<dense_block>::make,                     //  1      |    0       0        1
   &sparse_column<sparse_block>::make,                    //  1      |    0       1        0
   &dense_fixed_offset_column<dense_fixed_offset_block>::make, // 1  |    0       1        1
   nullptr, /* invalid properties, should never happen */ //  1      |    1       0        0
   nullptr, /* invalid properties, should never happen */ //  1      |    1       0        1
   &sparse_column<sparse_mask_block>::make,               //  1      |    1       1        0
   &dense_fixed_offset_column<dense_mask_block>::make     //  1      |    1       1        1
 };
 
 //////////////////////////////////////////////////////////////////////////////

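Note: the renamed COLUMN_FACTORIES table is indexed by the four layout bits (block-level CP_DENSE/CP_FIXED/CP_MASK plus CP_COLUMN_DENSE); CP_COLUMN_ENCRYPT does not change the block layout, so the reader masks it off before the lookup, as the prepare() hunk below shows. A standalone sketch of the index computation:

#include <cstdint>

enum ColumnProperty : uint32_t {
  CP_SPARSE = 0,
  CP_DENSE = 1,
  CP_FIXED = 1 << 1,
  CP_MASK = 1 << 2,
  CP_COLUMN_DENSE = 1 << 3,
  CP_COLUMN_ENCRYPT = 1 << 4
};

// encryption is orthogonal to layout: strip it, keep a 0..15 table index
uint32_t factory_index(uint32_t props) {
  return props & ~uint32_t(CP_COLUMN_ENCRYPT);
}
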
@@ -4969,10 +5114,7 @@ class reader final: public columnstore_reader, public context_provider {
   std::vector<column::ptr> columns_;
 }; // reader
 
-bool reader::prepare(
-    const directory& dir,
-    const segment_meta& meta
-) {
+bool reader::prepare(const directory& dir, const segment_meta& meta) {
   const auto filename = file_name<columnstore_writer>(meta);
   bool exists;
 
@@ -4999,18 +5141,28 @@ bool reader::prepare(
   }
 
   // check header
-  format_utils::check_header(
+  const auto version = format_utils::check_header(
     *stream,
     writer::FORMAT_NAME,
     writer::FORMAT_MIN,
     writer::FORMAT_MAX
   );
 
+  encryption::stream::ptr cipher;
+
+  if (version > writer::FORMAT_MIN) {
+    auto* enc = get_encryption(dir.attributes());
+
+    if (irs::decrypt(filename, *stream, enc, cipher)) {
+      assert(cipher && cipher->block_size());
+    }
+  }
+
   // since columns data are too large
   // it is too costly to verify checksum of
   // the entire file. here we perform cheap
   // error detection which could recognize
-  // some forms of corruption. */
+  // some forms of corruption
   format_utils::read_checksum(*stream);
 
   // seek to data start
@@ -5023,8 +5175,9 @@ bool reader::prepare(
   for (size_t i = 0, size = columns.capacity(); i < size; ++i) {
     // read column properties
     const auto props = read_enum<ColumnProperty>(*stream);
+    const auto factory_id = (props & (~CP_COLUMN_ENCRYPT));
 
-    if (props >= IRESEARCH_COUNTOF(g_column_factories)) {
+    if (factory_id >= IRESEARCH_COUNTOF(COLUMN_FACTORIES)) {
       throw index_error(string_utils::to_string(
         "Failed to load column id=" IR_SIZE_T_SPECIFIER ", got invalid properties=%d",
         i, static_cast<uint32_t>(props)
@@ -5032,7 +5185,7 @@ bool reader::prepare(
     }
 
     // create column
-    const auto& factory = g_column_factories[props];
+    const auto& factory = COLUMN_FACTORIES[factory_id];
 
     if (!factory) {
       static_assert(
@@ -5054,8 +5207,32 @@ bool reader::prepare(
       ));
     }
 
+    compression::decompressor::ptr decomp;
+
+    if (version > writer::FORMAT_MIN) {
+      const auto compression_id = read_string<std::string>(*stream);
+      decomp = compression::get_decompressor(compression_id);
+
+      if (!decomp && !compression::exists(compression_id)) {
+        throw index_error(string_utils::to_string(
+          "Failed to load compression '%s' for column id=" IR_SIZE_T_SPECIFIER,
+          compression_id.c_str(), i));
+      }
+
+      if (decomp && !decomp->prepare(*stream)) {
+        throw index_error(string_utils::to_string(
+          "Failed to prepare compression '%s' for column id=" IR_SIZE_T_SPECIFIER,
+          compression_id.c_str(), i));
+      }
+    } else {
+      // we don't support encryption and custom
+      // compression for 'FORMAT_MIN' version
+      decomp = compression::get_decompressor(compression::lz4::type());
+      assert(decomp);
+    }
+
     try {
-      column->read(*stream, buf);
+      column->read(*stream, buf, decomp);
     } catch (...) {
       IR_FRMT_ERROR("Failed to load column id=" IR_SIZE_T_SPECIFIER, i);
 
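Note: the reader resolves the stored compression name through the codec registry and distinguishes two failure modes: an id that was never registered (a hard error) and a registered codec whose decompressor fails to read its per-column state. A condensed sketch of that resolution, reusing the names from this diff:

// sketch of the codec resolution performed in reader::prepare() above
compression::decompressor::ptr resolve_decompressor(
    const std::string& id, index_input& in) {
  auto decomp = compression::get_decompressor(id);

  if (!decomp && !compression::exists(id)) {
    throw index_error("unknown compression: " + id); // never registered
  }

  if (decomp && !decomp->prepare(in)) { // read codec-specific column state
    throw index_error("failed to prepare compression: " + id);
  }

  return decomp; // may legitimately be null for stateless no-op codecs
}
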
@@ -5067,7 +5244,7 @@ bool reader::prepare(
   }
 
   // noexcept
-  context_provider::prepare(std::move(stream));
+  context_provider::prepare(std::move(stream), std::move(cipher));
   columns_ = std::move(columns);
 
   return true;
@@ -5300,7 +5477,7 @@ class format10 : public irs::version10::format {
   virtual column_meta_writer::ptr get_column_meta_writer() const override;
   virtual column_meta_reader::ptr get_column_meta_reader() const override final;
 
-  virtual columnstore_writer::ptr get_columnstore_writer() const override final;
+  virtual columnstore_writer::ptr get_columnstore_writer() const override;
   virtual columnstore_reader::ptr get_columnstore_reader() const override final;
 
   virtual postings_writer::ptr get_postings_writer(bool volatile_state) const override;
@@ -5378,7 +5555,9 @@ column_meta_reader::ptr format10::get_column_meta_reader() const {
 }
 
 columnstore_writer::ptr format10::get_columnstore_writer() const {
-  return memory::make_unique<columns::writer>();
+  return memory::make_unique<columns::writer>(
+    int32_t(columns::writer::FORMAT_MIN)
+  );
 }
 
 columnstore_reader::ptr format10::get_columnstore_reader() const {
@@ -5407,7 +5586,7 @@ REGISTER_FORMAT(::format10);
 // --SECTION--                                                         format11
 // ----------------------------------------------------------------------------
 
-class format11 final : public format10 {
+class format11 : public format10 {
  public:
   DECLARE_FORMAT_TYPE();
   DECLARE_FACTORY();
@@ -5421,6 +5600,11 @@ class format11 final : public format10 {
   virtual segment_meta_writer::ptr get_segment_meta_writer() const override final;
 
   virtual column_meta_writer::ptr get_column_meta_writer() const override final;
+
+ protected:
+  explicit format11(const irs::format::type_id& type) NOEXCEPT
+    : format10(type) {
+  }
 }; // format11
 
 index_meta_writer::ptr format11::get_index_meta_writer() const {
@@ -5460,6 +5644,36 @@ column_meta_writer::ptr format11::get_column_meta_writer() const {
 DEFINE_FORMAT_TYPE_NAMED(::format11, "1_1");
 REGISTER_FORMAT(::format11);
 
+// ----------------------------------------------------------------------------
+// --SECTION--                                                         format12
+// ----------------------------------------------------------------------------
+
+class format12 final : public format11 {
+ public:
+  DECLARE_FORMAT_TYPE();
+  DECLARE_FACTORY();
+
+  format12() NOEXCEPT : format11(format12::type()) { }
+
+  virtual columnstore_writer::ptr get_columnstore_writer() const override final;
+}; // format12
+
+columnstore_writer::ptr format12::get_columnstore_writer() const {
+  return memory::make_unique<columns::writer>(
+    int32_t(columns::writer::FORMAT_MAX)
+  );
+}
+
+/*static*/ irs::format::ptr format12::make() {
+  static const ::format12 INSTANCE;
+
+  // aliasing constructor
+  return irs::format::ptr(irs::format::ptr(), &INSTANCE);
+}
+
+DEFINE_FORMAT_TYPE_NAMED(::format12, "1_2");
+REGISTER_FORMAT(::format12);
+
 NS_END
 
 NS_ROOT
@@ -5469,6 +5683,7 @@ void init() {
 #ifndef IRESEARCH_DLL
   REGISTER_FORMAT(::format10);
   REGISTER_FORMAT(::format11);
+  REGISTER_FORMAT(::format12);
 #endif
 }

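Note: format12 ("1_2") differs from format11 only in the columnstore writer it hands out, built with FORMAT_MAX (= 1) so new segments gain per-column compression and encryption, while "1_0"/"1_1" keep writing the old layout. A hedged sketch of selecting it, assuming the usual irs::formats::get() registry lookup applies:

// sketch: pick the new format by name once the registrations above have run
auto codec = irs::formats::get("1_2"); // nullptr if format12 is not registered
assert(codec);

// columnstore writer created with columns::writer::FORMAT_MAX
auto column_writer = codec->get_columnstore_writer();
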
@@ -0,0 +1,61 @@
+////////////////////////////////////////////////////////////////////////////////
+/// DISCLAIMER
+///
+/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
+///
+/// Licensed under the Apache License, Version 2.0 (the "License");
+/// you may not use this file except in compliance with the License.
+/// You may obtain a copy of the License at
+///
+///     http://www.apache.org/licenses/LICENSE-2.0
+///
+/// Unless required by applicable law or agreed to in writing, software
+/// distributed under the License is distributed on an "AS IS" BASIS,
+/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+/// See the License for the specific language governing permissions and
+/// limitations under the License.
+///
+/// Copyright holder is ArangoDB GmbH, Cologne, Germany
+///
+/// @author Andrey Abramov
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef IRESEARCH_COLUMN_INFO_H
+#define IRESEARCH_COLUMN_INFO_H
+
+#include "utils/string.hpp"
+#include "utils/compression.hpp"
+
+#include <functional>
+
+NS_ROOT
+
+////////////////////////////////////////////////////////////////////////////////
+/// @class column_info
+////////////////////////////////////////////////////////////////////////////////
+class column_info {
+ public:
+  column_info(const compression::type_id& compression,
+              const compression::options& options,
+              bool encryption) NOEXCEPT
+    : compression_(&compression),
+      options_(options),
+      encryption_(encryption) {
+  }
+
+  const compression::type_id& compression() const NOEXCEPT { return *compression_; }
+  const compression::options& options() const NOEXCEPT { return options_; }
+  bool encryption() const NOEXCEPT { return encryption_; }
+
+ private:
+  const compression::type_id* compression_;
+  const compression::options options_;
+  bool encryption_;
+}; // column_info
+
+typedef std::function<column_info(const string_ref)> column_info_provider_t;
+
+NS_END
+
+#endif // IRESEARCH_COLUMN_INFO_H

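Note: column_info_provider_t (bottom of the new header) maps a column name to its storage policy, which is how callers choose compression and encryption per column. A hedged example provider; the "sensitive_" prefix convention and the std::memcmp check are purely illustrative:

#include <cstring>

// illustrative: LZ4 everywhere, cipher only for a hypothetical name prefix
const irs::column_info_provider_t provider = [](const irs::string_ref name) {
  const char* const PREFIX = "sensitive_";
  const size_t PREFIX_LEN = 10;
  const bool encrypt = !name.null()
                    && name.size() >= PREFIX_LEN
                    && 0 == std::memcmp(name.c_str(), PREFIX, PREFIX_LEN);

  return irs::column_info(
    irs::compression::lz4::type(), // codec added by this commit
    irs::compression::options(),   // default codec options
    encrypt);
};
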
@@ -292,6 +292,7 @@ directory_reader_impl::directory_reader_impl(
   std::unordered_map<string_ref, size_t> reuse_candidates; // map by segment name to old segment id
 
   for(size_t i = 0, count = cached_impl ? cached_impl->meta_.meta.size() : 0; i < count; ++i) {
+    assert(cached_impl); // ensured by loop condition above
     auto itr = reuse_candidates.emplace(
       cached_impl->meta_.meta.segment(i).meta.name, i
     );
@@ -366,4 +367,4 @@ NS_END
 
 // -----------------------------------------------------------------------------
 // --SECTION--                                                       END-OF-FILE
-// -----------------------------------------------------------------------------
\ No newline at end of file
+// -----------------------------------------------------------------------------

@@ -38,6 +38,7 @@
 #include "utils/bit_utils.hpp"
 #include "utils/io_utils.hpp"
 #include "utils/log.hpp"
+#include "utils/lz4compression.hpp"
 #include "utils/map_utils.hpp"
 #include "utils/memory.hpp"
 #include "utils/object_pool.hpp"
@@ -55,6 +56,12 @@ using namespace irs;
 
 const byte_block_pool EMPTY_POOL;
 
+const column_info NORM_COLUMN{
+  compression::lz4::type(),
+  compression::options(),
+  false
+};
+
 // -----------------------------------------------------------------------------
 // --SECTION--                                                          helpers
 // -----------------------------------------------------------------------------
@@ -784,7 +791,9 @@ void field_data::reset(doc_id_t doc_id) {
 
 data_output& field_data::norms(columnstore_writer& writer) {
   if (!norms_) {
-    auto handle = writer.push_column();
+    // FIXME encoder for norms???
+    // do not encrypt norms
+    auto handle = writer.push_column(NORM_COLUMN);
     norms_ = std::move(handle.second);
     meta_.norm = handle.first;
   }

@@ -27,8 +27,8 @@
 #include "comparer.hpp"
 #include "formats/format_utils.hpp"
 #include "search/exclusion.hpp"
-#include "utils/bitset.hpp"
 #include "utils/bitvector.hpp"
+#include "utils/compression.hpp"
 #include "utils/directory_utils.hpp"
 #include "utils/index_utils.hpp"
 #include "utils/string_utils.hpp"
@@ -47,6 +47,11 @@ typedef range<irs::segment_writer::update_context> update_contexts_ref;
 
 const size_t NON_UPDATE_RECORD = irs::integer_traits<size_t>::const_max; // non-update
 
+const irs::column_info_provider_t DEFAULT_COLUMN_INFO = [](const irs::string_ref&) {
+  // no compression, no encryption
+  return irs::column_info{ irs::compression::raw::type(), {}, false };
+};
+
 struct flush_segment_context {
   const size_t doc_id_begin_; // starting doc_id to consider in 'segment.meta' (inclusive)
   const size_t doc_id_end_; // ending doc_id to consider in 'segment.meta' (exclusive)

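Note: index_writer falls back to this raw, unencrypted DEFAULT_COLUMN_INFO whenever init_options does not supply a provider (see the make() hunk below). Wiring in the provider sketched after column_info.hpp above would look roughly like this; init_options fields other than column_info are elided:

// sketch, assuming the provider from the earlier column_info example
irs::index_writer::init_options opts;
opts.column_info = provider; // per-column compression/encryption policy

auto writer = irs::index_writer::make(dir, codec, irs::OM_CREATE, opts);
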
@@ -383,8 +388,7 @@ bool map_removals(
     const candidates_mapping_t& candidates_mapping,
     const irs::merge_writer& merger,
     irs::readers_cache& readers,
-    irs::document_mask& docs_mask
-) {
+    irs::document_mask& docs_mask) {
   assert(merger);
 
   for (auto& mapping : candidates_mapping) {
@@ -594,7 +598,14 @@ index_writer::active_segment_context::active_segment_context(
     flush_ctx_(flush_ctx),
     pending_segment_context_offset_(pending_segment_context_offset),
     segments_active_(&segments_active) {
-  assert(!flush_ctx || flush_ctx->pending_segment_contexts_[pending_segment_context_offset_].segment_ == ctx_); // thread-safe because pending_segment_contexts_ is a deque
+#ifdef IRESEARCH_DEBUG
+  if (flush_ctx) {
+    // ensure there are no active struct update operations (only needed for assert)
+    SCOPED_LOCK_NAMED(flush_ctx->mutex_, lock);
+    // assert that flush_ctx and ctx are compatible
+    assert(flush_ctx->pending_segment_contexts_[pending_segment_context_offset_].segment_ == ctx_);
+  }
+#endif
 
   if (ctx_) {
     ++*segments_active_; // track here since guaranteed to have 1 ref per active segment
@@ -602,8 +613,7 @@ index_writer::active_segment_context::active_segment_context(
 }
 
 index_writer::active_segment_context::active_segment_context(
-    active_segment_context&& other
-) NOEXCEPT
+    active_segment_context&& other) NOEXCEPT
   : ctx_(std::move(other.ctx_)),
     flush_ctx_(std::move(other.flush_ctx_)),
     pending_segment_context_offset_(std::move(other.pending_segment_context_offset_)),
@@ -723,6 +733,7 @@ index_writer::documents_context::~documents_context() NOEXCEPT {
 
 void index_writer::documents_context::reset() NOEXCEPT {
+  tick_ = 0; // reset tick
 
   auto& ctx = segment_.ctx();
 
   if (!ctx) {
@@ -1025,16 +1036,17 @@ void index_writer::flush_context::reset() NOEXCEPT {
 index_writer::segment_context::segment_context(
     directory& dir,
     segment_meta_generator_t&& meta_generator,
-    const comparer* comparator
-): active_count_(0),
-  buffered_docs_(0),
-  dirty_(false),
-  dir_(dir),
-  meta_generator_(std::move(meta_generator)),
-  uncomitted_doc_id_begin_(doc_limits::min()),
-  uncomitted_generation_offset_(0),
-  uncomitted_modification_queries_(0),
-  writer_(segment_writer::make(dir_, comparator)) {
+    const column_info_provider_t& column_info,
+    const comparer* comparator)
+  : active_count_(0),
+    buffered_docs_(0),
+    dirty_(false),
+    dir_(dir),
+    meta_generator_(std::move(meta_generator)),
+    uncomitted_doc_id_begin_(doc_limits::min()),
+    uncomitted_generation_offset_(0),
+    uncomitted_modification_queries_(0),
+    writer_(segment_writer::make(dir_, column_info, comparator)) {
   assert(meta_generator_);
 }
 
@@ -1081,9 +1093,9 @@ uint64_t index_writer::segment_context::flush() {
 index_writer::segment_context::ptr index_writer::segment_context::make(
     directory& dir,
     segment_meta_generator_t&& meta_generator,
-    const comparer* comparator
-) {
-  return memory::make_shared<segment_context>(dir, std::move(meta_generator), comparator);
+    const column_info_provider_t& column_info,
+    const comparer* comparator) {
+  return memory::make_shared<segment_context>(dir, std::move(meta_generator), column_info, comparator);
 }
 
 segment_writer::update_context index_writer::segment_context::make_update_context() {
@@ -1094,8 +1106,7 @@ segment_writer::update_context index_writer::segment_context::make_update_contex
 }
 
 segment_writer::update_context index_writer::segment_context::make_update_context(
-    const filter& filter
-) {
+    const filter& filter) {
   auto generation = ++uncomitted_generation_offset_; // increment generation due to removal
   auto update_id = modification_queries_.size();
 
@@ -1108,8 +1119,7 @@ segment_writer::update_context index_writer::segment_context::make_update_contex
 }
 
 segment_writer::update_context index_writer::segment_context::make_update_context(
-    const std::shared_ptr<filter>& filter
-) {
+    const std::shared_ptr<filter>& filter) {
   assert(filter);
   auto generation = ++uncomitted_generation_offset_; // increment generation due to removal
   auto update_id = modification_queries_.size();
 
@@ -1123,8 +1133,7 @@ segment_writer::update_context index_writer::segment_context::make_update_contex
 }
 
 segment_writer::update_context index_writer::segment_context::make_update_context(
-    filter::ptr&& filter
-) {
+    filter::ptr&& filter) {
   assert(filter);
   auto generation = ++uncomitted_generation_offset_; // increment generation due to removal
   auto update_id = modification_queries_.size();
@@ -1200,9 +1209,10 @@ index_writer::index_writer(
     size_t segment_pool_size,
     const segment_options& segment_limits,
     const comparer* comparator,
+    const column_info_provider_t& column_info,
     index_meta&& meta,
-    committed_state_t&& committed_state
-) NOEXCEPT :
+    committed_state_t&& committed_state)
+  : column_info_(column_info),
     comparator_(comparator),
     cached_readers_(dir),
     codec_(codec),
@@ -1216,6 +1226,7 @@ index_writer::index_writer(
     writer_(codec->get_index_meta_writer()),
     write_lock_(std::move(lock)),
     write_lock_file_ref_(std::move(lock_file_ref)) {
+  assert(column_info); // ensured by 'make'
   assert(codec);
   flush_context_.store(&flush_context_pool_[0]);
 
@@ -1256,6 +1267,9 @@ void index_writer::clear() {
   pending_meta.update_generation(meta_); // clone index metadata generation
   pending_meta.seg_counter_.store(meta_.counter()); // ensure counter() >= max(seg#)
 
+  // rollback already opened transaction if any
+  writer_->rollback();
+
   // write 1st phase of index_meta transaction
   if (!writer_->prepare(dir, pending_meta)) {
     throw illegal_state();
@@ -1288,8 +1302,7 @@ index_writer::ptr index_writer::make(
     directory& dir,
     format::ptr codec,
     OpenMode mode,
-    const init_options& opts /*= init_options()*/
-) {
+    const init_options& opts /*= init_options()*/) {
   std::vector<index_file_refs::ref_t> file_refs;
   index_lock::ptr lock;
   index_file_refs::ref_t lockfile_ref;
@@ -1351,6 +1364,7 @@ index_writer::ptr index_writer::make(
     opts.segment_pool_size,
     segment_options(opts),
     opts.comparator,
+    opts.column_info ? opts.column_info : DEFAULT_COLUMN_INFO,
     std::move(meta),
     std::move(comitted_state)
   );
@@ -1385,8 +1399,7 @@ uint64_t index_writer::buffered_docs() const {
 bool index_writer::consolidate(
     const consolidation_policy_t& policy,
     format::ptr codec /*= nullptr*/,
-    const merge_writer::flush_progress_t& progress /*= {}*/
-) {
+    const merge_writer::flush_progress_t& progress /*= {}*/) {
   REGISTER_TIMER_DETAILED();
 
   if (!codec) {
@ -1490,7 +1503,7 @@ bool index_writer::consolidate(
|
|||
consolidation_segment.meta.name = file_name(meta_.increment()); // increment active meta, not fn arg
|
||||
|
||||
ref_tracking_directory dir(dir_); // track references for new segment
|
||||
merge_writer merger(dir, comparator_);
|
||||
merge_writer merger(dir, column_info_, comparator_);
|
||||
merger.reserve(candidates.size());
|
||||
|
||||
// add consolidated segments to the merge_writer
|
||||
|
@ -1669,8 +1682,7 @@ bool index_writer::consolidate(
|
|||
bool index_writer::import(
|
||||
const index_reader& reader,
|
||||
format::ptr codec /*= nullptr*/,
|
||||
const merge_writer::flush_progress_t& progress /*= {}*/
|
||||
) {
|
||||
const merge_writer::flush_progress_t& progress /*= {}*/) {
|
||||
if (!reader.live_docs_count()) {
|
||||
return true; // skip empty readers since no documents to import
|
||||
}
|
||||
|
@ -1685,7 +1697,7 @@ bool index_writer::import(
|
|||
segment.meta.name = file_name(meta_.increment());
|
||||
segment.meta.codec = codec;
|
||||
|
||||
merge_writer merger(dir);
|
||||
merge_writer merger(dir, column_info_, comparator_);
|
||||
merger.reserve(reader.size());
|
||||
|
||||
for (auto& segment : reader) {
|
||||
|
@ -1773,11 +1785,12 @@ index_writer::flush_context_ptr index_writer::get_flush_context(bool shared /*=
|
|||
}
|
||||
|
||||
index_writer::active_segment_context index_writer::get_segment_context(
|
||||
flush_context& ctx
|
||||
) {
|
||||
auto segments_active_decrement =
|
||||
irs::make_finally([this]()->void { --segments_active_; }); // release reservation (delcare before aquisition since operator++() is noexcept)
|
||||
auto segments_active = ++segments_active_; // increment counter to aquire reservation, if another thread tries to reserve last context then it'll be over limit
|
||||
flush_context& ctx) {
|
||||
// release reservation (delcare before aquisition since operator++() is noexcept)
|
||||
auto segments_active_decrement = irs::make_finally([this]()->void { --segments_active_; });
|
||||
// increment counter to aquire reservation, if another thread
|
||||
// tries to reserve last context then it'll be over limit
|
||||
auto segments_active = ++segments_active_;
|
||||
auto segment_count_max = segment_limits_.segment_count_max.load();
|
||||
|
||||
// no free segment_context available and maximum number of segments reached
|
||||
|
@ -1794,7 +1807,6 @@ index_writer::active_segment_context index_writer::get_segment_context(
|
|||
); // only nodes of type 'pending_segment_context' are added to 'pending_segment_contexts_freelist_'
|
||||
|
||||
if (freelist_node) {
|
||||
assert(ctx.pending_segment_contexts_[freelist_node->value].segment_ == freelist_node->segment_); // thread-safe because pending_segment_contexts_ is a deque
|
||||
assert(freelist_node->segment_.use_count() == 1); // +1 for the reference in 'pending_segment_contexts_'
|
||||
assert(!freelist_node->segment_->dirty_);
|
||||
return active_segment_context(
|
||||
|
@ -1809,14 +1821,16 @@ index_writer::active_segment_context index_writer::get_segment_context(
|
|||
auto meta_generator = [this]()->segment_meta {
|
||||
return segment_meta(file_name(meta_.increment()), codec_);
|
||||
};
|
||||
auto segment_ctx =
|
||||
segment_writer_pool_.emplace(dir_, std::move(meta_generator), comparator_).release();
|
||||
auto segment_ctx = segment_writer_pool_.emplace(
|
||||
dir_, std::move(meta_generator),
|
||||
column_info_, comparator_
|
||||
).release();
|
||||
auto segment_memory_max = segment_limits_.segment_memory_max.load();
|
||||
|
||||
// recreate writer if it reserved more memory than allowed by current limits
|
||||
if (segment_memory_max &&
|
||||
segment_memory_max < segment_ctx->writer_->memory_reserved()) {
|
||||
segment_ctx->writer_ = segment_writer::make(segment_ctx->dir_, comparator_);
|
||||
segment_ctx->writer_ = segment_writer::make(segment_ctx->dir_, column_info_, comparator_);
|
||||
}
|
||||
|
||||
return active_segment_context(segment_ctx, segments_active_);
|
||||
|
|
|
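The hunks above thread a column_info_provider_t from init_options down into every segment_writer and merge_writer the index_writer creates. A minimal usage sketch, assuming the init_options::column_info member and the make() signature shown above; the lambda body, the OM_CREATE flag, and the meaning of column_info's third field (taken from the three-field NORM_COLUMN initializer later in this diff, assumed to toggle encryption) are illustrative:

irs::index_writer::init_options opts;
opts.column_info = [](const irs::string_ref& /*column*/) {
  return irs::column_info{
    irs::compression::lz4::type(), // codec registered by lz4compression
    irs::compression::options(),   // default compression hint
    true                           // assumption: encrypt if supported
  };
};
auto writer = irs::index_writer::make(dir, codec, irs::OM_CREATE, opts);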
@ -21,10 +21,11 @@
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////

#ifndef IRESEARCH_INDEXWRITER_H
#define IRESEARCH_INDEXWRITER_H
#ifndef IRESEARCH_INDEX_WRITER_H
#define IRESEARCH_INDEX_WRITER_H

#include "field_meta.hpp"
#include "column_info.hpp"
#include "index_meta.hpp"
#include "merge_writer.hpp"
#include "segment_reader.hpp"

@ -53,17 +54,21 @@ class directory_reader;
class readers_cache final : util::noncopyable {
 public:
  struct key_t {
    std::string name;
    uint64_t version;
    key_t(const segment_meta& meta); // implicit constructor

    bool operator<(const key_t& other) const NOEXCEPT {
      return name < other.name
        || (name == other.name && version < other.version);
    }

    bool operator==(const key_t& other) const NOEXCEPT {
      return name == other.name && version == other.version;
    }

    std::string name;
    uint64_t version;
  };

  struct key_hash_t {
    size_t operator()(const key_t& key) const NOEXCEPT {
      return std::hash<std::string>()(key.name);

@ -108,11 +113,12 @@ ENABLE_BITMASK_ENUM(OpenMode);
/// the same directory simultaneously.
/// Thread safe.
////////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API index_writer:
  private atomic_shared_ptr_helper<std::pair<
    std::shared_ptr<index_meta>, std::vector<index_file_refs::ref_t>
  >>,
  private util::noncopyable {
class IRESEARCH_API index_writer
  : private atomic_shared_ptr_helper<
      std::pair<
        std::shared_ptr<index_meta>, std::vector<index_file_refs::ref_t>
    >>,
    private util::noncopyable {
 private:
  struct flush_context; // forward declaration
  struct segment_context; // forward declaration

@ -122,9 +128,7 @@ class IRESEARCH_API index_writer:
    void(*)(flush_context*) // sizeof(std::function<void(flush_context*)>) > sizeof(void(*)(flush_context*))
  > flush_context_ptr; // unique pointer required since need ponter declaration before class declaration e.g. for 'documents_context'

  typedef std::shared_ptr<
    segment_context
  > segment_context_ptr; // declaration from segment_context::ptr below
  typedef std::shared_ptr<segment_context> segment_context_ptr; // declaration from segment_context::ptr below

  //////////////////////////////////////////////////////////////////////////////
  /// @brief segment references given out by flush_context to allow tracking

@ -427,7 +431,12 @@ class IRESEARCH_API index_writer:
  //////////////////////////////////////////////////////////////////////////////
  /// @brief options the the writer should use after creation
  //////////////////////////////////////////////////////////////////////////////
  struct init_options: public segment_options {
  struct init_options : public segment_options {
    ////////////////////////////////////////////////////////////////////////////
    /// @brief returns column info the writer should use for columnstore
    ////////////////////////////////////////////////////////////////////////////
    column_info_provider_t column_info;

    ////////////////////////////////////////////////////////////////////////////
    /// @brief comparator defines physical order of documents in each segment
    /// produced by an index_writer.

@ -457,18 +466,14 @@ class IRESEARCH_API index_writer:
  };

  struct segment_hash {
    size_t operator()(
      const segment_meta* segment
    ) const NOEXCEPT {
    size_t operator()(const segment_meta* segment) const NOEXCEPT {
      return hash_utils::hash(segment->name);
    }
  }; // segment_hash

  struct segment_equal {
    size_t operator()(
      const segment_meta* lhs,
      const segment_meta* rhs
    ) const NOEXCEPT {
    size_t operator()(const segment_meta* lhs,
                      const segment_meta* rhs) const NOEXCEPT {
      return lhs->name == rhs->name;
    }
  }; // segment_equal

@ -517,6 +522,7 @@ class IRESEARCH_API index_writer:
  ////////////////////////////////////////////////////////////////////////////
  /// @brief Clears the existing index repository by staring an empty index.
  /// Previously opened readers still remain valid.
  /// @note call will rollback any opened transaction
  ////////////////////////////////////////////////////////////////////////////
  void clear();

@ -801,8 +807,8 @@ class IRESEARCH_API index_writer:
    segment_writer::ptr writer_;
    index_meta::index_segment_t writer_meta_; // the segment_meta this writer was initialized with

    DECLARE_FACTORY(directory& dir, segment_meta_generator_t&& meta_generator, const comparer* comparator);
    segment_context(directory& dir, segment_meta_generator_t&& meta_generator, const comparer* comparator);
    DECLARE_FACTORY(directory& dir, segment_meta_generator_t&& meta_generator, const column_info_provider_t& column_info, const comparer* comparator);
    segment_context(directory& dir, segment_meta_generator_t&& meta_generator, const column_info_provider_t& column_info, const comparer* comparator);

    ////////////////////////////////////////////////////////////////////////////
    /// @brief flush current writer state into a materialized segment

@ -1000,7 +1006,11 @@ class IRESEARCH_API index_writer:
    committed_state_t commit; // meta + references of next commit

    operator bool() const NOEXCEPT { return ctx && commit; }
    void reset() NOEXCEPT { ctx.reset(), commit.reset(); }

    void reset() NOEXCEPT {
      ctx.reset();
      commit.reset();
    }
  }; // pending_state_t

  index_writer(

@ -1011,9 +1021,10 @@ class IRESEARCH_API index_writer:
    size_t segment_pool_size,
    const segment_options& segment_limits,
    const comparer* comparator,
    index_meta&& meta,
    const column_info_provider_t& column_info,
    index_meta&& meta,
    committed_state_t&& committed_state
  ) NOEXCEPT;
  );

  pending_context_t flush_all(const before_commit_f& before_commit);

@ -1025,6 +1036,7 @@ class IRESEARCH_API index_writer:
  void abort(); // aborts transaction

  IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
  column_info_provider_t column_info_;
  const comparer* comparator_;
  readers_cache cached_readers_; // readers by segment name
  format::ptr codec_;

@ -1048,4 +1060,4 @@ class IRESEARCH_API index_writer:

NS_END

#endif
#endif // IRESEARCH_INDEX_WRITER_H
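The clear() note added above documents two guarantees: previously opened readers stay valid, and any open transaction is rolled back. A hedged sketch of the intended call pattern; directory_reader::open is assumed from the forward declaration at the top of this header:

auto reader = irs::directory_reader::open(dir); // snapshot of the current index
writer->clear(); // rolls back the open transaction and starts an empty index
// 'reader' still serves the pre-clear() snapshot until it is reopened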
@ -33,6 +33,7 @@
#include "index/comparer.hpp"
#include "utils/directory_utils.hpp"
#include "utils/log.hpp"
#include "utils/lz4compression.hpp"
#include "utils/type_limits.hpp"
#include "utils/version_utils.hpp"
#include "store/store_utils.hpp"

@ -43,6 +44,12 @@

NS_LOCAL

const irs::column_info NORM_COLUMN{
  irs::compression::lz4::type(),
  irs::compression::options(),
  false
};

// mapping of old doc_id to new doc_id (reader doc_ids are sequential 0 based)
// masked doc_ids have value of MASKED_DOC_ID
typedef std::vector<irs::doc_id_t> doc_id_map_t;

@ -958,8 +965,7 @@ class columnstore {
  bool insert(
      const irs::sub_reader& reader,
      irs::field_id column,
      const doc_map_f& doc_map
  ) {
      const doc_map_f& doc_map) {
    const auto* column_reader = reader.column_reader(column);

    if (!column_reader) {

@ -1010,9 +1016,9 @@ class columnstore {
    return true;
  }

  void reset() {
  void reset(const irs::column_info& info) {
    if (!empty_) {
      column_ = writer_->push_column();
      column_ = writer_->push_column(info);
      empty_ = true;
    }
  }

@ -1128,10 +1134,10 @@ bool write_columns(
    columnstore& cs,
    CompoundIterator& columns,
    irs::directory& dir,
    const irs::column_info_provider_t& column_info,
    const irs::segment_meta& meta,
    compound_column_meta_iterator_t& column_meta_itr,
    const irs::merge_writer::flush_progress_t& progress
) {
    const irs::merge_writer::flush_progress_t& progress) {
  REGISTER_TIMER_DETAILED();
  assert(cs);
  assert(progress);

@ -1159,7 +1165,8 @@ bool write_columns(
  column_meta_writer->prepare(dir, meta);

  while (column_meta_itr.next()) {
    cs.reset();
    const auto& column_name = (*column_meta_itr).name;
    cs.reset(column_info(column_name));

    // visit matched columns from merging segments and
    // write all survived values to the new segment

@ -1172,7 +1179,7 @@ bool write_columns(
    }

    if (!cs.empty()) {
      column_meta_writer->write((*column_meta_itr).name, cs.id());
      column_meta_writer->write(column_name, cs.id());
    }
  }

@ -1187,10 +1194,10 @@ bool write_columns(
bool write_columns(
    columnstore& cs,
    irs::directory& dir,
    const irs::column_info_provider_t& column_info,
    const irs::segment_meta& meta,
    compound_column_meta_iterator_t& column_itr,
    const irs::merge_writer::flush_progress_t& progress
) {
    const irs::merge_writer::flush_progress_t& progress) {
  REGISTER_TIMER_DETAILED();
  assert(cs);
  assert(progress);

@ -1207,7 +1214,8 @@ bool write_columns(
  cmw->prepare(dir, meta);

  while (column_itr.next()) {
    cs.reset();
    const auto& column_name = (*column_itr).name;
    cs.reset(column_info(column_name));

    // visit matched columns from merging segments and
    // write all survived values to the new segment

@ -1216,7 +1224,7 @@ bool write_columns(
    }

    if (!cs.empty()) {
      cmw->write((*column_itr).name, cs.id());
      cmw->write(column_name, cs.id());
    }
  }

@ -1262,7 +1270,7 @@ bool write_fields(
  };

  while (field_itr.next()) {
    cs.reset();
    cs.reset(NORM_COLUMN); // FIXME encoder for norms???

    auto& field_meta = field_itr.meta();
    auto& field_features = field_meta.features;

@ -1339,7 +1347,7 @@ bool write_fields(
  };

  while (field_itr.next()) {
    cs.reset();
    cs.reset(NORM_COLUMN); // FIXME encoder for norms???

    auto& field_meta = field_itr.meta();
    auto& field_features = field_meta.features;

@ -1419,7 +1427,9 @@ merge_writer::reader_ctx::reader_ctx(irs::sub_reader::ptr reader) NOEXCEPT
}

merge_writer::merge_writer() NOEXCEPT
  : dir_(noop_directory::instance()) {
  : dir_(noop_directory::instance()),
    column_info_(nullptr),
    comparator_(nullptr) {
}

merge_writer::operator bool() const NOEXCEPT {

@ -1429,8 +1439,7 @@ merge_writer::operator bool() const NOEXCEPT {
bool merge_writer::flush(
    tracking_directory& dir,
    index_meta::index_segment_t& segment,
    const flush_progress_t& progress
) {
    const flush_progress_t& progress) {
  REGISTER_TIMER_DETAILED();
  assert(progress);
  assert(!comparator_);

@ -1500,7 +1509,7 @@ bool merge_writer::flush(
  }

  // write columns
  if (!write_columns(cs, dir, segment.meta, columns_meta_itr, progress)) {
  if (!write_columns(cs, dir, *column_info_, segment.meta, columns_meta_itr, progress)) {
    return false; // flush failure
  }

@ -1525,11 +1534,11 @@ bool merge_writer::flush(
bool merge_writer::flush_sorted(
    tracking_directory& dir,
    index_meta::index_segment_t& segment,
    const flush_progress_t& progress
) {
    const flush_progress_t& progress) {
  REGISTER_TIMER_DETAILED();
  assert(progress);
  assert(comparator_);
  assert(column_info_ && *column_info_);

  field_meta_map_t field_meta_map;
  compound_column_meta_iterator_t columns_meta_itr;

@ -1617,7 +1626,9 @@ bool merge_writer::flush_sorted(
  auto writer = segment.meta.codec->get_columnstore_writer();
  writer->prepare(dir, segment.meta);

  auto column = writer->push_column();
  // get column info for sorted column
  const auto info = (*column_info_)(string_ref::NIL);
  auto column = writer->push_column(info);

  irs::doc_id_t next_id = irs::doc_limits::min();
  while (columns_it.next()) {

@ -1674,7 +1685,7 @@ bool merge_writer::flush_sorted(
  }

  // write columns
  if (!write_columns(cs, sorting_doc_it, dir, segment.meta, columns_meta_itr, progress)) {
  if (!write_columns(cs, sorting_doc_it, dir, *column_info_, segment.meta, columns_meta_itr, progress)) {
    return false; // flush failure
  }
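write_columns() now resolves a codec per column by calling column_info(column_name) before each cs.reset(...), so every merged column can pick its own compression. A sketch of a provider with that shape; the lambda body is illustrative, and note that the merge_writer constructor (next file) stores only a pointer to the provider, so it must outlive the merger:

const irs::column_info_provider_t column_info =
  [](const irs::string_ref& /*name*/) {
    // same lz4 choice NORM_COLUMN makes above; a real provider
    // could dispatch on the column name
    return irs::column_info{
      irs::compression::lz4::type(), irs::compression::options(), false };
  };
irs::merge_writer merger(dir, column_info); // provider must outlive the merger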
@ -26,6 +26,7 @@

#include <vector>

#include "column_info.hpp"
#include "index_meta.hpp"
#include "utils/memory.hpp"
#include "utils/noncopyable.hpp"

@ -55,13 +56,18 @@ class IRESEARCH_API merge_writer: public util::noncopyable {

  explicit merge_writer(
      directory& dir,
      const column_info_provider_t& column_info,
      const comparer* comparator = nullptr) NOEXCEPT
    : dir_(dir), comparator_(comparator) {
    : dir_(dir),
      column_info_(&column_info),
      comparator_(comparator) {
    assert(column_info);
  }

  merge_writer(merge_writer&& rhs) NOEXCEPT
    : dir_(rhs.dir_),
      readers_(std::move(rhs.readers_)),
      column_info_(rhs.column_info_),
      comparator_(rhs.comparator_){
  }

@ -118,7 +124,8 @@ class IRESEARCH_API merge_writer: public util::noncopyable {
  IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
  directory& dir_;
  std::vector<reader_ctx> readers_;
  const comparer* comparator_{};
  const column_info_provider_t* column_info_;
  const comparer* comparator_;
  IRESEARCH_API_PRIVATE_VARIABLES_END
}; // merge_writer
@ -29,6 +29,7 @@
#include "analysis/token_attributes.hpp"
#include "utils/index_utils.hpp"
#include "utils/log.hpp"
#include "utils/lz4compression.hpp"
#include "utils/map_utils.hpp"
#include "utils/timer_utils.hpp"
#include "utils/type_limits.hpp"

@ -42,10 +43,13 @@ NS_ROOT
segment_writer::stored_column::stored_column(
    const string_ref& name,
    columnstore_writer& columnstore,
    bool cache
) : name(name.c_str(), name.size()) {
    const column_info_provider_t& column_info,
    bool cache)
  : name(name.c_str(), name.size()),
    stream(column_info(name)) {
  if (!cache) {
    std::tie(id, writer) = columnstore.push_column();
    auto& info = stream.info();
    std::tie(id, writer) = columnstore.push_column(info);
  } else {
    writer = [this](irs::doc_id_t doc)->columnstore_writer::column_output& {
      this->stream.prepare(doc);

@ -77,8 +81,11 @@ doc_id_t segment_writer::begin(
  return doc_id_t(docs_cached() + doc_limits::min() - 1); // -1 for 0-based offset
}

segment_writer::ptr segment_writer::make(directory& dir, const comparer* comparator) {
  return memory::maker<segment_writer>::make(dir, comparator);
segment_writer::ptr segment_writer::make(
    directory& dir,
    const column_info_provider_t& column_info,
    const comparer* comparator) {
  return memory::maker<segment_writer>::make(dir, column_info, comparator);
}

size_t segment_writer::memory_active() const NOEXCEPT {

@ -130,9 +137,11 @@ bool segment_writer::remove(doc_id_t doc_id) {

segment_writer::segment_writer(
    directory& dir,
    const comparer* comparator
) NOEXCEPT
  : fields_(comparator),
    const column_info_provider_t& column_info,
    const comparer* comparator) NOEXCEPT
  : sort_(column_info),
    fields_(comparator),
    column_info_(&column_info),
    dir_(dir),
    initialized_(false) {
}

@ -170,6 +179,7 @@ columnstore_writer::column_output& segment_writer::stream(
    const hashed_string_ref& name,
    const doc_id_t doc_id) {
  REGISTER_TIMER_DETAILED();
  assert(column_info_);

  auto generator = [](
      const hashed_string_ref& key,

@ -181,10 +191,10 @@ columnstore_writer::column_output& segment_writer::stream(
  // replace original reference to 'name' provided by the caller
  // with a reference to the cached copy in 'value'
  return map_utils::try_emplace_update_key(
    columns_,  // container
    generator, // key generator
    name,      // key
    name, *col_writer_, nullptr != fields_.comparator() // value // FIXME
    columns_,  // container
    generator, // key generator
    name,      // key
    name, *col_writer_, *column_info_, nullptr != fields_.comparator() // value // FIXME
  ).first->second.writer(doc_id);
}

@ -206,8 +216,7 @@ void segment_writer::flush_column_meta(const segment_meta& meta) {
  struct less_t {
    bool operator()(
        const stored_column* lhs,
        const stored_column* rhs
    ) const NOEXCEPT {
        const stored_column* rhs) const NOEXCEPT {
      return lhs->name < rhs->name;
    }
  };
@ -24,11 +24,13 @@
#ifndef IRESEARCH_TL_DOC_WRITER_H
#define IRESEARCH_TL_DOC_WRITER_H

#include "column_info.hpp"
#include "field_data.hpp"
#include "sorted_column.hpp"
#include "analysis/token_stream.hpp"
#include "formats/formats.hpp"
#include "utils/bitvector.hpp"
#include "utils/compression.hpp"
#include "utils/directory_utils.hpp"
#include "utils/noncopyable.hpp"
#include "utils/type_limits.hpp"

@ -164,7 +166,11 @@ class IRESEARCH_API segment_writer: util::noncopyable {
  }; // document

  DECLARE_UNIQUE_PTR(segment_writer);
  DECLARE_FACTORY(directory& dir, const comparer* comparator);
  DECLARE_FACTORY(
    directory& dir,
    const column_info_provider_t& column_info,
    const comparer* comparator
  );

  struct update_context {
    size_t generation;

@ -238,6 +244,7 @@ class IRESEARCH_API segment_writer: util::noncopyable {
    stored_column(
      const string_ref& name,
      columnstore_writer& columnstore,
      const column_info_provider_t& column_info,
      bool cache
    );

@ -248,13 +255,20 @@ class IRESEARCH_API segment_writer: util::noncopyable {
  }; // stored_column

  struct sorted_column : util::noncopyable {
    sorted_column() = default;
    explicit sorted_column(
        const column_info_provider_t& column_info) NOEXCEPT
      : stream(column_info(string_ref::NIL)) { // get compression for sorted column
    }

    irs::sorted_column stream;
    field_id id{ field_limits::invalid() };
  }; // sorted_column

  segment_writer(directory& dir, const comparer* comparator) NOEXCEPT;
  segment_writer(
    directory& dir,
    const column_info_provider_t& column_info,
    const comparer* comparator
  ) NOEXCEPT;

  bool index(
    const hashed_string_ref& name,

@ -399,6 +413,7 @@ class IRESEARCH_API segment_writer: util::noncopyable {
  std::unordered_set<field_data*> norm_fields_; // document fields for normalization
  std::string seg_name_;
  field_writer::ptr field_writer_;
  const column_info_provider_t* column_info_;
  column_meta_writer::ptr col_meta_writer_;
  columnstore_writer::ptr col_writer_;
  tracking_directory dir_;
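The sorted_column wrapper above derives its codec from column_info(string_ref::NIL): the nil name is reserved for the primary-sort column (the merge path uses the same convention). A provider honoring that convention might look like this sketch; the per-branch codec choices are illustrative, and raw::type() comes from compression.hpp later in this diff:

irs::column_info_provider_t provider = [](const irs::string_ref& name) {
  if (name.null()) {
    // sort column: keep it uncompressed for cheap access (illustrative)
    return irs::column_info{
      irs::compression::raw::type(), irs::compression::options(), false };
  }
  return irs::column_info{
    irs::compression::lz4::type(), irs::compression::options(), false };
};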
@ -26,6 +26,7 @@
#include "comparer.hpp"
#include "utils/type_limits.hpp"
#include "utils/misc.hpp"
#include "utils/lz4compression.hpp"

NS_ROOT

@ -93,7 +94,7 @@ std::pair<doc_map, field_id> sorted_column::flush(
  }

  // flush sorted data
  auto column = writer.push_column();
  auto column = writer.push_column(info_);
  auto& column_writer = column.second;

  new_doc_id = doc_limits::min();

@ -186,15 +187,14 @@ void sorted_column::flush_sparse(
field_id sorted_column::flush(
    columnstore_writer& writer,
    const doc_map& docmap,
    std::vector<std::pair<doc_id_t, doc_id_t>>& buffer
) {
    std::vector<std::pair<doc_id_t, doc_id_t>>& buffer) {
  assert(docmap.size() < irs::doc_limits::eof());

  if (index_.empty()) {
    return field_limits::invalid();
  }

  auto column = writer.push_column();
  auto column = writer.push_column(info_);
  auto& column_writer = column.second;

  // temporarily push sentinel
@ -24,6 +24,7 @@
#ifndef IRESEARCH_SORTED_COLUMN_H
#define IRESEARCH_SORTED_COLUMN_H

#include "column_info.hpp"
#include "formats/formats.hpp"
#include "store/store_utils.hpp"

@ -39,7 +40,9 @@ class sorted_column final : public irs::columnstore_writer::column_output {
 public:
  typedef std::vector<std::pair<doc_id_t, doc_id_t>> flush_buffer_t;

  sorted_column() = default;
  explicit sorted_column(const column_info& info)
    : info_(info) {
  }

  void prepare(doc_id_t key) {
    assert(index_.empty() || key >= index_.back().first);

@ -54,11 +57,11 @@ class sorted_column final : public irs::columnstore_writer::column_output {
  }

  virtual void write_byte(byte_type b) override {
    data_buf_.write_byte(b);
    data_buf_ += b;
  }

  virtual void write_bytes(const byte_type* b, size_t size) override {
    data_buf_.write_bytes(b, size);
    data_buf_.append(b, size);
  }

  virtual void reset() override {

@ -66,7 +69,7 @@ class sorted_column final : public irs::columnstore_writer::column_output {
      return;
    }

    data_buf_.reset(index_.back().second);
    data_buf_.resize(index_.back().second);
    index_.pop_back();
  }

@ -79,7 +82,7 @@ class sorted_column final : public irs::columnstore_writer::column_output {
  }

  void clear() NOEXCEPT {
    data_buf_.reset();
    data_buf_.clear();
    index_.clear();
  }

@ -105,6 +108,10 @@ class sorted_column final : public irs::columnstore_writer::column_output {
    return data_buf_.capacity() + index_.capacity()*sizeof(decltype(index_)::value_type);
  }

  const column_info& info() const NOEXCEPT {
    return info_;
  }

 private:
  void write_value(data_output& out, const size_t idx) {
    assert(idx + 1 < index_.size());

@ -131,8 +138,9 @@ class sorted_column final : public irs::columnstore_writer::column_output {
    flush_buffer_t& buffer
  );

  bytes_output data_buf_; // FIXME use memory_file or block_pool instead
  bstring data_buf_; // FIXME use memory_file or block_pool instead
  std::vector<std::pair<irs::doc_id_t, size_t>> index_; // doc_id + offset in 'data_buf_'
  column_info info_;
}; // sorted_column

NS_END // ROOT
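With data_buf_ switched to a bstring, the column_output contract above reads: write_byte()/write_bytes() append raw bytes, prepare() records a (doc_id, offset) boundary in index_, and reset() truncates the buffer back to the last recorded boundary. A hedged sketch of that life cycle (byte values and doc id are arbitrary):

irs::sorted_column col(irs::column_info{
  irs::compression::lz4::type(), irs::compression::options(), false });

col.prepare(42);      // record (doc 42, current buffer offset) in index_
col.write_byte(0x01); // appended via 'data_buf_ += b'
col.reset();          // data_buf_.resize(saved offset); index_.pop_back()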
@ -159,7 +159,7 @@
    || ((_MSC_FULL_VER >= 191326128) && (_MSC_FULL_VER <= 191326132)) \
    || ((_MSC_FULL_VER >= 191426430) && (_MSC_FULL_VER <= 191426433)) \
    || ((_MSC_FULL_VER >= 191526726) && (_MSC_FULL_VER <= 191526732)) \
    || ((_MSC_FULL_VER >= 191627023) && (_MSC_FULL_VER <= 191627031)))
    || ((_MSC_FULL_VER >= 191627023) && (_MSC_FULL_VER <= 191627032)))
  #define MSVC2017_3456789_OPTIMIZED_WORKAROUND(...) __VA_ARGS__
#else
  #define MSVC2017_3456789_OPTIMIZED_WORKAROUND(...)

@ -336,6 +336,17 @@

////////////////////////////////////////////////////////////////////////////////

// likely/unlikely branch indicator
// macro definitions similar to the ones at
// https://kernelnewbies.org/FAQ/LikelyUnlikely
#if defined(__GNUC__) || defined(__GNUG__)
  #define IRS_LIKELY(v) __builtin_expect(!!(v), 1)
  #define IRS_UNLIKELY(v) __builtin_expect(!!(v), 0)
#else
  #define IRS_LIKELY(v) v
  #define IRS_UNLIKELY(v) v
#endif

#ifdef IRESEARCH_DEBUG
  #define IRS_ASSERT(CHECK) \
    ( (CHECK) ? void(0) : []{assert(!#CHECK);}() )
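The new IRS_LIKELY/IRS_UNLIKELY hints compile to __builtin_expect on GCC-compatible compilers and to a plain pass-through elsewhere. Typical call-site shape (illustrative; the condition and message are not from the commit):

if (IRS_UNLIKELY(!stream)) {
  // cold path: taken rarely, so keep it out of the predicted branch
  throw irs::index_error("stream is not initialized");
}
// hot path continues here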
@ -299,28 +299,6 @@ uint32_t write_block(
NS_END // bitpack
NS_END // encode

// ----------------------------------------------------------------------------
// --SECTION-- I/O streams
// ----------------------------------------------------------------------------

/* bytes_output */

bytes_output::bytes_output(size_t capacity) {
  buf_.reserve(capacity);
}

bytes_output::bytes_output(bytes_output&& other) NOEXCEPT
  : buf_(std::move(other.buf_)) {
}

bytes_output& bytes_output::operator=(bytes_output&& other) NOEXCEPT {
  if (this != &other) {
    buf_ = std::move(other.buf_);
  }

  return *this;
}

// ----------------------------------------------------------------------------
// --SECTION-- bytes_ref_input implementation
// ----------------------------------------------------------------------------
@ -356,44 +356,23 @@ FORCE_INLINE bool shift_unpack_32(uint32_t in, uint32_t& out) NOEXCEPT {
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API bytes_output final : public data_output {
 public:
  bytes_output() = default;
  explicit bytes_output(size_t capacity);
  bytes_output(bytes_output&& rhs) NOEXCEPT;
  bytes_output& operator=(bytes_output&& rhs) NOEXCEPT;

  void reset(size_t size = 0) {
    buf_.resize(size);
  explicit bytes_output(bstring& buf) NOEXCEPT
    : buf_(&buf) {
  }

  virtual void write_byte(byte_type b) override {
    buf_ += b;
    (*buf_) += b;
  }

  virtual void write_bytes(const byte_type* b, size_t size) override {
    buf_.append(b, size);
  }

  const byte_type* c_str() const NOEXCEPT {
    return buf_.c_str();
  }

  size_t size() const NOEXCEPT {
    return buf_.size();
  }

  size_t capacity() const NOEXCEPT {
    return buf_.capacity();
  }

  operator bytes_ref() const NOEXCEPT {
    return buf_;
    buf_->append(b, size);
  }

  virtual void close() override { }

 private:
  IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
  bstring buf_;
  bstring* buf_;
  IRESEARCH_API_PRIVATE_VARIABLES_END
}; // bytes_output

@ -641,13 +620,11 @@ inline void decode(Iterator begin, Iterator end) {
  const auto second = begin+1;

  std::transform(second, end, begin, second, std::plus<value_type>());

  assert(std::is_sorted(begin, end));
}

template<typename Iterator>
inline void encode(Iterator begin, Iterator end) {
  assert(std::distance(begin, end) > 0 && std::is_sorted(begin, end));
  assert(std::distance(begin, end) > 0);

  typedef typename std::iterator_traits<Iterator>::value_type value_type;
  const auto rend = irstd::make_reverse_iterator(begin);
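bytes_output is reduced above from an owning buffer with accessors to a thin adapter over a caller-provided bstring. A sketch of the new usage; the byte values are arbitrary:

irs::bstring buf;            // the caller owns the storage
irs::bytes_output out(buf);  // the adapter holds only a pointer
out.write_byte(0x01);        // appends via '(*buf_) += b'
const irs::byte_type two[] = { 0x02, 0x03 };
out.write_bytes(two, sizeof two);
// buf now holds { 0x01, 0x02, 0x03 }; 'buf' must outlive 'out'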
@ -29,14 +29,14 @@

NS_LOCAL

static std::thread::id INVALID;
const auto RW_MUTEX_WAIT_TIMEOUT = std::chrono::milliseconds(100);

NS_END

NS_ROOT
NS_BEGIN(async_utils)

busywait_mutex::busywait_mutex(): owner_(INVALID) {}
busywait_mutex::busywait_mutex(): owner_(std::thread::id()) {}

busywait_mutex::~busywait_mutex() {
  assert(try_lock()); // ensure destroying an unlocked mutex

@ -45,18 +45,17 @@ busywait_mutex::~busywait_mutex() {
void busywait_mutex::lock() {
  auto this_thread_id = std::this_thread::get_id();

  for (auto expected = INVALID;
  for (auto expected = std::thread::id();
       !owner_.compare_exchange_strong(expected, this_thread_id);
       expected = INVALID
      ) {
    assert(this_thread_id != expected); // recursive lock aquisition attempted
       expected = std::thread::id()) {
    assert(this_thread_id != expected); // recursive lock acquisition attempted
    std::this_thread::yield();
  }
}

bool busywait_mutex::try_lock() {
  auto this_thread_id = std::this_thread::get_id();
  auto expected = INVALID;
  auto expected = std::thread::id();

  return owner_.compare_exchange_strong(expected, this_thread_id);
}

@ -64,9 +63,9 @@ bool busywait_mutex::try_lock() {
void busywait_mutex::unlock() {
  auto expected = std::this_thread::get_id();

  if (!owner_.compare_exchange_strong(expected, INVALID)) {
  if (!owner_.compare_exchange_strong(expected, std::thread::id())) {
    // try again since std::thread::id is garanteed to be '==' but may not be bit equal
    if (expected == std::this_thread::get_id() && owner_.compare_exchange_strong(expected, INVALID)) {
    if (expected == std::this_thread::get_id() && owner_.compare_exchange_strong(expected, std::thread::id())) {
      return;
    }

@ -77,6 +76,7 @@ void busywait_mutex::unlock() {
read_write_mutex::read_write_mutex() NOEXCEPT
  : concurrent_count_(0),
    exclusive_count_(0),
    exclusive_owner_(std::thread::id()),
    exclusive_owner_recursion_count_(0) {
}

@ -100,8 +100,7 @@ void read_write_mutex::lock_read() {

  // yield if there is already a writer waiting
  // wait for notification (possibly with writers waiting) or no more writers waiting
  while (exclusive_count_ && std::cv_status::timeout == reader_cond_.wait_for(lock, std::chrono::milliseconds(100))) {
  }
  while (exclusive_count_ && std::cv_status::timeout == reader_cond_.wait_for(lock, RW_MUTEX_WAIT_TIMEOUT)) {}

  ++concurrent_count_;
}

@ -120,7 +119,7 @@ void read_write_mutex::lock_write() {
  // wait until lock is held exclusively by the current thread
  while (concurrent_count_) {
    try {
      writer_cond_.wait_for(lock, std::chrono::milliseconds(100));
      writer_cond_.wait_for(lock, RW_MUTEX_WAIT_TIMEOUT);
    } catch (...) {
      // 'wait_for' may throw according to specification
    }

@ -218,11 +217,11 @@ void read_write_mutex::unlock(bool exclusive_only /*= false*/) {
  --concurrent_count_;
  #endif // IRESEARCH_DEBUG

  // TODO: this should be changed to SCOPED_LOCK_NAMED, as right now it is not
  // FIXME: this should be changed to SCOPED_LOCK_NAMED, as right now it is not
  // guaranteed that we can succesfully acquire the mutex here. and if we don't,
  // there is no guarantee that the notify_all will wake up queued waiter.

  TRY_SCOPED_LOCK_NAMED(mutex_, lock); // try to aquire mutex for use with cond

  TRY_SCOPED_LOCK_NAMED(mutex_, lock); // try to acquire mutex for use with cond

  // wake only writers since this is a reader
  // wake even without lock since writer may be waiting in lock_write() on cond
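After the mutable INVALID global is replaced with value-constructed std::thread::id(), busywait_mutex remains a spinning BasicLockable, so standard guards apply. A usage sketch (the function name is illustrative):

irs::async_utils::busywait_mutex m;

void critical() {
  // spins via std::this_thread::yield() until the CAS on owner_ succeeds
  std::lock_guard<irs::async_utils::busywait_mutex> guard(m);
  // critical section; re-locking from the same thread asserts in debug builds
}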
@ -39,7 +39,7 @@ NS_BEGIN(util)
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API const_attribute_store_provider {
 public:
  virtual ~const_attribute_store_provider() {}
  virtual ~const_attribute_store_provider() = default;
  virtual const irs::attribute_store& attributes() const NOEXCEPT = 0;
};

@ -49,7 +49,6 @@ class IRESEARCH_API const_attribute_store_provider {
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API attribute_store_provider: public const_attribute_store_provider {
 public:
  virtual ~attribute_store_provider() {}
  virtual irs::attribute_store& attributes() NOEXCEPT = 0;
  virtual const irs::attribute_store& attributes() const NOEXCEPT override final {
    return const_cast<attribute_store_provider*>(this)->attributes();

@ -62,7 +61,7 @@ class IRESEARCH_API attribute_store_provider: public const_attribute_store_provider {
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API const_attribute_view_provider {
 public:
  virtual ~const_attribute_view_provider() {}
  virtual ~const_attribute_view_provider() = default;
  virtual const irs::attribute_view& attributes() const NOEXCEPT = 0;
};

@ -72,7 +71,6 @@ class IRESEARCH_API const_attribute_view_provider {
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API attribute_view_provider: public const_attribute_view_provider {
 public:
  virtual ~attribute_view_provider() {}
  virtual irs::attribute_view& attributes() NOEXCEPT = 0;
  virtual const irs::attribute_view& attributes() const NOEXCEPT override final {
    return const_cast<attribute_view_provider*>(this)->attributes();
@ -36,7 +36,7 @@
NS_ROOT

template<typename Alloc>
class dynamic_bitset_base : irs::compact<0, Alloc>, util::noncopyable {
class dynamic_bitset_base : irs::compact<0, Alloc> {
 public:
  typedef size_t word_t;
  typedef typename std::allocator_traits<Alloc>::template rebind_alloc<word_t> allocator_type;
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2016 by EMC Corporation, All Rights Reserved
|
||||
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
|
@ -15,104 +15,193 @@
|
|||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is EMC Corporation
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Andrey Abramov
|
||||
/// @author Vasiliy Nabatchikov
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "shared.hpp"
|
||||
#include "error/error.hpp"
|
||||
#include "utils/register.hpp"
|
||||
|
||||
#include "compression.hpp"
|
||||
#include "utils/string_utils.hpp"
|
||||
#include "utils/type_limits.hpp"
|
||||
|
||||
#include <lz4.h>
|
||||
// list of statically loaded scorers via init()
|
||||
#ifndef IRESEARCH_DLL
|
||||
#include "lz4compression.hpp"
|
||||
#include "delta_compression.hpp"
|
||||
#endif
|
||||
|
||||
NS_ROOT
|
||||
NS_LOCAL
|
||||
|
||||
void compressor::deleter::operator()(void *p) NOEXCEPT {
|
||||
LZ4_freeStream(reinterpret_cast<LZ4_stream_t*>(p));
|
||||
}
|
||||
|
||||
compressor::compressor(unsigned int chunk_size):
|
||||
dict_size_(0),
|
||||
stream_(LZ4_createStream()) {
|
||||
string_utils::oversize(buf_, LZ4_COMPRESSBOUND(chunk_size));
|
||||
}
|
||||
|
||||
void compressor::compress(const char* src, size_t size) {
|
||||
assert(size <= std::numeric_limits<int>::max()); // LZ4 API uses int
|
||||
auto src_size = static_cast<int>(size);
|
||||
auto* stream = reinterpret_cast<LZ4_stream_t*>(stream_.get());
|
||||
|
||||
// ensure LZ4 dictionary from the previous run is at the start of buf_
|
||||
{
|
||||
auto* dict_store = dict_size_ ? &(buf_[0]) : nullptr;
|
||||
|
||||
// move the LZ4 dictionary from the previous run to the start of buf_
|
||||
if (dict_store) {
|
||||
dict_size_ = LZ4_saveDict(stream, dict_store, dict_size_);
|
||||
assert(dict_size_ >= 0);
|
||||
}
|
||||
|
||||
string_utils::oversize(buf_, LZ4_compressBound(src_size) + dict_size_);
|
||||
|
||||
// reload the LZ4 dictionary if buf_ has changed
|
||||
if (&(buf_[0]) != dict_store) {
|
||||
dict_size_ = LZ4_loadDict(stream, &(buf_[0]), dict_size_);
|
||||
assert(dict_size_ >= 0);
|
||||
}
|
||||
struct value{
|
||||
explicit value(
|
||||
irs::compression::compressor_factory_f compressor_factory = nullptr,
|
||||
irs::compression::decompressor_factory_f decompressor_factory = nullptr)
|
||||
: compressor_factory_(compressor_factory),
|
||||
decompressor_factory_(decompressor_factory) {
|
||||
}
|
||||
|
||||
auto* buf = &(buf_[dict_size_]);
|
||||
auto buf_size = static_cast<int>(std::min(
|
||||
buf_.size() - dict_size_,
|
||||
static_cast<size_t>(std::numeric_limits<int>::max())) // LZ4 API uses int
|
||||
);
|
||||
|
||||
#if defined(LZ4_VERSION_NUMBER) && (LZ4_VERSION_NUMBER >= 10700)
|
||||
auto lz4_size = LZ4_compress_fast_continue(stream, src, buf, src_size, buf_size, 0); // 0 == use default acceleration
|
||||
#else
|
||||
auto lz4_size = LZ4_compress_limitedOutput_continue(stream, src, buf, src_size, buf_size); // use for LZ4 <= v1.6.0
|
||||
#endif
|
||||
|
||||
if (lz4_size < 0) {
|
||||
this->size_ = 0;
|
||||
|
||||
throw index_error("while compressing, error: LZ4 returned negative size");
|
||||
bool empty() const NOEXCEPT {
|
||||
return !compressor_factory_ || !decompressor_factory_;
|
||||
}
|
||||
|
||||
this->data_ = reinterpret_cast<const byte_type*>(buf);
|
||||
this->size_ = lz4_size;
|
||||
}
|
||||
bool operator==(const value& other) const NOEXCEPT {
|
||||
return compressor_factory_ == other.compressor_factory_ &&
|
||||
decompressor_factory_ == other.decompressor_factory_;
|
||||
}
|
||||
|
||||
void decompressor::deleter::operator()(void *p) NOEXCEPT {
|
||||
LZ4_freeStreamDecode(reinterpret_cast<LZ4_streamDecode_t*>(p));
|
||||
}
|
||||
bool operator!=(const value& other) const NOEXCEPT {
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
decompressor::decompressor()
|
||||
: stream_(LZ4_createStreamDecode()) {
|
||||
}
|
||||
|
||||
size_t decompressor::deflate(
|
||||
const char* src, size_t src_size,
|
||||
char* dst, size_t dst_size) const {
|
||||
assert(src_size <= integer_traits<int>::const_max); // LZ4 API uses int
|
||||
|
||||
auto& stream = *reinterpret_cast<LZ4_streamDecode_t*>(stream_.get());
|
||||
const irs::compression::compressor_factory_f compressor_factory_;
|
||||
const irs::compression::decompressor_factory_f decompressor_factory_;
|
||||
};
|
||||
|
||||
const auto lz4_size = LZ4_decompress_safe_continue(
|
||||
&stream,
|
||||
src,
|
||||
dst,
|
||||
static_cast<int>(src_size), // LZ4 API uses int
|
||||
static_cast<int>(std::min(dst_size, static_cast<size_t>(integer_traits<int>::const_max))) // LZ4 API uses int
|
||||
);
|
||||
const std::string FILENAME_PREFIX("libcompression-");
|
||||
|
||||
return lz4_size < 0
|
||||
? type_limits<type_t::address_t>::invalid() // corrupted index
|
||||
: lz4_size;
|
||||
}
|
||||
class compression_register
|
||||
: public irs::tagged_generic_register<irs::string_ref, value,
|
||||
irs::string_ref, compression_register> {
|
||||
protected:
|
||||
virtual std::string key_to_filename(const key_type& key) const override {
|
||||
std::string filename(FILENAME_PREFIX.size() + key.size(), 0);
|
||||
|
||||
std::memcpy(
|
||||
&filename[0],
|
||||
FILENAME_PREFIX.c_str(),
|
||||
FILENAME_PREFIX.size()
|
||||
);
|
||||
|
||||
irs::string_ref::traits_type::copy(
|
||||
&filename[0] + FILENAME_PREFIX.size(),
|
||||
key.c_str(),
|
||||
key.size()
|
||||
);
|
||||
|
||||
return filename;
|
||||
}
|
||||
};
|
||||
|
||||
NS_END
|
||||
|
||||
NS_ROOT
|
||||
NS_BEGIN(compression)
|
||||
|
||||
compression_registrar::compression_registrar(
|
||||
const type_id& type,
|
||||
compressor_factory_f compressor_factory,
|
||||
decompressor_factory_f decompressor_factory,
|
||||
const char* source /*= nullptr*/) {
|
||||
string_ref const source_ref(source);
|
||||
const auto new_entry = ::value(compressor_factory, decompressor_factory);
|
||||
|
||||
auto entry = compression_register::instance().set(
|
||||
type.name(),
|
||||
new_entry,
|
||||
source_ref.null() ? nullptr : &source_ref);
|
||||
|
||||
registered_ = entry.second;
|
||||
|
||||
if (!registered_ && new_entry != entry.first) {
|
||||
auto* registered_source = compression_register::instance().tag(type.name());
|
||||
|
||||
if (source && registered_source) {
|
||||
IR_FRMT_WARN(
|
||||
"type name collision detected while registering compression, ignoring: type '%s' from %s, previously from %s",
|
||||
type.name().c_str(),
|
||||
source,
|
||||
registered_source->c_str()
|
||||
);
|
||||
} else if (source) {
|
||||
IR_FRMT_WARN(
|
||||
"type name collision detected while registering compression, ignoring: type '%s' from %s",
|
||||
type.name().c_str(),
|
||||
source
|
||||
);
|
||||
} else if (registered_source) {
|
||||
IR_FRMT_WARN(
|
||||
"type name collision detected while registering compression, ignoring: type '%s', previously from %s",
|
||||
type.name().c_str(),
|
||||
registered_source->c_str()
|
||||
);
|
||||
} else {
|
||||
IR_FRMT_WARN(
|
||||
"type name collision detected while registering compression, ignoring: type '%s'",
|
||||
type.name().c_str()
|
||||
);
|
||||
}
|
||||
|
||||
IR_LOG_STACK_TRACE();
|
||||
}
|
||||
}
|
||||
|
||||
bool exists(const string_ref& name, bool load_library /*= true*/ ) {
|
||||
return !compression_register::instance().get(name, load_library).empty();
|
||||
}
|
||||
|
||||
compressor::ptr get_compressor(
|
||||
const string_ref& name,
|
||||
const options& opts,
|
||||
bool load_library /*= true*/) NOEXCEPT {
|
||||
try {
|
||||
auto* factory = compression_register::instance().get(name, load_library).compressor_factory_;
|
||||
|
||||
return factory ? factory(opts) : nullptr;
|
||||
} catch (...) {
|
||||
IR_FRMT_ERROR("Caught exception while getting an analyzer instance");
|
||||
IR_LOG_EXCEPTION();
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
decompressor::ptr get_decompressor(const string_ref& name, bool load_library /*= true*/) NOEXCEPT {
|
||||
try {
|
||||
auto* factory = compression_register::instance().get(name, load_library).decompressor_factory_;
|
||||
|
||||
return factory ? factory() : nullptr;
|
||||
} catch (...) {
|
||||
IR_FRMT_ERROR("Caught exception while getting an analyzer instance");
|
||||
IR_LOG_EXCEPTION();
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void init() {
|
||||
#ifndef IRESEARCH_DLL
|
||||
lz4::init();
|
||||
delta::init();
|
||||
raw::init();
|
||||
#endif
|
||||
}
|
||||
|
||||
void load_all(const std::string& path) {
|
||||
load_libraries(path, FILENAME_PREFIX, "");
|
||||
}
|
||||
|
||||
bool visit(const std::function<bool(const string_ref&)>& visitor) {
|
||||
compression_register::visitor_t wrapper = [&visitor](const string_ref& key)->bool {
|
||||
return visitor(key);
|
||||
};
|
||||
|
||||
return compression_register::instance().visit(wrapper);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- raw implementation
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
/*static*/ void raw::init() {
|
||||
#ifndef IRESEARCH_DLL
|
||||
// match registration below
|
||||
REGISTER_COMPRESSION(raw, &raw::compressor, &raw::decompressor);
|
||||
#endif
|
||||
}
|
||||
|
||||
DEFINE_COMPRESSION_TYPE(iresearch::compression::raw);
|
||||
|
||||
REGISTER_COMPRESSION(raw, &raw::compressor, &raw::decompressor);
|
||||
|
||||
NS_END // compression
|
||||
NS_END
|
||||
|
|
|
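With the registry in place, codecs are resolved through the factory typedefs above rather than constructed directly. A lookup sketch using the type-based overloads; it assumes lz4 registered itself via compression::init() and that the Hint enum carries the values declared in compression.hpp below:

#include "utils/compression.hpp"
#include "utils/lz4compression.hpp"

irs::compression::init(); // static builds: force-reference lz4/delta/raw

auto comp = irs::compression::get_compressor(
  irs::compression::lz4::type(),
  irs::compression::options(irs::compression::options::Hint::SPEED));
auto decomp = irs::compression::get_decompressor(irs::compression::lz4::type());

if (comp && decomp) {
  // codec available; note that raw's factories return nullptr by design,
  // which callers treat as "no compression"
}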
@ -1,7 +1,7 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2016 by EMC Corporation, All Rights Reserved
|
||||
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
|
@ -15,67 +15,216 @@
|
|||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is EMC Corporation
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Andrey Abramov
|
||||
/// @author Vasiliy Nabatchikov
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef IRESEARCH_COMPRESSION_H
|
||||
#define IRESEARCH_COMPRESSION_H
|
||||
|
||||
#include "string.hpp"
|
||||
#include "type_id.hpp"
|
||||
#include "memory.hpp"
|
||||
#include "noncopyable.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <map>
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- compression definition
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#define DECLARE_COMPRESSION_TYPE() DECLARE_TYPE_ID(iresearch::compression::type_id)
|
||||
#define DEFINE_COMPRESSION_TYPE_NAMED(class_type, class_name) \
|
||||
DEFINE_TYPE_ID(class_type, iresearch::compression::type_id) { \
|
||||
static iresearch::compression::type_id type(class_name); \
|
||||
return type; \
|
||||
}
|
||||
#define DEFINE_COMPRESSION_TYPE(class_type) DEFINE_COMPRESSION_TYPE_NAMED(class_type, #class_type)
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- compression registration
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#define REGISTER_COMPRESSION__(compression_name, compressor_factory, decompressor_factory, line, source) \
|
||||
static iresearch::compression::compression_registrar compression_registrar ## _ ## line(compression_name::type(), compressor_factory, decompressor_factory, source)
|
||||
#define REGISTER_COMPRESSION_EXPANDER__(compression_name, compressor_factory, decompressor_factory, file, line) \
|
||||
REGISTER_COMPRESSION__(compression_name, compressor_factory, decompressor_factory, line, file ":" TOSTRING(line))
|
||||
#define REGISTER_COMPRESSION(compression_name, compressor_factory, decompressor_factory) \
|
||||
REGISTER_COMPRESSION_EXPANDER__(compression_name, compressor_factory, decompressor_factory, __FILE__, __LINE__)
|
||||
|
||||
NS_ROOT
|
||||
|
||||
class IRESEARCH_API compressor: public bytes_ref, private util::noncopyable {
|
||||
struct data_output;
|
||||
struct data_input;
|
||||
|
||||
NS_BEGIN(compression)
|
||||
|
||||
struct options {
|
||||
enum class Hint : byte_type {
|
||||
/// @brief use default compressor parameters
|
||||
DEFAULT = 0,
|
||||
|
||||
/// @brief prefer speed over compression ratio
|
||||
SPEED,
|
||||
|
||||
/// @brief prefer compression ratio over speed
|
||||
COMPRESSION
|
||||
};
|
||||
|
||||
/// @brief
|
||||
Hint hint{ Hint::DEFAULT };
|
||||
|
||||
options(Hint hint = Hint::DEFAULT)
|
||||
: hint(hint) {
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @class compressor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
struct IRESEARCH_API compressor {
|
||||
DECLARE_SHARED_PTR(compressor);
|
||||
|
||||
virtual ~compressor() = default;
|
||||
|
||||
/// @note caller is allowed to modify data pointed by 'in' up to 'size'
|
||||
virtual bytes_ref compress(byte_type* in, size_t size, bstring& buf) = 0;
|
||||
|
||||
/// @brief flush arbitrary payload relevant to compression
|
||||
virtual void flush(data_output& /*out*/) { /*NOOP*/ }
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @class compressor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
struct IRESEARCH_API decompressor {
|
||||
DECLARE_SHARED_PTR(decompressor);
|
||||
|
||||
virtual ~decompressor() = default;
|
||||
|
||||
/// @note caller is allowed to modify data pointed by 'src' up to 'src_size'
|
||||
/// @note caller is allowed to modify data pointed by 'dst' up to 'dst_size'
|
||||
virtual bytes_ref decompress(byte_type* src, size_t src_size,
|
||||
byte_type* dst, size_t dst_size) = 0;
|
||||
|
||||
virtual bool prepare(data_input& /*in*/) {
|
||||
// NOOP
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @class type_id
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class IRESEARCH_API type_id : public irs::type_id, private util::noncopyable {
|
||||
public:
|
||||
explicit compressor(unsigned int chunk_size);
|
||||
type_id(const string_ref& name) NOEXCEPT
|
||||
: name_(name) {
|
||||
}
|
||||
operator const type_id*() const NOEXCEPT { return this; }
|
||||
const string_ref& name() const NOEXCEPT { return name_; }
|
||||
|
||||
void compress(const char* src, size_t size);
|
||||
private:
|
||||
string_ref name_;
|
||||
};
|
||||
|
||||
inline void compress(const bytes_ref& src) {
|
||||
compress(ref_cast<char>(src).c_str(), src.size());
|
||||
typedef irs::compression::compressor::ptr(*compressor_factory_f)(const options&);
|
||||
typedef irs::compression::decompressor::ptr(*decompressor_factory_f)();
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- compression registration
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
class IRESEARCH_API compression_registrar {
|
||||
public:
|
||||
compression_registrar(const compression::type_id& type,
|
||||
compressor_factory_f compressor_factory,
|
||||
decompressor_factory_f decompressor_factory,
|
||||
const char* source = nullptr);
|
||||
|
||||
operator bool() const NOEXCEPT {
|
||||
return registered_;
|
||||
}
|
||||
|
||||
private:
|
||||
struct IRESEARCH_API deleter {
|
||||
void operator()(void* p) NOEXCEPT;
|
||||
};
|
||||
bool registered_;
|
||||
};
|
||||
|
||||
IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
|
||||
std::string buf_;
|
||||
int dict_size_; // the size of the LZ4 dictionary from the previous call
|
||||
std::unique_ptr<void, deleter> stream_; // hide internal LZ4 implementation
|
||||
IRESEARCH_API_PRIVATE_VARIABLES_END
|
||||
}; // compressor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief checks whether an comopression with the specified name is registered
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
IRESEARCH_API bool exists(const string_ref& name, bool load_library = true);
|
||||
|
||||
class IRESEARCH_API decompressor {
|
||||
public:
|
||||
decompressor();
|
||||
decompressor(const decompressor&) = default;
|
||||
decompressor& operator=(const decompressor&) = default;
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief creates a compressor by name, or nullptr if not found
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
IRESEARCH_API compressor::ptr get_compressor(
|
||||
const string_ref& name,
|
||||
const options& opts,
|
||||
bool load_library = true) NOEXCEPT;
|
||||

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a compressor by type, or nullptr if not found
////////////////////////////////////////////////////////////////////////////////
inline compressor::ptr get_compressor(
    const type_id& type,
    const options& opts,
    bool load_library = true) NOEXCEPT {
  return get_compressor(type.name(), opts, load_library);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a decompressor by name, or nullptr if not found
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API decompressor::ptr get_decompressor(
  const string_ref& name,
  bool load_library = true) NOEXCEPT;

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a decompressor by type, or nullptr if not found
////////////////////////////////////////////////////////////////////////////////
inline decompressor::ptr get_decompressor(
    const type_id& type,
    bool load_library = true) NOEXCEPT {
  return get_decompressor(type.name(), load_library);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief for static lib reference all known compressions in lib
///        for shared lib NOOP
///        no explicit call of fn is required, existence of fn is sufficient
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API void init();

////////////////////////////////////////////////////////////////////////////////
/// @brief load all compressions from plugins directory
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API void load_all(const std::string& path);

////////////////////////////////////////////////////////////////////////////////
/// @brief visit all loaded compressions, terminate early if visitor returns false
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API bool visit(const std::function<bool(const string_ref&)>& visitor);

////////////////////////////////////////////////////////////////////////////////
/// @class raw
/// @brief no compression
////////////////////////////////////////////////////////////////////////////////
struct IRESEARCH_API raw {
  DECLARE_COMPRESSION_TYPE();

  static void init();

  static compression::compressor::ptr compressor(const options& /*opts*/) {
    return nullptr;
  }

  static compression::decompressor::ptr decompressor() {
    return nullptr;
  }
}; // raw

NS_END // compression
NS_END

#endif // IRESEARCH_COMPRESSION_H
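
// ---------------------------------------------------------------------------
// Illustrative sketch only (not part of the commit): how a custom codec could
// plug into the registry declared above. 'my_codec' and its factories are
// hypothetical; the macro and lookup calls follow the declarations in this
// header.
//
//   struct my_codec {
//     DECLARE_COMPRESSION_TYPE();
//     static irs::compression::compressor::ptr compressor(
//       const irs::compression::options& opts);
//     static irs::compression::decompressor::ptr decompressor();
//   };
//
//   // static registration object; its 'operator bool()' reports success
//   REGISTER_COMPRESSION(my_codec, &my_codec::compressor, &my_codec::decompressor);
//
//   // later: lookup by name (may load a plugin when load_library == true)
//   auto comp = irs::compression::get_compressor(
//     "my_codec", irs::compression::options());
// ---------------------------------------------------------------------------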
@ -0,0 +1,88 @@

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////

#include "shared.hpp"
#include "delta_compression.hpp"
#include "store/store_utils.hpp"

NS_LOCAL

irs::compression::delta_compressor COMPRESSOR;
irs::compression::delta_decompressor DECOMPRESSOR;

NS_END

NS_ROOT
NS_BEGIN(compression)

bytes_ref delta_compressor::compress(byte_type* src, size_t size, bstring& buf) {
  auto* begin = reinterpret_cast<uint64_t*>(src);
  auto* end = reinterpret_cast<uint64_t*>(src + size);
  encode::delta::encode(begin, end);

  // ensure we have enough space in the worst case
  assert(end >= begin);
  buf.resize(size_t(std::distance(begin, end))*bytes_io<uint64_t>::const_max_vsize);

  auto* out = const_cast<byte_type*>(buf.data());
  for (; begin != end; ++begin) {
    vwrite(out, zig_zag_encode64(int64_t(*begin)));
  }

  assert(out >= buf.data());
  return { buf.c_str(), size_t(out - buf.data()) };
}

bytes_ref delta_decompressor::decompress(
    byte_type* src, size_t src_size,
    byte_type* dst, size_t dst_size) {
  auto* dst_end = reinterpret_cast<uint64_t*>(dst);

  for (const auto* src_end = src + src_size; src != src_end; ++dst_end) {
    *dst_end = uint64_t(zig_zag_decode64(vread<uint64_t>(src)));
  }

  encode::delta::decode(reinterpret_cast<uint64_t*>(dst), dst_end);

  return bytes_ref(dst, dst_size);
}

compressor::ptr delta::compressor(const options& /*opts*/) {
  return compressor::ptr(compressor::ptr(), &COMPRESSOR);
}

decompressor::ptr delta::decompressor() {
  return decompressor::ptr(decompressor::ptr(), &DECOMPRESSOR);
}

void delta::init() {
  // match registration below
  REGISTER_COMPRESSION(delta, &delta::compressor, &delta::decompressor);
}

DEFINE_COMPRESSION_TYPE(iresearch::compression::delta);

REGISTER_COMPRESSION(delta, &delta::compressor, &delta::decompressor);

NS_END // compression
NS_END
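
// Worked example (illustrative only, not from the commit): the compressor
// above delta-encodes the input in place, then stores each delta as a
// zig-zag encoded varint. Zig-zag maps small signed deltas to small unsigned
// values so the varint stays short:
//
//   zig_zag_encode64(0)  == 0
//   zig_zag_encode64(-1) == 1
//   zig_zag_encode64(1)  == 2
//   zig_zag_encode64(-2) == 3
//
// e.g. { 100, 101, 99 } delta-encodes to { 100, 1, -2 }, which zig-zag
// encodes to { 200, 2, 3 }: three one-byte varints instead of 24 bytes.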
@ -0,0 +1,57 @@

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////

#ifndef IRESEARCH_DELTA_COMPRESSION_H
#define IRESEARCH_DELTA_COMPRESSION_H

#include "string.hpp"
#include "compression.hpp"
#include "noncopyable.hpp"

NS_ROOT
NS_BEGIN(compression)

class IRESEARCH_API delta_compressor : public compressor, private util::noncopyable {
 public:
  virtual bytes_ref compress(byte_type* src, size_t size, bstring& out) override final;
}; // delta_compressor

class IRESEARCH_API delta_decompressor : public decompressor, private util::noncopyable {
 public:
  /// @returns bytes_ref::NIL in case of error
  virtual bytes_ref decompress(byte_type* src, size_t src_size,
                               byte_type* dst, size_t dst_size) override final;
}; // delta_decompressor

struct IRESEARCH_API delta {
  DECLARE_COMPRESSION_TYPE();

  static void init();
  static compression::compressor::ptr compressor(const options& opts);
  static compression::decompressor::ptr decompressor();
}; // delta

NS_END // compression
NS_END // NS_ROOT

#endif
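
// Usage sketch (illustrative, assuming the classes declared above): the delta
// codec reinterprets its input as uint64_t values, so it is only meaningful
// for buffers holding a whole number of 64-bit integers.
//
//   uint64_t values[] { 100, 101, 99 };                 // 24 bytes
//   irs::bstring encoded;
//   irs::compression::delta_compressor comp;
//   auto packed = comp.compress(
//     reinterpret_cast<irs::byte_type*>(values), sizeof values, encoded);
//
//   irs::byte_type restored[sizeof values];
//   irs::compression::delta_decompressor decomp;
//   decomp.decompress(const_cast<irs::byte_type*>(packed.c_str()),
//                     packed.size(), restored, sizeof restored);
//   // 'restored' now holds 100, 101, 99 again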
@ -28,9 +28,6 @@

NS_LOCAL

/// @returns percentage of live documents
inline double_t fill_factor(const irs::segment_meta& segment) NOEXCEPT {
  return double(segment.live_docs_count)/segment.docs_count;
@ -41,6 +38,8 @@ inline size_t size_without_removals(const irs::segment_meta& segment) NOEXCEPT{

  return size_t(segment.size * fill_factor(segment));
}

NS_BEGIN(tier)

struct segment_stat {
  segment_stat(const irs::segment_meta& meta) NOEXCEPT
    : meta(&meta),
@ -64,6 +63,10 @@ struct segment_stat {

    return lhs.size < rhs.size;
  }

  operator const irs::segment_meta*() const NOEXCEPT {
    return meta;
  }

  const irs::segment_meta* meta;
  size_t size; // approximate size of segment without removals
  double_t fill_factor;
@ -104,32 +107,6 @@ struct consolidation_candidate {

  double_t score{ DBL_MIN }; // how good this permutation is
};

/// @returns score of the consolidation bucket
double_t consolidation_score(
    const consolidation_candidate& consolidation,
@ -197,14 +174,14 @@ double_t consolidation_score(

  return score;
}

NS_END // tier
NS_END

NS_ROOT
NS_BEGIN(index_utils)

index_writer::consolidation_policy_t consolidation_policy(
    const consolidate_bytes& options) {
  return [options](
      std::set<const segment_meta*>& candidates,
      const index_meta& meta,
@ -233,8 +210,7 @@ index_writer::consolidation_policy_t consolidation_policy(

}

index_writer::consolidation_policy_t consolidation_policy(
    const consolidate_bytes_accum& options) {
  return [options](
      std::set<const segment_meta*>& candidates,
      const index_meta& meta,
@ -280,8 +256,7 @@ index_writer::consolidation_policy_t consolidation_policy(

}

index_writer::consolidation_policy_t consolidation_policy(
    const consolidate_count& options) {
  return [options](
      std::set<const segment_meta*>& candidates,
      const index_meta& meta,
@ -297,8 +272,7 @@ index_writer::consolidation_policy_t consolidation_policy(

}

index_writer::consolidation_policy_t consolidation_policy(
    const consolidate_docs_fill& options) {
  return [options](
      std::set<const segment_meta*>& candidates,
      const index_meta& meta,
@ -318,8 +292,7 @@ index_writer::consolidation_policy_t consolidation_policy(

}

index_writer::consolidation_policy_t consolidation_policy(
    const consolidate_docs_live& options) {
  return [options](
      std::set<const segment_meta*>& candidates,
      const index_meta& meta,
@ -347,8 +320,8 @@ index_writer::consolidation_policy_t consolidation_policy(

}

index_writer::consolidation_policy_t consolidation_policy(
    const consolidate_tier& options) {
  // validate input
  const auto max_segments_per_tier = (std::max)(size_t(1), options.max_segments); // can't merge less than 1 segment
  auto min_segments_per_tier = (std::max)(size_t(1), options.min_segments); // can't merge less than 1 segment
@ -360,8 +333,7 @@ index_writer::consolidation_policy_t consolidation_policy(

  return [max_segments_per_tier, min_segments_per_tier, floor_segment_bytes, max_segments_bytes, min_score](
      std::set<const segment_meta*>& candidates,
      const index_meta& meta,
      const index_writer::consolidating_segments_t& consolidating_segments) -> void {
    size_t consolidating_size = 0; // size of segments in bytes that are currently under consolidation
    size_t min_segment_size = integer_traits<size_t>::const_max; // the smallest segment
    size_t total_index_size = 0; // total size in bytes of all segments in index
@ -373,14 +345,19 @@ index_writer::consolidation_policy_t consolidation_policy(

    ///////////////////////////////////////////////////////////////////////////
    /// get sorted list of segments
    ///////////////////////////////////////////////////////////////////////////

    std::set<tier::segment_stat> sorted_segments;

    // get sorted segments from index meta
    auto push_segments = [&sorted_segments](
        const std::string& /*filename*/,
        const irs::segment_meta& segment) {
      if (segment.live_docs_count) {
        // skip empty segments, they'll be
        // removed from index by index_writer
        // during 'commit'
        sorted_segments.insert(segment);
      }

      return true;
    };
@ -408,6 +385,11 @@ index_writer::consolidation_policy_t consolidation_policy(

      }
    }

    if (!total_docs_count) {
      // nothing to consolidate
      return;
    }

    ///////////////////////////////////////////////////////////////////////////
    /// Stage 2
    /// filter out "too large segments", segment is meant to be treated as large if
@ -434,16 +416,14 @@ index_writer::consolidation_policy_t consolidation_policy(

    /// find candidates
    ///////////////////////////////////////////////////////////////////////////

    tier::consolidation_candidate best(sorted_segments.begin());

    if (sorted_segments.size() >= min_segments_per_tier) {
      for (auto i = sorted_segments.begin(), end = sorted_segments.end(); i != end; ++i) {
        tier::consolidation_candidate candidate(i);

        while (candidate.segments.second != end
               && candidate.count < max_segments_per_tier) {
          candidate.size += candidate.segments.second->size;

          if (candidate.size > max_segments_bytes) {
@ -459,7 +439,7 @@ index_writer::consolidation_policy_t consolidation_policy(

          continue;
        }

        candidate.score = tier::consolidation_score(
          candidate, max_segments_per_tier, floor_segment_bytes
        );
@ -480,9 +460,7 @@ index_writer::consolidation_policy_t consolidation_policy(

    /// pick the best candidate
    ///////////////////////////////////////////////////////////////////////////

    candidates.insert(best.begin(), best.end());
  };
}
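
// Usage sketch (illustrative; the option values are made up, the field names
// follow the consolidate_tier options validated above):
//
//   irs::index_utils::consolidate_tier options;
//   options.min_segments = 1;                      // clamped to >= 1 above
//   options.max_segments = 10;                     // at most 10 segments per merge
//   options.floor_segment_bytes = size_t(2) << 20; // segments below ~2MB score alike
//   options.max_segments_bytes = size_t(5) << 30;  // never build a level above ~5GB
//   auto policy = irs::index_utils::consolidation_policy(options);
//   // 'policy' can then be handed to the index_writer for consolidation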
@ -1919,7 +1919,7 @@ bool codecvtw_facet::append(

  // convert 'BUFFER_SIZE' at a time
  do {
    auto offset = buf.size();
    status = U_ZERO_ERROR;

    buf.resize(buf.size() + BUFFER_SIZE);
@ -2897,8 +2897,9 @@ typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, Cvt

    }
  }

  icu_buf = ctx->icu_buf1_.length() < ctx->icu_buf0_.length()
    ? &ctx->icu_buf1_
    : &ctx->icu_buf0_;
}

// ensure all letters are uppercased/lowercased
@ -3754,4 +3755,4 @@ NS_END

// -----------------------------------------------------------------------------
// --SECTION--                                                      END-OF-FILE
// -----------------------------------------------------------------------------
@ -0,0 +1,142 @@

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////

#include "shared.hpp"
#include "lz4compression.hpp"
#include "error/error.hpp"
#include "store/store_utils.hpp"
#include "utils/string_utils.hpp"
#include "utils/misc.hpp"
#include "utils/type_limits.hpp"

#include <lz4.h>

NS_LOCAL

// can reuse stateless instances
irs::compression::lz4::lz4compressor LZ4_BASIC_COMPRESSOR;
irs::compression::lz4::lz4decompressor LZ4_BASIC_DECOMPRESSOR;

inline int acceleration(const irs::compression::options::Hint hint) NOEXCEPT {
  static const int FACTORS[] { 0, 2, 0 };
  assert(static_cast<size_t>(hint) < IRESEARCH_COUNTOF(FACTORS));

  return FACTORS[static_cast<size_t>(hint)];
}

NS_END

NS_ROOT

static_assert(
  sizeof(char) == sizeof(byte_type),
  "sizeof(char) != sizeof(byte_type)"
);

NS_BEGIN(compression)

void LZ4_streamDecode_deleter::operator()(void *p) NOEXCEPT {
  if (p) {
    LZ4_freeStreamDecode(reinterpret_cast<LZ4_streamDecode_t*>(p));
  }
}

void LZ4_stream_deleter::operator()(void *p) NOEXCEPT {
  if (p) {
    LZ4_freeStream(reinterpret_cast<LZ4_stream_t*>(p));
  }
}

lz4stream lz4_make_stream() {
  return lz4stream(LZ4_createStream());
}

lz4stream_decode lz4_make_stream_decode() {
  return lz4stream_decode(LZ4_createStreamDecode());
}

// -----------------------------------------------------------------------------
// --SECTION--                                                  lz4 compression
// -----------------------------------------------------------------------------

bytes_ref lz4::lz4compressor::compress(byte_type* src, size_t size, bstring& out) {
  assert(size <= integer_traits<int>::const_max); // LZ4 API uses int
  const auto src_size = static_cast<int>(size);

  // ensure we have enough space to store compressed data
  string_utils::oversize(out, size_t(LZ4_COMPRESSBOUND(src_size)));

  const auto* src_data = reinterpret_cast<const char*>(src);
  auto* buf = reinterpret_cast<char*>(&out[0]);
  const auto buf_size = static_cast<int>(out.size());
  const auto lz4_size = LZ4_compress_fast(src_data, buf, src_size, buf_size, acceleration_);

  if (IRS_UNLIKELY(lz4_size < 0)) {
    throw index_error("while compressing, error: LZ4 returned negative size");
  }

  return bytes_ref(reinterpret_cast<const byte_type*>(buf), size_t(lz4_size));
}

bytes_ref lz4::lz4decompressor::decompress(
    byte_type* src, size_t src_size,
    byte_type* dst, size_t dst_size) {
  assert(src_size <= integer_traits<int>::const_max); // LZ4 API uses int

  const auto lz4_size = LZ4_decompress_safe(
    reinterpret_cast<const char*>(src),
    reinterpret_cast<char*>(dst),
    static_cast<int>(src_size), // LZ4 API uses int
    static_cast<int>(std::min(dst_size, static_cast<size_t>(integer_traits<int>::const_max))) // LZ4 API uses int
  );

  if (IRS_UNLIKELY(lz4_size < 0)) {
    return bytes_ref::NIL; // corrupted index
  }

  return bytes_ref(dst, size_t(lz4_size));
}

compressor::ptr lz4::compressor(const options& opts) {
  const auto acceleration = ::acceleration(opts.hint);

  if (0 == acceleration) {
    return compressor::ptr(compressor::ptr(), &LZ4_BASIC_COMPRESSOR);
  }

  return std::make_shared<lz4compressor>(acceleration);
}

decompressor::ptr lz4::decompressor() {
  return decompressor::ptr(decompressor::ptr(), &LZ4_BASIC_DECOMPRESSOR);
}

void lz4::init() {
  // match registration below
  REGISTER_COMPRESSION(lz4, &lz4::compressor, &lz4::decompressor);
}

DEFINE_COMPRESSION_TYPE(iresearch::compression::lz4);
REGISTER_COMPRESSION(lz4, &lz4::compressor, &lz4::decompressor);

NS_END // compression
NS_END
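
// Side note (illustrative, plain standard C++): the factories above return
// 'ptr(ptr(), &LZ4_BASIC_COMPRESSOR)', i.e. the shared_ptr aliasing
// constructor with an empty owner. The result points at the static instance
// but owns nothing, so the stateless singletons are shared without ever
// being deleted:
//
//   #include <memory>
//   static int instance = 42;
//   std::shared_ptr<int> alias(std::shared_ptr<int>(), &instance);
//   // alias.get() == &instance, alias.use_count() == 0, no delete on reset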
@ -0,0 +1,80 @@

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////

#ifndef IRESEARCH_LZ4COMPRESSION_H
#define IRESEARCH_LZ4COMPRESSION_H

#include "string.hpp"
#include "compression.hpp"
#include "noncopyable.hpp"

#include <memory>

NS_ROOT
NS_BEGIN(compression)

struct LZ4_stream_deleter {
  void operator()(void* p) NOEXCEPT;
};

struct LZ4_streamDecode_deleter {
  void operator()(void* p) NOEXCEPT;
};

typedef std::unique_ptr<void, LZ4_stream_deleter> lz4stream;
typedef std::unique_ptr<void, LZ4_streamDecode_deleter> lz4stream_decode;

lz4stream lz4_make_stream();
lz4stream_decode lz4_make_stream_decode();

struct IRESEARCH_API lz4 {
  DECLARE_COMPRESSION_TYPE();

  class IRESEARCH_API lz4compressor final : public compression::compressor {
   public:
    explicit lz4compressor(int acceleration = 0) NOEXCEPT
      : acceleration_(acceleration) {
    }

    int acceleration() const NOEXCEPT { return acceleration_; }

    virtual bytes_ref compress(byte_type* src, size_t size, bstring& out) override;

   private:
    const int acceleration_{0}; // 0 - default acceleration
  };

  class IRESEARCH_API lz4decompressor final : public compression::decompressor {
   public:
    virtual bytes_ref decompress(byte_type* src, size_t src_size,
                                 byte_type* dst, size_t dst_size) override;
  };

  static void init();
  static compression::compressor::ptr compressor(const options& opts);
  static compression::decompressor::ptr decompressor();
}; // lz4

NS_END // compression
NS_END // NS_ROOT

#endif
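
// Usage sketch (illustrative, assuming the classes above and liblz4;
// 'data'/'data_size' are placeholders): a round trip through the stateless
// LZ4 codec. compress() grows 'encoded' to LZ4_COMPRESSBOUND(size) itself,
// while decompress() needs a caller-provided buffer of at least the
// original size.
//
//   irs::bstring encoded;
//   irs::compression::lz4::lz4compressor comp;    // acceleration 0 == default
//   auto packed = comp.compress(data, data_size, encoded);
//
//   std::vector<irs::byte_type> restored(data_size); // original size tracked by caller
//   irs::compression::lz4::lz4decompressor decomp;
//   auto plain = decomp.decompress(
//     const_cast<irs::byte_type*>(packed.c_str()), packed.size(),
//     restored.data(), restored.size());
//   // a NIL result signals a corrupted block (LZ4_decompress_safe < 0)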
@ -38,8 +38,13 @@ NS_ROOT

NS_BEGIN(memory)

inline CONSTEXPR size_t align_up(size_t size, size_t alignment) NOEXCEPT {
#if defined(_MSC_VER) && (_MSC_VER < 1900)
  assert(math::is_power2(alignment));
  return (size + alignment - 1) & (0 - alignment);
#else
  return IRS_ASSERT(math::is_power2(alignment)),
         (size + alignment - 1) & (0 - alignment);
#endif
}

///////////////////////////////////////////////////////////////////////////////
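
// Worked example (illustrative): align_up() rounds up to the next multiple of
// a power-of-two alignment via '(size + alignment - 1) & (0 - alignment)':
//
//   align_up(13, 8) == 16   // (13 + 7) & ~7
//   align_up(16, 8) == 16   // already aligned, unchanged
//   align_up(1, 64) == 64
//
// The power-of-two assertion matters: with any other alignment the mask
// would silently yield a wrong result instead of failing.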
@ -419,6 +424,7 @@ template<

      free_ = std::move(rhs.free_);
      blocks_ = std::move(rhs.blocks_);
    }

    return *this;
  }

  ~memory_pool() NOEXCEPT {
@ -62,7 +62,7 @@ class generic_register: public singleton<RegisterType> {

  typedef std::unordered_map<key_type, entry_type> register_map_t;
  typedef std::function<bool(const key_type& key)> visitor_t;

  virtual ~generic_register() = default;

  // @return the entry registered under the key and if an insertion took place
  std::pair<entry_type, bool> set(
@ -181,8 +181,6 @@ class tagged_generic_register: public generic_register<KeyType, EntryType, Regis

  typedef typename parent_type::entry_type entry_type;
  typedef TagType tag_type;

  // @return the entry registered under the key and if an insertion took place
  std::pair<entry_type, bool> set(
    const key_type& key,
@ -214,4 +212,4 @@ class tagged_generic_register: public generic_register<KeyType, EntryType, Regis

NS_END

#endif
@ -7,7 +7,7 @@ export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:$(pwd)/bin"

ulimit -c unlimited

for i in `seq 1 1`; do
  for j in 1 5 10 15 20 25; do
    MAX_LINES=${j}000000

    rm -r iresearch.data || {
@ -7,7 +7,7 @@ export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:$(pwd)/build/bin"

ulimit -c unlimited

for i in `seq 1 1`; do
  for j in 1 5 10 15 20 25; do
    MAX_LINES=${j}000000

    # search
@ -88,6 +88,7 @@ set(IReSearch_tests_sources

  ./utils/async_utils_tests.cpp
  ./utils/bitvector_tests.cpp
  ./utils/container_utils_tests.cpp
  ./utils/compression_test.cpp
  ./utils/crc_test.cpp
  ./utils/file_utils_tests.cpp
  ./utils/map_utils_tests.cpp
@ -155,6 +156,7 @@ add_executable(${IResearchTests_TARGET_NAME}-static

  ./analysis/text_analyzer_tests.cpp
  ./formats/formats_10_tests.cpp
  ./formats/formats_11_tests.cpp
  ./formats/formats_12_tests.cpp
  ./iql/parser_test.cpp
)
@ -237,7 +237,7 @@ TEST_P(format_11_test_case, open_ecnrypted_with_non_encrypted) {

  ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());

  // write segment with format11
  {
    auto codec = irs::formats::get("1_1");
    ASSERT_NE(nullptr, codec);
@ -269,7 +269,7 @@ TEST_P(format_11_test_case, open_non_ecnrypted_with_encrypted) {

  ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());

  // write segment with format11
  {
    auto codec = irs::formats::get("1_1");
    ASSERT_NE(nullptr, codec);
@ -0,0 +1,514 @@

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////

#include "tests_shared.hpp"
#include "formats_test_case_base.hpp"
#include "store/directory_attributes.hpp"

NS_LOCAL

// -----------------------------------------------------------------------------
// --SECTION--                                          format 12 specific tests
// -----------------------------------------------------------------------------

class format_12_test_case : public tests::directory_test_case_base {
};

TEST_P(format_12_test_case, read_zero_block_encryption) {
  tests::json_doc_generator gen(
    resource("simple_sequential.json"),
    &tests::generic_json_field_factory
  );

  tests::document const* doc1 = gen.next();

  ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());

  // write segment with format12
  {
    auto codec = irs::formats::get("1_2");
    ASSERT_NE(nullptr, codec);
    auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
    ASSERT_NE(nullptr, writer);

    ASSERT_TRUE(insert(*writer,
      doc1->indexed.begin(), doc1->indexed.end(),
      doc1->stored.begin(), doc1->stored.end()
    ));

    writer->commit();
  }

  // replace encryption
  ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
  dir().attributes().emplace<tests::rot13_encryption>(6);

  // can't open encrypted index without encryption
  ASSERT_THROW(irs::directory_reader::open(dir()), irs::index_error);
}
TEST_P(format_12_test_case, write_zero_block_encryption) {
  tests::json_doc_generator gen(
    resource("simple_sequential.json"),
    &tests::generic_json_field_factory
  );

  tests::document const* doc1 = gen.next();

  // replace encryption with a zero-block-size cipher
  ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
  dir().attributes().emplace<tests::rot13_encryption>(0);

  // write segment with format12
  auto codec = irs::formats::get("1_2");
  ASSERT_NE(nullptr, codec);
  auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
  ASSERT_NE(nullptr, writer);

  ASSERT_THROW(insert(*writer,
    doc1->indexed.begin(), doc1->indexed.end(),
    doc1->stored.begin(), doc1->stored.end()
  ), irs::index_error);
}

TEST_P(format_12_test_case, fields_read_write_wrong_encryption) {
  // create sorted && unsorted terms
  typedef std::set<irs::bytes_ref> sorted_terms_t;
  typedef std::vector<irs::bytes_ref> unsorted_terms_t;
  sorted_terms_t sorted_terms;
  unsorted_terms_t unsorted_terms;

  tests::json_doc_generator gen(
    resource("fst_prefixes.json"),
    [&sorted_terms, &unsorted_terms] (tests::document& doc, const std::string& name, const tests::json_doc_generator::json_value& data) {
      doc.insert(std::make_shared<tests::templates::string_field>(
        irs::string_ref(name),
        data.str
      ));

      auto ref = irs::ref_cast<irs::byte_type>((doc.indexed.end() - 1).as<tests::templates::string_field>().value());
      sorted_terms.emplace(ref);
      unsorted_terms.emplace_back(ref);
  });

  // define field
  irs::field_meta field;
  field.name = "field";
  field.norm = 5;

  auto codec = irs::formats::get("1_2");
  ASSERT_NE(nullptr, codec);
  ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());

  // write fields
  {
    irs::flush_state state;
    state.dir = &dir();
    state.doc_count = 100;
    state.name = "segment_name";
    state.features = &field.features;

    // should use sorted terms on write
    tests::format_test_case::terms<sorted_terms_t::iterator> terms(
      sorted_terms.begin(), sorted_terms.end()
    );

    auto writer = codec->get_field_writer(false);
    ASSERT_NE(nullptr, writer);
    writer->prepare(state);
    writer->write(field.name, field.norm, field.features, terms);
    writer->end();
  }

  irs::segment_meta meta;
  meta.name = "segment_name";
  irs::document_mask docs_mask;

  auto reader = codec->get_field_reader();
  ASSERT_NE(nullptr, reader);

  // can't open encrypted index without encryption
  ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
  ASSERT_THROW(reader->prepare(dir(), meta, docs_mask), irs::index_error);

  // can't open encrypted index with wrong encryption
  dir().attributes().emplace<tests::rot13_encryption>(6);
  ASSERT_THROW(reader->prepare(dir(), meta, docs_mask), irs::index_error);
}

TEST_P(format_12_test_case, column_meta_read_write_wrong_encryption) {
  auto codec = irs::formats::get("1_2");
  ASSERT_NE(nullptr, codec);

  ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());

  irs::segment_meta meta;
  meta.name = "_1";

  // write meta
  {
    auto writer = codec->get_column_meta_writer();

    // write segment _1
    writer->prepare(dir(), meta);
    writer->write("_1_column1", 1);
    writer->write("_1_column2", 2);
    writer->write("_1_column0", 0);
    writer->flush();
  }

  size_t count = 0;
  irs::field_id max_id = 0;

  auto reader = codec->get_column_meta_reader();
  ASSERT_NE(nullptr, reader);

  // can't open encrypted index without encryption
  ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
  ASSERT_THROW(reader->prepare(dir(), meta, count, max_id), irs::index_error);

  // can't open encrypted index with wrong encryption
  dir().attributes().emplace<tests::rot13_encryption>(6);
  ASSERT_THROW(reader->prepare(dir(), meta, count, max_id), irs::index_error);
}

TEST_P(format_12_test_case, open_ecnrypted_with_wrong_encryption) {
  tests::json_doc_generator gen(
    resource("simple_sequential.json"),
    &tests::generic_json_field_factory
  );

  tests::document const* doc1 = gen.next();

  ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());

  // write segment with format12
  {
    auto codec = irs::formats::get("1_2");
    ASSERT_NE(nullptr, codec);
    auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
    ASSERT_NE(nullptr, writer);

    ASSERT_TRUE(insert(*writer,
      doc1->indexed.begin(), doc1->indexed.end(),
      doc1->stored.begin(), doc1->stored.end()
    ));

    writer->commit();
  }

  // can't open encrypted index with wrong encryption
  ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
  dir().attributes().emplace<tests::rot13_encryption>(6);
  ASSERT_THROW(irs::directory_reader::open(dir()), irs::index_error);
}

TEST_P(format_12_test_case, open_ecnrypted_with_non_encrypted) {
  tests::json_doc_generator gen(
    resource("simple_sequential.json"),
    &tests::generic_json_field_factory
  );

  tests::document const* doc1 = gen.next();

  ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());

  // write segment with format12
  {
    auto codec = irs::formats::get("1_2");
    ASSERT_NE(nullptr, codec);
    auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
    ASSERT_NE(nullptr, writer);

    ASSERT_TRUE(insert(*writer,
      doc1->indexed.begin(), doc1->indexed.end(),
      doc1->stored.begin(), doc1->stored.end()
    ));

    writer->commit();
  }

  // remove encryption
  dir().attributes().remove<tests::rot13_encryption>();

  // can't open encrypted index without encryption
  ASSERT_THROW(irs::directory_reader::open(dir()), irs::index_error);
}

TEST_P(format_12_test_case, open_non_ecnrypted_with_encrypted) {
  tests::json_doc_generator gen(
    resource("simple_sequential.json"),
    &tests::generic_json_field_factory
  );

  tests::document const* doc1 = gen.next();

  ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());

  // write segment with format12
  {
    auto codec = irs::formats::get("1_2");
    ASSERT_NE(nullptr, codec);
    auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
    ASSERT_NE(nullptr, writer);

    ASSERT_TRUE(insert(*writer,
      doc1->indexed.begin(), doc1->indexed.end(),
      doc1->stored.begin(), doc1->stored.end()
    ));

    writer->commit();
  }

  // add cipher
  dir().attributes().emplace<tests::rot13_encryption>(7);

  // check index
  auto index = irs::directory_reader::open(dir());
  ASSERT_TRUE(index);
  ASSERT_EQ(1, index->size());
  ASSERT_EQ(1, index->docs_count());
  ASSERT_EQ(1, index->live_docs_count());

  // check segment 0
  {
    auto& segment = index[0];
    ASSERT_EQ(1, segment.size());
    ASSERT_EQ(1, segment.docs_count());
    ASSERT_EQ(1, segment.live_docs_count());

    std::unordered_set<irs::string_ref> expectedName = { "A" };
    const auto* column = segment.column_reader("name");
    ASSERT_NE(nullptr, column);
    auto values = column->values();
    ASSERT_EQ(expectedName.size(), segment.docs_count()); // total count of documents
    auto terms = segment.field("same");
    ASSERT_NE(nullptr, terms);
    auto termItr = terms->iterator();
    ASSERT_TRUE(termItr->next());

    irs::bytes_ref actual_value;
    for (auto docsItr = termItr->postings(iresearch::flags()); docsItr->next();) {
      ASSERT_TRUE(values(docsItr->value(), actual_value));
      ASSERT_EQ(1, expectedName.erase(irs::to_string<irs::string_ref>(actual_value.c_str())));
    }

    ASSERT_TRUE(expectedName.empty());
  }
}

TEST_P(format_12_test_case, open_10_with_12) {
  tests::json_doc_generator gen(
    resource("simple_sequential.json"),
    &tests::generic_json_field_factory
  );

  tests::document const* doc1 = gen.next();

  // write segment with format10
  {
    auto codec = irs::formats::get("1_0");
    ASSERT_NE(nullptr, codec);
    auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
    ASSERT_NE(nullptr, writer);

    ASSERT_TRUE(insert(*writer,
      doc1->indexed.begin(), doc1->indexed.end(),
      doc1->stored.begin(), doc1->stored.end()
    ));

    writer->commit();
  }

  // check index
  auto codec = irs::formats::get("1_2");
  ASSERT_NE(nullptr, codec);
  auto index = irs::directory_reader::open(dir(), codec);
  ASSERT_TRUE(index);
  ASSERT_EQ(1, index->size());
  ASSERT_EQ(1, index->docs_count());
  ASSERT_EQ(1, index->live_docs_count());

  // check segment 0
  {
    auto& segment = index[0];
    ASSERT_EQ(1, segment.size());
    ASSERT_EQ(1, segment.docs_count());
    ASSERT_EQ(1, segment.live_docs_count());

    std::unordered_set<irs::string_ref> expectedName = { "A" };
    const auto* column = segment.column_reader("name");
    ASSERT_NE(nullptr, column);
    auto values = column->values();
    ASSERT_EQ(expectedName.size(), segment.docs_count()); // total count of documents
    auto terms = segment.field("same");
    ASSERT_NE(nullptr, terms);
    auto termItr = terms->iterator();
    ASSERT_TRUE(termItr->next());

    irs::bytes_ref actual_value;
    for (auto docsItr = termItr->postings(iresearch::flags()); docsItr->next();) {
      ASSERT_TRUE(values(docsItr->value(), actual_value));
      ASSERT_EQ(1, expectedName.erase(irs::to_string<irs::string_ref>(actual_value.c_str())));
    }

    ASSERT_TRUE(expectedName.empty());
  }
}

|
||||
tests::json_doc_generator gen(
|
||||
resource("simple_sequential.json"),
|
||||
&tests::generic_json_field_factory
|
||||
);
|
||||
|
||||
tests::document const* doc1 = gen.next();
|
||||
tests::document const* doc2 = gen.next();
|
||||
|
||||
// write segment with format10
|
||||
{
|
||||
auto codec = irs::formats::get("1_0");
|
||||
ASSERT_NE(nullptr, codec);
|
||||
auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
|
||||
ASSERT_NE(nullptr, writer);
|
||||
|
||||
ASSERT_TRUE(insert(*writer,
|
||||
doc1->indexed.begin(), doc1->indexed.end(),
|
||||
doc1->stored.begin(), doc1->stored.end()
|
||||
));
|
||||
|
||||
writer->commit();
|
||||
}
|
||||
|
||||
// write segment with format11
|
||||
{
|
||||
auto codec = irs::formats::get("1_2");
|
||||
ASSERT_NE(nullptr, codec);
|
||||
auto writer = irs::index_writer::make(dir(), codec, irs::OM_APPEND);
|
||||
ASSERT_NE(nullptr, writer);
|
||||
|
||||
ASSERT_TRUE(insert(*writer,
|
||||
doc2->indexed.begin(), doc2->indexed.end(),
|
||||
doc2->stored.begin(), doc2->stored.end()
|
||||
));
|
||||
|
||||
writer->commit();
|
||||
}
|
||||
|
||||
// check index
|
||||
auto index = irs::directory_reader::open(dir());
|
||||
ASSERT_TRUE(index);
|
||||
ASSERT_EQ(2, index->size());
|
||||
ASSERT_EQ(2, index->docs_count());
|
||||
ASSERT_EQ(2, index->live_docs_count());
|
||||
|
||||
// check segment 0
|
||||
{
|
||||
auto& segment = index[0];
|
||||
ASSERT_EQ(1, segment.size());
|
||||
ASSERT_EQ(1, segment.docs_count());
|
||||
ASSERT_EQ(1, segment.live_docs_count());
|
||||
|
||||
std::unordered_set<irs::string_ref> expectedName = { "A" };
|
||||
const auto* column = segment.column_reader("name");
|
||||
ASSERT_NE(nullptr, column);
|
||||
auto values = column->values();
|
||||
ASSERT_EQ(expectedName.size(), segment.docs_count()); // total count of documents
|
||||
auto terms = segment.field("same");
|
||||
ASSERT_NE(nullptr, terms);
|
||||
auto termItr = terms->iterator();
|
||||
ASSERT_TRUE(termItr->next());
|
||||
|
||||
irs::bytes_ref actual_value;
|
||||
for (auto docsItr = termItr->postings(iresearch::flags()); docsItr->next();) {
|
||||
ASSERT_TRUE(values(docsItr->value(), actual_value));
|
||||
ASSERT_EQ(1, expectedName.erase(irs::to_string<irs::string_ref>(actual_value.c_str())));
|
||||
}
|
||||
|
||||
ASSERT_TRUE(expectedName.empty());
|
||||
}
|
||||
|
||||
// check segment 1
|
||||
{
|
||||
auto& segment = index[1];
|
||||
ASSERT_EQ(1, segment.size());
|
||||
ASSERT_EQ(1, segment.docs_count());
|
||||
ASSERT_EQ(1, segment.live_docs_count());
|
||||
|
||||
std::unordered_set<irs::string_ref> expectedName = { "B" };
|
||||
const auto* column = segment.column_reader("name");
|
||||
ASSERT_NE(nullptr, column);
|
||||
auto values = column->values();
|
||||
ASSERT_EQ(expectedName.size(), segment.docs_count()); // total count of documents
|
||||
auto terms = segment.field("same");
|
||||
ASSERT_NE(nullptr, terms);
|
||||
auto termItr = terms->iterator();
|
||||
ASSERT_TRUE(termItr->next());
|
||||
|
||||
irs::bytes_ref actual_value;
|
||||
for (auto docsItr = termItr->postings(iresearch::flags()); docsItr->next();) {
|
||||
ASSERT_TRUE(values(docsItr->value(), actual_value));
|
||||
ASSERT_EQ(1, expectedName.erase(irs::to_string<irs::string_ref>(actual_value.c_str())));
|
||||
}
|
||||
|
||||
ASSERT_TRUE(expectedName.empty());
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
format_12_test,
|
||||
format_12_test_case,
|
||||
::testing::Values(
|
||||
&tests::rot13_cipher_directory<&tests::memory_directory, 16>,
|
||||
&tests::rot13_cipher_directory<&tests::fs_directory, 16>,
|
||||
&tests::rot13_cipher_directory<&tests::mmap_directory, 16>
|
||||
),
|
||||
tests::directory_test_case_base::to_string
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// --SECTION-- generic tests
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
using tests::format_test_case;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
format_12_test,
|
||||
format_test_case,
|
||||
::testing::Combine(
|
||||
::testing::Values(
|
||||
&tests::rot13_cipher_directory<&tests::memory_directory, 16>,
|
||||
&tests::rot13_cipher_directory<&tests::fs_directory, 16>,
|
||||
&tests::rot13_cipher_directory<&tests::mmap_directory, 16>,
|
||||
&tests::rot13_cipher_directory<&tests::memory_directory, 7>,
|
||||
&tests::rot13_cipher_directory<&tests::fs_directory, 7>,
|
||||
&tests::rot13_cipher_directory<&tests::mmap_directory, 7>
|
||||
),
|
||||
::testing::Values("1_2")
|
||||
),
|
||||
tests::to_string
|
||||
);
|
||||
|
||||
NS_END
|
|
@ -22,6 +22,7 @@

////////////////////////////////////////////////////////////////////////////////

#include "formats_test_case_base.hpp"
#include "utils/lz4compression.hpp"

namespace tests {
@ -796,10 +797,13 @@ TEST_P(format_test_case, columns_rw_sparse_column_dense_block) {

  // write docs
  {
    auto writer = codec()->get_columnstore_writer();
    writer->prepare(dir(), seg);
    auto column = writer->push_column({
      irs::compression::lz4::type(),
      irs::compression::options(),
      bool(irs::get_encryption(dir().attributes()))
    });
    column_id = column.first;
    auto& column_handler = column.second;
@ -859,7 +863,11 @@ TEST_P(format_test_case, columns_rw_dense_mask) {

  {
    auto writer = codec()->get_columnstore_writer();
    writer->prepare(dir(), seg);
    auto column = writer->push_column({
      irs::compression::lz4::type(),
      irs::compression::options(),
      bool(irs::get_encryption(dir().attributes()))
    });
    column_id = column.first;
    auto& column_handler = column.second;
@ -902,7 +910,11 @@ TEST_P(format_test_case, columns_rw_bit_mask) {

  auto writer = codec()->get_columnstore_writer();
  writer->prepare(dir(), segment);

  auto column = writer->push_column({
    irs::compression::lz4::type(),
    irs::compression::options(),
    bool(irs::get_encryption(dir().attributes()))
  });

  id = column.first;
  auto& handle = column.second;
@ -1125,9 +1137,10 @@ TEST_P(format_test_case, columns_rw_empty) {

  {
    auto writer = codec()->get_columnstore_writer();
    writer->prepare(dir(), meta0);

    column0_id = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) }).first;
    ASSERT_EQ(0, column0_id);
    column1_id = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) }).first;
    ASSERT_EQ(1, column1_id);
    ASSERT_FALSE(writer->commit()); // flush empty columns
  }
@ -1192,7 +1205,11 @@ TEST_P(format_test_case, columns_rw_same_col_empty_repeat) {

    );

    if (res.second) {
      res.first->second = writer->push_column({
        irs::compression::lz4::type(),
        irs::compression::options(),
        bool(irs::get_encryption(dir().attributes()))
      });
    }

    auto& column = res.first->second.second;
@ -1268,7 +1285,11 @@ TEST_P(format_test_case, columns_rw_big_document) {

  auto writer = codec()->get_columnstore_writer();
  writer->prepare(dir(), segment);

  auto column = writer->push_column({
    irs::compression::lz4::type(),
    irs::compression::options(),
    bool(irs::get_encryption(dir().attributes()))
  });
  id = column.first;

  {
@ -1432,7 +1453,7 @@ TEST_P(format_test_case, columns_rw_writer_reuse) {

    );

    if (res.second) {
      res.first->second = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    }

    auto& column = res.first->second.second;
@ -1461,7 +1482,7 @@ TEST_P(format_test_case, columns_rw_writer_reuse) {

    );

    if (res.second) {
      res.first->second = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    }

    auto& column = res.first->second.second;
@ -1488,7 +1509,7 @@ TEST_P(format_test_case, columns_rw_writer_reuse) {

    );

    if (res.second) {
      res.first->second = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    }

    auto& column = res.first->second.second;
@ -1677,7 +1698,7 @@ TEST_P(format_test_case, columns_rw_typed) {

    );

    if (res.second) {
      res.first->second = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    }

    auto& column = res.first->second.second;
@ -1923,8 +1944,8 @@ TEST_P(format_test_case, columns_rw_sparse_dense_offset_column_border_case) {

  auto writer = codec()->get_columnstore_writer();
  writer->prepare(dir(), meta0);

  dense_fixed_offset_column = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
  sparse_fixed_offset_column = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });

  irs::doc_id_t doc = irs::type_limits<irs::type_t::doc_id_t>::min();
@ -2120,26 +2141,26 @@ TEST_P(format_test_case, columns_rw) {

  {
    writer->prepare(dir(), meta0);

    auto field0 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    segment0_field0_id = field0.first;
    auto& field0_writer = field0.second;
    ASSERT_EQ(0, segment0_field0_id);
    auto field1 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    segment0_field1_id = field1.first;
    auto& field1_writer = field1.second;
    ASSERT_EQ(1, segment0_field1_id);
    auto empty_field = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) }); // gap between filled columns
    segment0_empty_column_id = empty_field.first;
    ASSERT_EQ(2, segment0_empty_column_id);
    auto field2 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    segment0_field2_id = field2.first;
    auto& field2_writer = field2.second;
    ASSERT_EQ(3, segment0_field2_id);
    auto field3 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    segment0_field3_id = field3.first;
    auto& field3_writer = field3.second;
    ASSERT_EQ(4, segment0_field3_id);
    auto field4 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    segment0_field4_id = field4.first;
    auto& field4_writer = field4.second;
    ASSERT_EQ(5, segment0_field4_id);
@ -2219,15 +2240,15 @@ TEST_P(format_test_case, columns_rw) {

  {
    writer->prepare(dir(), meta1);

    auto field0 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    segment1_field0_id = field0.first;
    auto& field0_writer = field0.second;
    ASSERT_EQ(0, segment1_field0_id);
    auto field1 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    segment1_field1_id = field1.first;
    auto& field1_writer = field1.second;
    ASSERT_EQ(1, segment1_field1_id);
    auto field2 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
    segment1_field2_id = field2.first;
    auto& field2_writer = field2.second;
    ASSERT_EQ(2, segment1_field2_id);
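
// Side note (illustrative): push_column() now takes a column descriptor --
// { compression type, compression options, encrypt flag } -- so each column
// states explicitly how its payload is stored. The shorthand used above:
//
//   writer->push_column({
//     irs::compression::lz4::type(),                 // which codec to use
//     irs::compression::options(),                   // codec tuning (hint, etc.)
//     bool(irs::get_encryption(dir().attributes()))  // encrypt iff a cipher is set
//   });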
@ -137,9 +137,10 @@ index_segment& index_segment::operator=(index_segment&& rhs) NOEXCEPT {
}

void index_segment::add_sorted(const ifield& f) {
irs::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
if (f.write(out)) {
const irs::bytes_ref value = out;
const irs::bytes_ref value = buf;
const auto doc_id = irs::doc_id_t((irs::doc_limits::min)() + count_);
sort_.emplace_back(std::make_pair(irs::bstring(value.c_str(), value.size()), doc_id));
}
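The second recurring change starts here: irs::bytes_output stops owning its buffer and instead appends into a caller-supplied irs::bstring, so the serialized bytes stay addressable after the writer is gone. A sketch of the new idiom, assuming nothing beyond what this hunk shows:

    irs::bstring buf;                 // caller owns the storage
    irs::bytes_output out(buf);       // writer appends into buf
    field.write(out);                 // serialize through the data_output interface
    const irs::bytes_ref value = buf; // view of the written bytes, backed by buf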
@ -147,6 +147,55 @@ TEST(consolidation_test_tier, test_max_consolidation_size) {
}
}

TEST(consolidation_test_tier, empty_meta) {
irs::index_meta meta;
irs::index_utils::consolidate_tier options;
options.floor_segment_bytes = 1;
options.max_segments = 10;
options.min_segments = 1;
options.max_segments_bytes = irs::integer_traits<size_t>::const_max;

irs::index_writer::consolidating_segments_t consolidating_segments;
auto policy = irs::index_utils::consolidation_policy(options);
std::set<const irs::segment_meta*> candidates;
policy(candidates, meta, consolidating_segments);
ASSERT_TRUE(candidates.empty());
}

TEST(consolidation_test_tier, empty_consolidating_segment) {
irs::index_meta meta;
meta.add(irs::segment_meta("empty", nullptr, 1, 0, false, irs::segment_meta::file_set(), 1));

irs::index_utils::consolidate_tier options;
options.floor_segment_bytes = 1;
options.max_segments = 10;
options.min_segments = 1;
options.max_segments_bytes = irs::integer_traits<size_t>::const_max;

irs::index_writer::consolidating_segments_t consolidating_segments { &meta[0].meta };
auto policy = irs::index_utils::consolidation_policy(options);
std::set<const irs::segment_meta*> candidates;
policy(candidates, meta, consolidating_segments);
ASSERT_TRUE(candidates.empty()); // skip empty consolidating segments
}

TEST(consolidation_test_tier, empty_segment) {
irs::index_meta meta;
meta.add(irs::segment_meta("empty", nullptr, 0, 0, false, irs::segment_meta::file_set(), 1));

irs::index_utils::consolidate_tier options;
options.floor_segment_bytes = 1;
options.max_segments = 10;
options.min_segments = 1;
options.max_segments_bytes = irs::integer_traits<size_t>::const_max;

irs::index_writer::consolidating_segments_t consolidating_segments { &meta[0].meta };
auto policy = irs::index_utils::consolidation_policy(options);
std::set<const irs::segment_meta*> candidates;
policy(candidates, meta, consolidating_segments);
ASSERT_TRUE(candidates.empty()); // skip empty segments
}

TEST(consolidation_test_tier, test_max_consolidation_count) {
// generate meta
irs::index_meta meta;
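The three new tests pin down the degenerate inputs of the tier policy: an empty index, a segment already scheduled for consolidation, and a zero-document segment must all yield no candidates. Condensed, the driving pattern (same types as above, nothing new assumed) is:

    irs::index_utils::consolidate_tier options;
    options.min_segments = 1;        // smallest tier eligible for merging
    options.max_segments = 10;       // cap on segments merged at once
    options.floor_segment_bytes = 1; // segments below this count as equal-sized
    options.max_segments_bytes = irs::integer_traits<size_t>::const_max;

    auto policy = irs::index_utils::consolidation_policy(options);
    std::set<const irs::segment_meta*> candidates;
    policy(candidates, meta, consolidating_segments); // fills candidates; may stay empty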
@ -1304,4 +1353,4 @@ TEST(consolidation_test_tier, test_skewed_segments) {

// -----------------------------------------------------------------------------
// --SECTION--                                                      END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------
@ -419,8 +419,8 @@ class json_doc_generator: public doc_generator_base {
return *this;
}

operator irs::string_ref() const { return irs::string_ref(data, size); };
operator std::string() const { return std::string(data, size); };
operator irs::string_ref() const { return irs::string_ref(data, size); }
operator std::string() const { return std::string(data, size); }
};

struct json_value {
File diff suppressed because it is too large
@ -186,15 +186,13 @@ class index_test_base : public virtual test_param_base<index_test_context> {
irs::index_writer::ptr open_writer(
irs::directory& dir,
irs::OpenMode mode = irs::OM_CREATE,
const irs::index_writer::init_options& options = {}
) {
const irs::index_writer::init_options& options = {}) {
return irs::index_writer::make(dir, codec_, mode, options);
}

irs::index_writer::ptr open_writer(
irs::OpenMode mode = irs::OM_CREATE,
const irs::index_writer::init_options& options = {}
) {
const irs::index_writer::init_options& options = {}) {
return irs::index_writer::make(*dir_, codec_, mode, options);
}

@ -26,6 +26,7 @@
#include "iql/query_builder.hpp"
#include "store/memory_directory.hpp"
#include "utils/type_limits.hpp"
#include "utils/lz4compression.hpp"
#include "index/merge_writer.hpp"

namespace tests {
@ -165,8 +166,12 @@ TEST_F(merge_writer_tests, test_merge_writer_columns_remove) {
writer->commit();
}

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

auto reader = iresearch::directory_reader::open(dir, codec_ptr);
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);

ASSERT_EQ(2, reader.size());
ASSERT_EQ(2, reader[0].docs_count());
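From here on, merge_writer requires a column-info provider: merging rewrites every surviving column, so the writer has to know how each one is compressed and whether it is encrypted. The provider maps a column name to its storage settings, which permits per-column policies; these tests use a uniform lambda. A sketch of the contract, assuming only what the hunks show:

    irs::column_info_provider_t column_info = [](const irs::string_ref& /*name*/) {
      // same settings for every column in these tests
      return irs::column_info(irs::compression::lz4::type(),
                              irs::compression::options{},
                              true); // encrypt merged columns
    };
    irs::merge_writer writer(dir, column_info); // dir: target irs::directory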
@ -566,8 +571,12 @@ TEST_F(merge_writer_tests, test_merge_writer_columns) {
writer->commit();
}

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

auto reader = iresearch::directory_reader::open(dir, codec_ptr);
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);

ASSERT_EQ(2, reader.size());
ASSERT_EQ(2, reader[0].docs_count());
@ -1056,8 +1065,12 @@ TEST_F(merge_writer_tests, test_merge_writer) {
return reader ? reader->docs_count() : 0;
};

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

auto reader = iresearch::directory_reader::open(dir, codec_ptr);
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);

ASSERT_EQ(2, reader.size());
ASSERT_EQ(2, reader[0].docs_count());
@ -2207,9 +2220,13 @@ TEST_F(merge_writer_tests, test_merge_writer_add_segments) {

// merge 33 segments to writer (segments > 32 to trigger GCC 8.2.0 optimizer bug)
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
irs::index_meta::index_segment_t index_segment;
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);

for (auto& sub_reader: reader) {
writer.add(sub_reader);
@ -2263,10 +2280,14 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {

// test default progress (false)
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
irs::index_meta::index_segment_t index_segment;
irs::merge_writer::flush_progress_t progress;
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);

index_segment.meta.codec = codec_ptr;
writer.add(reader[0]);
@ -2285,10 +2306,14 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {

// test always-false progress
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
irs::index_meta::index_segment_t index_segment;
irs::merge_writer::flush_progress_t progress = []()->bool { return false; };
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);

index_segment.meta.codec = codec_ptr;
writer.add(reader[0]);
@ -2311,11 +2336,15 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {

// test always-true progress
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
irs::index_meta::index_segment_t index_segment;
irs::merge_writer::flush_progress_t progress =
[&progress_call_count]()->bool { ++progress_call_count; return true; };
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);

index_segment.meta.codec = codec_ptr;
writer.add(reader[0]);
@ -2334,6 +2363,10 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {

ASSERT_TRUE(progress_call_count); // there should have been at least some calls

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

// test limited-true progress
for (size_t i = 1; i < progress_call_count; ++i) { // +1 for pre-decrement in 'progress'
size_t call_count = i;
@ -2341,7 +2374,7 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {
irs::index_meta::index_segment_t index_segment;
irs::merge_writer::flush_progress_t progress =
[&call_count]()->bool { return --call_count; };
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);

index_segment.meta.codec = codec_ptr;
index_segment.meta.name = "merged";
@ -2405,7 +2438,11 @@ TEST_F(merge_writer_tests, test_merge_writer_field_features) {

// test merge existing with feature subset (success)
{
irs::merge_writer writer(dir);
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::merge_writer writer(dir, column_info);
writer.add(reader[1]); // assume 1 is segment with text field
writer.add(reader[0]); // assume 0 is segment with string field

@ -2417,7 +2454,11 @@ TEST_F(merge_writer_tests, test_merge_writer_field_features) {

// test merge existing with feature superset (fail)
{
irs::merge_writer writer(dir);
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::merge_writer writer(dir, column_info);
writer.add(reader[0]); // assume 0 is segment with text field
writer.add(reader[1]); // assume 1 is segment with string field

@ -28,6 +28,7 @@
#include "index/index_tests.hpp"
#include "store/memory_directory.hpp"
#include "store/store_utils.hpp"
#include "utils/lz4compression.hpp"

NS_LOCAL

@ -74,8 +75,12 @@ TEST_F(segment_writer_tests, invalid_actions) {
stream.reset(true);
field_t field(stream);

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), {}, true);
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer->memory_active());

// store + store sorted
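segment_writer::make() grows the same parameter, slotted between the directory and the optional comparator, so every segment writer knows the storage settings of the columns it creates; the remaining hunks in this file repeat the pattern. Sketch, reusing the column_info lambda defined above:

    // nullptr = no sort comparator, i.e. an unsorted segment
    auto writer = irs::segment_writer::make(dir, column_info, nullptr);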
@ -126,10 +131,14 @@ TEST_F(segment_writer_tests, memory_sorted_vs_unsorted) {
}
} less;

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), {}, true);
};

irs::memory_directory dir;
auto writer_sorted = irs::segment_writer::make(dir, &less);
auto writer_sorted = irs::segment_writer::make(dir, column_info, &less);
ASSERT_EQ(0, writer_sorted->memory_active());
auto writer_unsorted = irs::segment_writer::make(dir, nullptr);
auto writer_unsorted = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer_unsorted->memory_active());

irs::segment_meta segment;
@ -181,8 +190,12 @@ TEST_F(segment_writer_tests, insert_sorted_without_comparator) {
}
} field;

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options(irs::compression::options::Hint::SPEED), true);
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer->memory_active());

irs::segment_meta segment;
@ -227,8 +240,12 @@ TEST_F(segment_writer_tests, memory_store_sorted_field) {
}
} less;

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, &less);
auto writer = irs::segment_writer::make(dir, column_info, &less);
ASSERT_EQ(0, writer->memory_active());

irs::segment_meta segment;
@ -273,8 +290,12 @@ TEST_F(segment_writer_tests, memory_store_field_sorted) {
}
} less;

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, &less);
auto writer = irs::segment_writer::make(dir, column_info, &less);
ASSERT_EQ(0, writer->memory_active());

irs::segment_meta segment;
@ -313,8 +334,12 @@ TEST_F(segment_writer_tests, memory_store_field_unsorted) {
}
} field;

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer->memory_active());

irs::segment_meta segment;
@ -360,8 +385,12 @@ TEST_F(segment_writer_tests, memory_index_field) {
stream.reset(true);
field_t field(stream);

irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer->memory_active());

for (size_t i = 0; i < 100; ++i) {
@ -399,8 +428,12 @@ TEST_F(segment_writer_tests, index_field) {

// test missing token_stream attributes (increment)
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
irs::segment_writer::update_context ctx;
token_stream_t stream;
field_t field(stream);
@ -418,8 +451,12 @@

// test missing token_stream attributes (term_attribute)
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
irs::segment_writer::update_context ctx;
token_stream_t stream;
field_t field(stream);
@ -30,12 +30,13 @@
#include "store/memory_directory.hpp"
#include "utils/bitvector.hpp"
#include "utils/bytes_utils.hpp"
#include "utils/lz4compression.hpp"
#include "utils/type_limits.hpp"

// FIXME check gaps && deleted docs

TEST(sorted_column_test, ctor) {
irs::sorted_column col;
irs::sorted_column col({ irs::compression::lz4::type(), {}, false });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());
@ -43,7 +44,7 @@ TEST(sorted_column_test, ctor) {
}

TEST(sorted_column_test, flush_empty) {
irs::sorted_column col;
irs::sorted_column col({ irs::compression::lz4::type(), {}, false });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());
@ -159,7 +160,7 @@ TEST(sorted_column_test, insert_duplicates) {

writer->prepare(dir, segment);

irs::sorted_column col;
irs::sorted_column col({ irs::compression::raw::type(), {}, true });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());
@ -270,7 +271,7 @@ TEST(sorted_column_test, sort) {

writer->prepare(dir, segment);

irs::sorted_column col;
irs::sorted_column col({ irs::compression::lz4::type(), {}, true });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());

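sorted_column follows the same scheme and is now constructed from an irs::column_info rather than defaults; note the insert_duplicates test above opting into irs::compression::raw, i.e. an uncompressed but encrypted column. Sketch:

    // lz4-compressed, unencrypted sorted column (as in the ctor test)
    irs::sorted_column col({ irs::compression::lz4::type(), {}, false });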
@ -143,7 +143,7 @@ TEST_P(sorted_index_test_case, simple_sequential) {

// check sorted column
{
std::vector<irs::bytes_output> column_payload;
std::vector<irs::bstring> column_payload;
gen.reset();

while (auto* doc = gen.next()) {
@ -151,14 +151,15 @@ TEST_P(sorted_index_test_case, simple_sequential) {
ASSERT_NE(nullptr, field);

column_payload.emplace_back();
field->write(column_payload.back());
irs::bytes_output out(column_payload.back());
field->write(out);
}

ASSERT_EQ(column_payload.size(), segment.docs_count());

std::sort(
column_payload.begin(), column_payload.end(),
[&less](const irs::bytes_output& lhs, const irs::bytes_output& rhs) {
[&less](const irs::bstring& lhs, const irs::bstring& rhs) {
return less(lhs, rhs);
});

@ -189,8 +190,8 @@ TEST_P(sorted_index_test_case, simple_sequential) {
for (auto& column_name : column_names) {
struct doc {
irs::doc_id_t id{ irs::doc_limits::eof() };
irs::bytes_output order;
irs::bytes_output value;
irs::bstring order;
irs::bstring value;
};

std::vector<doc> column_docs;
@ -207,11 +208,13 @@ TEST_P(sorted_index_test_case, simple_sequential) {
auto* column = doc->stored.get(column_name);

auto& value = column_docs.back();
sorted->write(value.order);
irs::bytes_output order_out(value.order);
sorted->write(order_out);

if (column) {
value.id = id++;
column->write(value.value);
irs::bytes_output value_out(value.value);
column->write(value_out);
}
}

@ -325,21 +328,22 @@ TEST_P(sorted_index_test_case, simple_sequential_consolidate) {
// check sorted column
{
segment_gen.reset();
std::vector<irs::bytes_output> column_payload;
std::vector<irs::bstring> column_payload;

while (auto* doc = segment_gen.next()) {
auto* field = doc->stored.get(sorted_column);
ASSERT_NE(nullptr, field);

column_payload.emplace_back();
field->write(column_payload.back());
irs::bytes_output out(column_payload.back());
field->write(out);
}

ASSERT_EQ(column_payload.size(), segment.docs_count());

std::sort(
column_payload.begin(), column_payload.end(),
[&less](const irs::bytes_output& lhs, const irs::bytes_output& rhs) {
[&less](const irs::bstring& lhs, const irs::bstring& rhs) {
return less(lhs, rhs);
});

@ -370,8 +374,8 @@
for (auto& column_name : column_names) {
struct doc {
irs::doc_id_t id{ irs::doc_limits::eof() };
irs::bytes_output order;
irs::bytes_output value;
irs::bstring order;
irs::bstring value;
};

std::vector<doc> column_docs;
@ -388,11 +392,13 @@
auto* column = doc->stored.get(column_name);

auto& value = column_docs.back();
sorted->write(value.order);
irs::bytes_output order_out(value.order);
sorted->write(order_out);

if (column) {
value.id = id++;
column->write(value.value);
irs::bytes_output value_out(value.value);
column->write(value_out);
}
}

@ -475,21 +481,22 @@ TEST_P(sorted_index_test_case, simple_sequential_consolidate) {
// check sorted column
{
gen.reset();
std::vector<irs::bytes_output> column_payload;
std::vector<irs::bstring> column_payload;

while (auto* doc = gen.next()) {
auto* field = doc->stored.get(sorted_column);
ASSERT_NE(nullptr, field);

column_payload.emplace_back();
field->write(column_payload.back());
irs::bytes_output out(column_payload.back());
field->write(out);
}

ASSERT_EQ(column_payload.size(), segment.docs_count());

std::sort(
column_payload.begin(), column_payload.end(),
[&less](const irs::bytes_output& lhs, const irs::bytes_output& rhs) {
[&less](const irs::bstring& lhs, const irs::bstring& rhs) {
return less(lhs, rhs);
});

@ -520,8 +527,8 @@
for (auto& column_name : column_names) {
struct doc {
irs::doc_id_t id{ irs::doc_limits::eof() };
irs::bytes_output order;
irs::bytes_output value;
irs::bstring order;
irs::bstring value;
};

std::vector<doc> column_docs;
@ -538,11 +545,13 @@
auto* column = doc->stored.get(column_name);

auto& value = column_docs.back();
sorted->write(value.order);
irs::bytes_output order_out(value.order);
sorted->write(order_out);

if (column) {
value.id = id++;
column->write(value.value);
irs::bytes_output value_out(value.value);
column->write(value_out);
}
}

@ -630,7 +639,7 @@ TEST_P(sorted_index_test_case, simple_sequential_already_sorted) {

// check sorted column
{
std::vector<irs::bytes_output> column_payload;
std::vector<irs::bstring> column_payload;
gen.reset();

while (auto* doc = gen.next()) {
@ -638,14 +647,15 @@ TEST_P(sorted_index_test_case, simple_sequential_already_sorted) {
ASSERT_NE(nullptr, field);

column_payload.emplace_back();
field->write(column_payload.back());
irs::bytes_output out(column_payload.back());
field->write(out);
}

ASSERT_EQ(column_payload.size(), segment.docs_count());

std::sort(
column_payload.begin(), column_payload.end(),
[&less](const irs::bytes_output& lhs, const irs::bytes_output& rhs) {
[&less](const irs::bstring& lhs, const irs::bstring& rhs) {
return less(lhs, rhs);
});

@ -676,8 +686,8 @@
for (auto& column_name : column_names) {
struct doc {
irs::doc_id_t id{ irs::doc_limits::eof() };
irs::bytes_output order;
irs::bytes_output value;
irs::bstring order;
irs::bstring value;
};

std::vector<doc> column_docs;
@ -694,11 +704,13 @@
auto* column = doc->stored.get(column_name);

auto& value = column_docs.back();
sorted->write(value.order);
irs::bytes_output order_out(value.order);
sorted->write(order_out);

if (column) {
value.id = id++;
column->write(value.value);
irs::bytes_output value_out(value.value);
column->write(value_out);
}
}

@ -1354,7 +1366,7 @@ INSTANTIATE_TEST_CASE_P(
&tests::fs_directory,
&tests::mmap_directory
),
::testing::Values("1_1")
::testing::Values("1_1", "1_2")
),
tests::to_string
);

@ -25,7 +25,7 @@
#include "store/store_utils.hpp"
#include "utils/bytes_utils.hpp"

using namespace iresearch;
using namespace irs;

namespace tests {
namespace detail {
@ -86,7 +86,8 @@ void packed_read_write_core(const std::vector<uint32_t> &src) {
assert(blocks);

// compress data to stream
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);

// write first n compressed blocks
{
@ -99,7 +100,7 @@ void packed_read_write_core(const std::vector<uint32_t> &src) {

// decompress data from stream
std::vector<uint32_t> read(src.size());
iresearch::bytes_ref_input in(out);
irs::bytes_ref_input in(buf);

// read first n compressed blocks
{
@ -113,21 +114,22 @@ void packed_read_write_core(const std::vector<uint32_t> &src) {
ASSERT_EQ(src, read);
}

using iresearch::data_input;
using iresearch::data_output;
using irs::data_input;
using irs::data_output;

template<typename T>
void read_write_core(
const std::vector<T>& src,
const std::function<T(data_input&)>& reader,
const std::function<void(data_output&,T)>& writer) {
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
std::for_each(
src.begin(), src.end(),
[&out,&writer](const T& v){ writer(out, v); }
);

iresearch::bytes_input in(out);
irs::bytes_input in(buf);
std::for_each(
src.begin(), src.end(),
[&in,&reader](const T& v){ ASSERT_EQ(v, reader(in)); }
@ -139,11 +141,12 @@ template<typename T>
void read_write_core_nan(
const std::function<T(data_input&)>& reader,
const std::function<void(data_output&,T)>& writer) {
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
writer(out, std::numeric_limits<T>::quiet_NaN());
writer(out, std::numeric_limits<T>::signaling_NaN());

iresearch::bytes_input in(out);
irs::bytes_input in(buf);
ASSERT_TRUE(std::isnan(reader(in)));
ASSERT_TRUE(std::isnan(reader(in)));
}
@ -153,21 +156,23 @@ void read_write_core_container(
const Cont& src,
const std::function<Cont(data_input&)>& reader,
const std::function<data_output&(data_output&,const Cont&)>& writer) {
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
writer(out, src);

iresearch::bytes_input in(out);
irs::bytes_input in(buf);
const Cont read = reader(in);
ASSERT_EQ(src, read);
}

void read_write_block(const std::vector<uint32_t>& source, std::vector<uint32_t>& enc_dec_buf) {
// write block
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
irs::encode::bitpack::write_block(out, &source[0], source.size(), &enc_dec_buf[0]);

// read block
iresearch::bytes_input in(out);
irs::bytes_input in(buf);
std::vector<uint32_t> read(source.size());
irs::encode::bitpack::read_block(in, source.size(), &enc_dec_buf[0], read.data());

@ -271,14 +276,14 @@ TEST(store_utils_tests, zvfloat_read_write) {
-21532764.631984f,
-9847.23427f
},
iresearch::read_zvfloat,
iresearch::write_zvfloat
irs::read_zvfloat,
irs::write_zvfloat
);

/* NaN case */
tests::detail::read_write_core_nan<float_t>(
iresearch::read_zvfloat,
iresearch::write_zvfloat
irs::read_zvfloat,
irs::write_zvfloat
);
}

@ -300,14 +305,14 @@ TEST(store_utils_tests, zvdouble_read_write) {
-19274316.123,
-98743098097.34352532
},
iresearch::read_zvdouble,
iresearch::write_zvdouble
irs::read_zvdouble,
irs::write_zvdouble
);

/* NaN case */
tests::detail::read_write_core_nan<double_t>(
iresearch::read_zvdouble,
iresearch::write_zvdouble
irs::read_zvdouble,
irs::write_zvdouble
);
}

@ -322,8 +327,8 @@ TEST( store_utils_tests, size_read_write) {
size_t(12371792192121),
size_t(9719496156)
},
iresearch::read_size,
iresearch::write_size);
irs::read_size,
irs::write_size);
}

TEST(store_utils_tests, zvint_read_write) {
@ -337,8 +342,8 @@ TEST(store_utils_tests, zvint_read_write) {
-911728376,
-10725017
},
iresearch::read_zvint,
iresearch::write_zvint);
irs::read_zvint,
irs::write_zvint);
}

TEST(store_utils_tests, zvlong_read_write) {
@ -352,8 +357,8 @@ TEST(store_utils_tests, zvlong_read_write) {
-9184236868362391274LL,
-91724962191921979LL
},
iresearch::read_zvlong,
iresearch::write_zvlong);
irs::read_zvlong,
irs::write_zvlong);
}

TEST(store_utils_tests, std_string_read_write) {
@ -366,21 +371,21 @@ TEST(store_utils_tests, std_string_read_write) {
std::string("lazy p1230142hlds"),
std::string("dob sdofjasoufdsa")
},
iresearch::read_string<std::string>,
iresearch::write_string<std::string>);
irs::read_string<std::string>,
irs::write_string<std::string>);
}

TEST(store_utils_tests, bytes_read_write) {
tests::detail::read_write_core<bstring>(
{
bstring(),
bstring(iresearch::ref_cast<byte_type>(iresearch::string_ref("qalsdflsajfd"))),
bstring(iresearch::ref_cast<byte_type>(iresearch::string_ref("jfdldsflaflj"))),
bstring(iresearch::ref_cast<byte_type>(iresearch::string_ref("102174174010"))),
bstring(iresearch::ref_cast<byte_type>(iresearch::string_ref("0182ljdskfaof")))
bstring(irs::ref_cast<byte_type>(irs::string_ref("qalsdflsajfd"))),
bstring(irs::ref_cast<byte_type>(irs::string_ref("jfdldsflaflj"))),
bstring(irs::ref_cast<byte_type>(irs::string_ref("102174174010"))),
bstring(irs::ref_cast<byte_type>(irs::string_ref("0182ljdskfaof")))
},
iresearch::read_string<bstring>,
iresearch::write_string<bstring>);
irs::read_string<bstring>,
irs::write_string<bstring>);
}

TEST( store_utils_tests, string_vector_read_write) {
@ -392,10 +397,11 @@ TEST( store_utils_tests, string_vector_read_write) {
"lazy", "dog", "mustard"
};

iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
write_strings(out, src);

iresearch::bytes_input in(out);
irs::bytes_input in(buf);
const container_t readed = read_strings<container_t>(in);

ASSERT_EQ(src, readed);
@ -674,18 +680,19 @@ TEST(store_utils_tests, avg_encode_block_read_write) {
std::vector<uint64_t> buf; // temporary buffer for bit packing
buf.resize(values.size());

irs::bytes_output out;
irs::bstring out_buf;
irs::bytes_output out(out_buf);
irs::encode::avg::write_block(
out, stats.first, stats.second, avg_encoded.data(), avg_encoded.size(), buf.data()
);

ASSERT_EQ(
irs::bytes_io<uint64_t>::vsize(step) + irs::bytes_io<uint64_t>::vsize(step) + irs::bytes_io<uint32_t>::vsize(irs::encode::bitpack::ALL_EQUAL) + irs::bytes_io<uint64_t>::vsize(0), // base + avg + bits + single value
out.size()
out_buf.size()
);

{
irs::bytes_input in(out);
irs::bytes_input in(out_buf);
const uint64_t base = in.read_vlong();
const uint64_t avg = in.read_vlong();
const uint64_t bits = in.read_vint();
@ -695,20 +702,20 @@ TEST(store_utils_tests, avg_encode_block_read_write) {
}

{
irs::bytes_input in(out);
irs::bytes_input in(out_buf);
ASSERT_TRUE(irs::encode::avg::check_block_rl64(in, step));
}

{
uint64_t base, avg;
irs::bytes_input in(out);
irs::bytes_input in(out_buf);
ASSERT_TRUE(irs::encode::avg::read_block_rl64(in, base, avg));
ASSERT_EQ(step, base);
ASSERT_EQ(step, avg);
}

{
irs::bytes_input in(out);
irs::bytes_input in(out_buf);

const uint64_t base = in.read_vlong();
const uint64_t avg = in.read_vlong();

@ -0,0 +1,194 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////

#include "tests_shared.hpp"
#include "store/store_utils.hpp"
#include "utils/lz4compression.hpp"
#include "utils/delta_compression.hpp"

#include <numeric>
#include <random>

NS_LOCAL

struct dummy_compressor final : irs::compression::compressor {
virtual irs::bytes_ref compress(irs::byte_type* in, size_t size, irs::bstring& /*buf*/) {
return irs::bytes_ref::NIL;
}

virtual void flush(data_output&) { }
};

struct dummy_decompressor final : irs::compression::decompressor {
virtual irs::bytes_ref decompress(
irs::byte_type* src, size_t src_size,
irs::byte_type* dst, size_t dst_size) {
return irs::bytes_ref::NIL;
}

virtual bool prepare(data_input&) { return true; }
};

NS_END
TEST(compression_test, registration) {
const irs::compression::type_id type("dummy_compression");

// check absent
{
ASSERT_FALSE(irs::compression::exists(type.name()));
ASSERT_EQ(nullptr, irs::compression::get_compressor(type.name(), {}));
ASSERT_EQ(nullptr, irs::compression::get_decompressor(type.name(), {}));
auto visitor = [&type](const irs::string_ref& name) { return name != type.name(); };
ASSERT_TRUE(irs::compression::visit(visitor));
}

static size_t calls_count;
irs::compression::compression_registrar initial(
type,
[](const irs::compression::options&) -> irs::compression::compressor::ptr {
++calls_count;
return std::make_shared<dummy_compressor>();
},
[]() -> irs::compression::decompressor::ptr {
++calls_count;
return std::make_shared<dummy_decompressor>();
}
);
ASSERT_TRUE(initial); // registered

// check registered
{
ASSERT_TRUE(irs::compression::exists(type.name()));
ASSERT_EQ(0, calls_count);
ASSERT_NE(nullptr, irs::compression::get_compressor(type.name(), {}));
ASSERT_EQ(1, calls_count);
ASSERT_NE(nullptr, irs::compression::get_decompressor(type.name(), {}));
ASSERT_EQ(2, calls_count);
auto visitor = [&type](const irs::string_ref& name) { return name != type.name(); };
ASSERT_FALSE(irs::compression::visit(visitor));
}

irs::compression::compression_registrar duplicate(
type,
[](const irs::compression::options&) -> irs::compression::compressor::ptr { return nullptr; },
[]() -> irs::compression::decompressor::ptr { return nullptr; }
);
ASSERT_FALSE(duplicate); // not registered

// check registered
{
ASSERT_TRUE(irs::compression::exists(type.name()));
ASSERT_EQ(2, calls_count);
ASSERT_NE(nullptr, irs::compression::get_compressor(type.name(), {}));
ASSERT_EQ(3, calls_count);
ASSERT_NE(nullptr, irs::compression::get_decompressor(type.name(), {}));
ASSERT_EQ(4, calls_count);
auto visitor = [&type](const irs::string_ref& name) { return name != type.name(); };
ASSERT_FALSE(irs::compression::visit(visitor));
}
}

TEST(compression_test, lz4) {
using namespace iresearch;

std::vector<size_t> data(2047, 0);
std::random_device rnd_device;
std::mt19937 mersenne_engine {rnd_device()};
std::uniform_int_distribution<size_t> dist {1, 2142152};
auto generator = [&dist, &mersenne_engine](){ return dist(mersenne_engine); };

compression::lz4::lz4decompressor decompressor;
compression::lz4::lz4compressor compressor;
ASSERT_EQ(0, compressor.acceleration());

for (size_t i = 0; i < 10; ++i) {
std::generate(data.begin(), data.end(), generator);

bstring compression_buf;
bstring data_buf(data.size()*sizeof(size_t), 0);
std::memcpy(&data_buf[0], data.data(), data_buf.size());

ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(size_t)),
bytes_ref(data_buf)
);

const auto compressed = compressor.compress(&data_buf[0], data_buf.size(), compression_buf);
ASSERT_EQ(compressed, bytes_ref(compression_buf.c_str(), compressed.size()));

// lz4 doesn't modify data_buf
ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(size_t)),
bytes_ref(data_buf)
);

bstring decompression_buf(data_buf.size(), 0); // ensure we have enough space in buffer
const auto decompressed = decompressor.decompress(&compression_buf[0], compressed.size(),
&decompression_buf[0], decompression_buf.size());

ASSERT_EQ(data_buf, decompression_buf);
ASSERT_EQ(data_buf, decompressed);
}
}

TEST(compression_test, delta) {
using namespace iresearch;

std::vector<uint64_t> data(2047, 0);
std::random_device rnd_device;
std::mt19937 mersenne_engine {rnd_device()};
std::uniform_int_distribution<uint64_t> dist {1, 52};
auto generator = [&dist, &mersenne_engine](){ return dist(mersenne_engine); };

compression::delta_decompressor decompressor;
compression::delta_compressor compressor;

for (size_t i = 0; i < 10; ++i) {
std::generate(data.begin(), data.end(), generator);

bstring compression_buf;
bstring data_buf(data.size()*sizeof(size_t), 0);
std::memcpy(&data_buf[0], data.data(), data_buf.size());

ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(size_t)),
bytes_ref(data_buf)
);

const auto compressed = compressor.compress(&data_buf[0], data_buf.size(), compression_buf);
ASSERT_EQ(compressed, bytes_ref(compression_buf.c_str(), compressed.size()));

bstring decompression_buf(data_buf.size(), 0); // ensure we have enough space in buffer
const auto decompressed = decompressor.decompress(&compression_buf[0], compressed.size(),
&decompression_buf[0], decompression_buf.size());

ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(size_t)),
bytes_ref(decompression_buf)
);
ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(size_t)),
bytes_ref(decompressed)
);
}
}

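The new file above also documents the registration contract: a compression_registrar binds a type_id to a compressor factory (which receives irs::compression::options) and a decompressor factory, duplicate registrations are rejected rather than overwritten, and the factories are invoked lazily on first lookup. A hedged sketch of registering a custom codec at startup, reusing the dummy types from the test (the registrar and codec names here are hypothetical):

    // file-scope registrar runs before main(), mirroring analyzer_registrar
    const irs::compression::type_id kMyCompression("my_compression"); // hypothetical name
    irs::compression::compression_registrar kMyCompressionReg(
      kMyCompression,
      [](const irs::compression::options&) -> irs::compression::compressor::ptr {
        return std::make_shared<dummy_compressor>();   // factory for writers
      },
      []() -> irs::compression::decompressor::ptr {
        return std::make_shared<dummy_decompressor>(); // factory for readers
      });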
@ -21,10 +21,6 @@
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////

#include "store/mmap_directory.hpp"
#include "store/store_utils.hpp"
#include "utils/singleton.hpp"

#include "IResearchCommon.h"
#include "IResearchFeature.h"
#include "IResearchLinkHelper.h"
@ -49,6 +45,13 @@

#include "IResearchLink.h"

#include "index/column_info.hpp"
#include "store/mmap_directory.hpp"
#include "store/store_utils.hpp"
#include "utils/lz4compression.hpp"
#include "utils/encryption.hpp"
#include "utils/singleton.hpp"

using namespace std::literals;

namespace {
@ -56,7 +59,7 @@ namespace {
////////////////////////////////////////////////////////////////////////////////
/// @brief the storage format used with IResearch writers
////////////////////////////////////////////////////////////////////////////////
const irs::string_ref IRESEARCH_STORE_FORMAT("1_1");
const irs::string_ref IRESEARCH_STORE_FORMAT("1_2");

typedef irs::async_utils::read_write_mutex::read_mutex ReadMutex;
typedef irs::async_utils::read_write_mutex::write_mutex WriteMutex;
@ -1090,10 +1093,24 @@ Result IResearchLink::initDataStore(InitCallback const& initCallback, bool sorte
_lastCommittedTick = _dataStore._recoveryTick;
_flushSubscription.reset(new IResearchFlushSubscription(_dataStore._recoveryTick));

irs::index_writer::init_options options;
options.lock_repository = false; // do not lock index, ArangoDB has its own lock
options.comparator = sorted ? &_comparer : nullptr; // set comparator if requested

// setup columnstore compression/encryption if requested by storage engine
auto const encrypt = (nullptr != irs::get_encryption(_dataStore._directory->attributes()));
if (encrypt) {
options.column_info = [](const irs::string_ref& name) -> irs::column_info {
// do not waste resources to encrypt primary key column
return { irs::compression::lz4::type(), {}, DocumentPrimaryKey::PK() != name };
};
} else {
options.column_info = [](const irs::string_ref& /*name*/) -> irs::column_info {
return { irs::compression::lz4::type(), {}, false };
};
}

auto openFlags = irs::OM_APPEND;
if (!_dataStore._reader) {
openFlags |= irs::OM_CREATE;
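Net effect for ArangoDB: the storage format is bumped to 1_2, every column is LZ4-compressed, and when the storage engine supplies a cipher everything except the primary-key column is additionally encrypted; per the comment in the diff, encrypting the PK column would spend resources for no benefit. The encrypted-case provider in isolation, with names exactly as in the diff:

    options.column_info = [](const irs::string_ref& name) -> irs::column_info {
      const bool encrypt = DocumentPrimaryKey::PK() != name; // skip the PK column
      return { irs::compression::lz4::type(), {}, encrypt };
    };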