
bug-fix/internal-issue-#591 (#9639) (#9779)

* update iresearch

* update iresearch

* fix compilation errors

* update iresearch
Andrey Abramov 2019-08-21 17:55:24 +03:00 committed by KVS85
parent 73932a784b
commit 35e2ebc729
53 changed files with 3366 additions and 774 deletions

View File

@ -695,7 +695,7 @@ install:
- |
ICU_DIR=${DEPS_DIR}/icu
if [[ -z "$(ls -A ${ICU_DIR})" ]]; then
ICU_URL="https://kent.dl.sourceforge.net/project/icu/ICU4C/57.1/icu4c-57_1-src.tgz"
ICU_URL="https://netix.dl.sourceforge.net/project/icu/ICU4C/57.1/icu4c-57_1-src.tgz"
cd ${DEPS_DIR}
wget -P icu ${ICU_URL}
cd ${ICU_DIR}
@ -721,7 +721,8 @@ install:
travis_retry git clone --depth 1 --recursive --quiet ${BOOST_URL} ${BOOST_DIR} || exit 1
else
#BOOST_URL="https://mirrors.netix.net/sourceforge/b/bo/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
BOOST_URL="https://kent.dl.sourceforge.net/project/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
#BOOST_URL="https://kent.dl.sourceforge.net/project/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
BOOST_URL="https://netix.dl.sourceforge.net/project/boost/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
mkdir -p ${BOOST_DIR}
{ travis_retry wget --quiet -O - ${BOOST_URL} | tar --strip-components=1 -xz -C ${BOOST_DIR}; } || exit 1
fi

View File

@ -187,7 +187,7 @@ install:
############################################################################
- set ICU_DIR=%DEPS_DIR%\icu
- mkdir %ICU_DIR% && cd %ICU_DIR%
- set ICU_URL="https://iweb.dl.sourceforge.net/project/icu/ICU4C/57.1/icu4c-57_1-Win64-msvc10.zip"
- set ICU_URL="https://ayera.dl.sourceforge.net/project/icu/ICU4C/57.1/icu4c-57_1-Win64-msvc10.zip"
- appveyor DownloadFile %ICU_URL% -FileName icu4c-57_1-win64-msvc10.zip
- 7z x icu4c-57_1-win64-msvc10.zip -o. > nul
- set ICU_ROOT=%ICU_DIR%\icu

View File

@ -144,6 +144,8 @@ set(IResearch_core_sources
./utils/encryption.cpp
./utils/ctr_encryption.cpp
./utils/compression.cpp
./utils/delta_compression.cpp
./utils/lz4compression.cpp
./utils/directory_utils.cpp
./utils/file_utils.cpp
./utils/mmap_utils.cpp
@ -220,6 +222,7 @@ set(IResearch_core_headers
./utils/bit_utils.hpp
./utils/block_pool.hpp
./utils/compression.hpp
./utils/lz4compression.hpp
./utils/file_utils.hpp
./utils/fst.hpp
./utils/fst_decl.hpp

View File

@ -254,13 +254,5 @@ analyzer_registrar::analyzer_registrar(
}
}
analyzer_registrar::operator bool() const NOEXCEPT {
return registered_;
}
NS_END // NS_BEGIN(analysis)
NS_END // analysis
NS_END
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------

View File

@ -58,7 +58,11 @@ class IRESEARCH_API analyzer_registrar {
normalizer_f normalizer,
const char* source = nullptr
);
operator bool() const NOEXCEPT;
operator bool() const NOEXCEPT {
return registered_;
}
private:
bool registered_;
};

View File

@ -80,7 +80,7 @@ file_not_found::file_not_found(
error_ += ".";
} else {
error_ += ": ";
error_ + filename.c_str();
error_.append(filename.c_str(), filename.size());
}
}
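For context: the old line 'error_ + filename.c_str();' evaluates operator+ into a temporary that is immediately discarded, so the filename never made it into the message; 'append' mutates the string in place. A minimal sketch of the bug class, assuming only the standard library (names are illustrative, not from this commit):

#include <string>

// buggy: operator+ yields a temporary that is discarded => filename is lost
std::string describe_buggy(std::string error, const std::string& filename) {
  error += ": ";
  error + filename; // no-op, typically flagged by -Wunused-value
  return error;
}

// fixed: append modifies 'error' in place
std::string describe_fixed(std::string error, const std::string& filename) {
  error += ": ";
  error.append(filename);
  return error;
}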

View File

@ -29,6 +29,7 @@
#include "store/directory.hpp"
#include "index/index_meta.hpp"
#include "index/column_info.hpp"
#include "index/iterators.hpp"
#include "utils/io_utils.hpp"
@ -247,7 +248,7 @@ struct IRESEARCH_API columnstore_writer {
virtual ~columnstore_writer() = default;
virtual void prepare(directory& dir, const segment_meta& meta) = 0;
virtual column_t push_column() = 0;
virtual column_t push_column(const column_info& info) = 0;
virtual void rollback() NOEXCEPT = 0;
virtual bool commit() = 0; // @return was anything actually flushed
}; // columnstore_writer

View File

@ -51,6 +51,7 @@
#include "utils/bit_packing.hpp"
#include "utils/bit_utils.hpp"
#include "utils/bitset.hpp"
#include "utils/lz4compression.hpp"
#include "utils/encryption.hpp"
#include "utils/compression.hpp"
#include "utils/directory_utils.hpp"
@ -141,9 +142,27 @@ NS_END
NS_LOCAL
irs::bytes_ref DUMMY; // placeholder for visiting logic in columnstore
using namespace irs;
using namespace iresearch;
bytes_ref DUMMY; // placeholder for visiting logic in columnstore
class noop_compressor final : compression::compressor {
public:
static compression::compressor::ptr make() {
typedef compression::compressor::ptr ptr;
static noop_compressor INSTANCE;
return ptr(ptr(), &INSTANCE);
}
virtual bytes_ref compress(byte_type* in, size_t size, bstring& /*buf*/) {
return bytes_ref(in, size);
}
virtual void flush(data_output& /*out*/) { }
private:
noop_compressor() = default;
}; // noop_compressor
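The 'ptr(ptr(), &INSTANCE)' expression in noop_compressor::make() (and again in format12::make() further down) is the shared_ptr aliasing constructor: an empty owner combined with a raw pointer produces a non-owning handle, so the static singleton is never deleted through the pointer. A standalone sketch of the pattern, assuming only the standard library:

#include <cassert>
#include <memory>

struct service {
  static std::shared_ptr<service> instance() {
    static service INSTANCE;
    // aliasing constructor: shares (empty) ownership, stores &INSTANCE;
    // use_count() == 0 and no deleter ever runs
    return std::shared_ptr<service>(std::shared_ptr<service>(), &INSTANCE);
  }
};

int main() {
  auto s = service::instance();
  assert(s.get() != nullptr && s.use_count() == 0);
}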
// ----------------------------------------------------------------------------
// --SECTION-- features
@ -2833,47 +2852,61 @@ bool meta_reader::read(column_meta& column) {
// |Compressed block #1|
// |Compressed block #2|
// ...
// |Bloom Filter| <- not implemented yet
// |Last block #0 key|Block #0 offset|
// |Last block #1 key|Block #1 offset| <-- Columnstore blocks index
// |Last block #2 key|Block #2 offset|
// ...
// |Bloom filter offset| <- not implemented yet
// |Footer|
const uint32_t INDEX_BLOCK_SIZE = 1024;
const size_t MAX_DATA_BLOCK_SIZE = 8192;
// By default we treat columns as a variable length sparse columns
/// @brief Column flags
/// @note by default we treat columns as variable-length sparse columns
enum ColumnProperty : uint32_t {
CP_SPARSE = 0,
CP_DENSE = 1, // keys can be presented as an array indices
CP_FIXED = 2, // fixed length colums
CP_MASK = 4, // column contains no data
CP_DENSE = 1, // keys can be represented as array indices
CP_FIXED = 1 << 1, // fixed-length columns
CP_MASK = 1 << 2, // column contains no data
CP_COLUMN_DENSE = 1 << 3, // column index is dense
CP_COLUMN_ENCRYPT = 1 << 4 // column contains encrypted data
}; // ColumnProperty
ENABLE_BITMASK_ENUM(ColumnProperty);
bool is_good_compression_ratio(size_t raw_size, size_t compressed_size) NOEXCEPT {
// require compression to save at least 12.5% (compressed < 87.5% of raw)
return compressed_size < raw_size - (raw_size / 8U);
}
ColumnProperty write_compact(
irs::index_output& out,
irs::compressor& compressor,
const irs::bytes_ref& data) {
index_output& out,
bstring& encode_buf,
encryption::stream* cipher,
compression::compressor& compressor,
bstring& data) {
if (data.empty()) {
out.write_byte(0); // zig_zag_encode32(0) == 0
return CP_MASK;
}
// compressor can only handle sizes that fit in int32_t, so the sign bit can be used as a compression flag
compressor.compress(reinterpret_cast<const char*>(data.c_str()), data.size());
const bytes_ref compressed = compressor.compress(&data[0], data.size(), encode_buf);
if (compressor.size() < data.size()) {
assert(compressor.size() <= irs::integer_traits<int32_t>::const_max);
irs::write_zvint(out, int32_t(compressor.size())); // compressed size
out.write_bytes(compressor.c_str(), compressor.size());
if (is_good_compression_ratio(data.size(), compressed.size())) {
assert(compressed.size() <= irs::integer_traits<int32_t>::const_max);
irs::write_zvint(out, int32_t(compressed.size())); // compressed size
if (cipher) {
cipher->encrypt(out.file_pointer(), const_cast<irs::byte_type*>(compressed.c_str()), compressed.size());
}
out.write_bytes(compressed.c_str(), compressed.size());
irs::write_zvlong(out, data.size() - MAX_DATA_BLOCK_SIZE); // original size
} else {
assert(data.size() <= irs::integer_traits<int32_t>::const_max);
irs::write_zvint(out, int32_t(0) - int32_t(data.size())); // -ve to mark uncompressed
if (cipher) {
cipher->encrypt(out.file_pointer(), const_cast<irs::byte_type*>(data.c_str()), data.size());
}
out.write_bytes(data.c_str(), data.size());
}
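The sign of the zig-zag-encoded size is the block framing here: a positive zvint means a compressed block of that many bytes follows (with the original size written afterwards as a zvlong), a negative zvint means raw bytes follow, and a single zero byte marks an empty (mask) block. Compression is only kept when it saves at least 1/8 of the raw size; a sketch of that ratio check, with illustrative names:

#include <cassert>
#include <cstddef>

// keep the compressed form only if it saves at least 12.5%
bool is_good_ratio(size_t raw, size_t compressed) {
  return compressed < raw - raw / 8; // i.e. compressed < 87.5% of raw
}

int main() {
  assert(is_good_ratio(8192, 7000));  // saves ~14.6% -> write compressed
  assert(!is_good_ratio(8192, 7500)); // saves ~8.5%  -> write raw, negative size
}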
@ -2882,7 +2915,8 @@ ColumnProperty write_compact(
void read_compact(
irs::index_input& in,
const irs::decompressor& decompressor,
irs::encryption::stream* cipher,
irs::compression::decompressor* decompressor,
irs::bstring& encode_buf,
irs::bstring& decode_buf) {
const auto size = irs::read_zvint(in);
@ -2904,9 +2938,21 @@ void read_compact(
#else
in.read_bytes(&(decode_buf[0]), buf_size);
#endif // IRESEARCH_DEBUG
if (cipher) {
cipher->decrypt(in.file_pointer() - buf_size, &(decode_buf[0]), buf_size);
}
return;
}
if (IRS_UNLIKELY(!decompressor)) {
throw irs::index_error(string_utils::to_string(
"while reading compact, error: can't decompress block of size %d for whithout decompressor",
size
));
}
irs::string_utils::oversize(encode_buf, buf_size);
#ifdef IRESEARCH_DEBUG
@ -2917,21 +2963,20 @@ void read_compact(
in.read_bytes(&(encode_buf[0]), buf_size);
#endif // IRESEARCH_DEBUG
if (cipher) {
cipher->decrypt(in.file_pointer() - buf_size, &(encode_buf[0]), buf_size);
}
// ensure that we have enough space to store decompressed data
decode_buf.resize(irs::read_zvlong(in) + MAX_DATA_BLOCK_SIZE);
buf_size = decompressor.deflate(
reinterpret_cast<const char*>(encode_buf.c_str()),
buf_size,
reinterpret_cast<char*>(&decode_buf[0]),
decode_buf.size()
const auto decoded = decompressor->decompress(
&encode_buf[0], buf_size,
&decode_buf[0], decode_buf.size()
);
if (!irs::type_limits<irs::type_t::address_t>::valid(buf_size)) {
throw irs::index_error(string_utils::to_string(
"while reading compact, error: invalid buffer size '" IR_SIZE_T_SPECIFIER "'",
buf_size
));
if (decoded.null()) {
throw irs::index_error("error while reading compact");
}
}
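read_compact mirrors that framing from the reading side: a positive zvint size selects the compressed branch (decrypt if a cipher is present, then decompress), a negative one selects the raw branch (just decrypt in place). A compact sketch of the size decode, with illustrative names:

#include <cassert>
#include <cstdint>
#include <cstdlib>

// > 0: compressed block of 'size' bytes (original size follows as zvlong)
// == 0: empty block (mask column)
// < 0: std::abs(size) bytes of raw, uncompressed data
bool is_compressed(int32_t zvint_size) { return zvint_size > 0; }
size_t block_size(int32_t zvint_size) { return size_t(std::abs(zvint_size)); }

int main() {
  assert(is_compressed(1024) && block_size(1024) == 1024);    // compressed 1 KiB
  assert(!is_compressed(-4096) && block_size(-4096) == 4096); // raw 4 KiB
}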
@ -3013,6 +3058,7 @@ class index_block {
const auto block_size = math::ceil32(size, packed::BLOCK_SIZE_32);
assert(block_size >= size);
assert(std::is_sorted(keys_, key_));
const auto stats = encode::avg::encode(keys_, key_);
const auto bits = encode::avg::write_block(
out, stats.first, stats.second,
@ -3031,6 +3077,7 @@ class index_block {
const auto block_size = math::ceil64(size, packed::BLOCK_SIZE_64);
assert(block_size >= size);
assert(std::is_sorted(offsets_, offset_));
const auto stats = encode::avg::encode(offsets_, offset_);
const auto bits = encode::avg::write_block(
out, stats.first, stats.second,
@ -3068,22 +3115,41 @@ class index_block {
class writer final : public irs::columnstore_writer {
public:
static const int32_t FORMAT_MIN = 0;
static const int32_t FORMAT_MAX = FORMAT_MIN;
static const int32_t FORMAT_MAX = 1;
static const string_ref FORMAT_NAME;
static const string_ref FORMAT_EXT;
explicit writer(int32_t version) NOEXCEPT
: buf_(2*MAX_DATA_BLOCK_SIZE, 0),
version_(version) {
static_assert(
2*MAX_DATA_BLOCK_SIZE >= INDEX_BLOCK_SIZE*sizeof(uint64_t),
"buffer is not big enough"
);
assert(version >= FORMAT_MIN && version <= FORMAT_MAX);
}
virtual void prepare(directory& dir, const segment_meta& meta) override;
virtual column_t push_column() override;
virtual column_t push_column(const column_info& info) override;
virtual bool commit() override;
virtual void rollback() NOEXCEPT override;
private:
class column final : public irs::columnstore_writer::column_output {
public:
explicit column(writer& ctx)
explicit column(writer& ctx, const compression::type_id& type,
const compression::compressor::ptr& compressor,
encryption::stream* cipher)
: ctx_(&ctx),
blocks_index_(*ctx.alloc_) {
comp_type_(type),
comp_(compressor),
cipher_(cipher),
blocks_index_(*ctx.alloc_),
block_buf_(2*MAX_DATA_BLOCK_SIZE, 0) {
assert(comp_); // ensured by `push_column'
block_buf_.clear(); // reset size to '0'
}
void prepare(doc_id_t key) {
@ -3109,7 +3175,17 @@ class writer final : public irs::columnstore_writer {
void finish() {
auto& out = *ctx_->data_out_;
write_enum(out, ColumnProperty(((column_props_ & CP_DENSE) << 3) | blocks_props_)); // column properties
// evaluate overall column properties
auto column_props = blocks_props_;
if (0 != (column_props_ & CP_DENSE)) { column_props |= CP_COLUMN_DENSE; }
if (cipher_) { column_props |= CP_COLUMN_ENCRYPT; }
write_enum(out, column_props);
if (ctx_->version_ > FORMAT_MIN) {
write_string(out, comp_type_->name());
comp_->flush(out); // flush compression dependent data
}
out.write_vint(block_index_.total()); // total number of items
out.write_vint(max_); // max column key
out.write_vint(avg_block_size_); // avg data block size
@ -3128,7 +3204,9 @@ class writer final : public irs::columnstore_writer {
flush_block();
// finish column blocks index
column_index_.flush(blocks_index_.stream, ctx_->buf_);
assert(ctx_->buf_.size() >= INDEX_BLOCK_SIZE*sizeof(uint64_t));
auto* buf = reinterpret_cast<uint64_t*>(&ctx_->buf_[0]);
column_index_.flush(blocks_index_.stream, buf);
blocks_index_.stream.flush();
}
@ -3137,11 +3215,11 @@ class writer final : public irs::columnstore_writer {
}
virtual void write_byte(byte_type b) override {
block_buf_.write_byte(b);
block_buf_ += b;
}
virtual void write_bytes(const byte_type* b, size_t size) override {
block_buf_.write_bytes(b, size);
block_buf_.append(b, size);
}
virtual void reset() override {
@ -3151,7 +3229,7 @@ class writer final : public irs::columnstore_writer {
}
// reset to previous offset
block_buf_.reset(block_index_.max_offset());
block_buf_.resize(block_index_.max_offset());
block_index_.pop_back();
}
@ -3172,11 +3250,13 @@ class writer final : public irs::columnstore_writer {
max_ = block_index_.max_key();
auto& out = *ctx_->data_out_;
auto* buf = ctx_->buf_;
// write first block key & where block starts
column_index_.push_back(block_index_.min_key(), out.file_pointer());
assert(ctx_->buf_.size() >= INDEX_BLOCK_SIZE*sizeof(uint64_t));
auto* buf = reinterpret_cast<uint64_t*>(&ctx_->buf_[0]);
if (column_index_.full()) {
column_index_.flush(blocks_index_.stream, buf);
}
@ -3192,13 +3272,14 @@ class writer final : public irs::columnstore_writer {
// const auto res = expr0() | expr1();
// otherwise it would violate format layout
auto block_props = block_index_.flush(out, buf);
block_props |= write_compact(out, ctx_->comp_, static_cast<bytes_ref>(block_buf_));
block_props |= write_compact(out, ctx_->buf_, cipher_, *comp_, block_buf_);
length_ += block_buf_.size();
// refresh blocks properties
blocks_props_ &= block_props;
// reset buffer stream after flush
block_buf_.reset();
block_buf_.clear();
// refresh column properties
// column is dense IFF
@ -3208,11 +3289,14 @@ class writer final : public irs::columnstore_writer {
}
writer* ctx_; // writer context
const compression::type_id* comp_type_;
compression::compressor::ptr comp_; // compressor used for column
encryption::stream* cipher_;
uint64_t length_{}; // size of all data blocks in the column
index_block<INDEX_BLOCK_SIZE> block_index_; // current block index (per document key/offset)
index_block<INDEX_BLOCK_SIZE> column_index_; // column block index (per block key/offset)
memory_output blocks_index_; // blocks index
bytes_output block_buf_{ 2*MAX_DATA_BLOCK_SIZE }; // data buffer
bstring block_buf_; // data buffer
doc_id_t max_{ doc_limits::invalid() }; // max key (among flushed blocks)
ColumnProperty blocks_props_{ CP_DENSE | CP_FIXED | CP_MASK }; // aggregated column blocks properties
ColumnProperty column_props_{ CP_DENSE }; // aggregated column block index properties
@ -3221,18 +3305,17 @@ class writer final : public irs::columnstore_writer {
}; // column
memory_allocator* alloc_{ &memory_allocator::global() };
uint64_t buf_[INDEX_BLOCK_SIZE]; // reusable temporary buffer for packing
std::deque<column> columns_; // pointers remain valid
compressor comp_{ 2*MAX_DATA_BLOCK_SIZE };
bstring buf_; // reusable temporary buffer for packing/compression
index_output::ptr data_out_;
std::string filename_;
directory* dir_;
encryption::stream::ptr data_out_cipher_;
int32_t version_;
}; // writer
template<>
std::string file_name<columnstore_writer, segment_meta>(
const segment_meta& meta
) {
std::string file_name<columnstore_writer, segment_meta>(const segment_meta& meta) {
return file_name(meta.name, columns::writer::FORMAT_EXT);
};
@ -3255,19 +3338,50 @@ void writer::prepare(directory& dir, const segment_meta& meta) {
));
}
format_utils::write_header(*data_out, FORMAT_NAME, FORMAT_MAX);
format_utils::write_header(*data_out, FORMAT_NAME, version_);
encryption::stream::ptr data_out_cipher;
if (version_ > FORMAT_MIN) {
bstring enc_header;
auto* enc = get_encryption(dir.attributes());
const auto encrypt = irs::encrypt(filename, *data_out, enc, enc_header, data_out_cipher);
assert(!encrypt || (data_out_cipher && data_out_cipher->block_size()));
UNUSED(encrypt);
}
alloc_ = &directory_utils::get_allocator(dir);
// noexcept block
dir_ = &dir;
data_out_ = std::move(data_out);
data_out_cipher_ = std::move(data_out_cipher);
filename_ = std::move(filename);
}
columnstore_writer::column_t writer::push_column() {
columnstore_writer::column_t writer::push_column(const column_info& info) {
encryption::stream* cipher;
const compression::type_id* compression;
if (version_ > FORMAT_MIN) {
compression = info.compression();
cipher = info.encryption() ? data_out_cipher_.get() : nullptr;
} else {
// we don't support encryption and custom
// compression for 'FORMAT_MIN' version
compression = compression::lz4::type();
cipher = nullptr;
}
auto compressor = compression::get_compressor(*compression, info.options());
if (!compressor) {
compressor = noop_compressor::make();
}
const auto id = columns_.size();
columns_.emplace_back(*this);
columns_.emplace_back(*this, info.compression(), compressor, cipher);
auto& column = columns_.back();
return std::make_pair(id, [&column] (doc_id_t doc) -> column_output& {
@ -3445,7 +3559,10 @@ class sparse_block : util::noncopyable {
const bstring* data_{};
}; // iterator
void load(index_input& in, decompressor& decomp, bstring& buf) {
void load(index_input& in,
compression::decompressor* decomp,
encryption::stream* cipher,
bstring& buf) {
const uint32_t size = in.read_vint(); // total number of entries in a block
if (!size) {
@ -3471,7 +3588,7 @@ class sparse_block : util::noncopyable {
});
// read data
read_compact(in, decomp, buf, data_);
read_compact(in, cipher, decomp, buf, data_);
end_ = index_ + size;
}
@ -3628,7 +3745,10 @@ class dense_block : util::noncopyable {
doc_id_t base_{};
}; // iterator
void load(index_input& in, decompressor& decomp, bstring& buf) {
void load(index_input& in,
compression::decompressor* decomp,
encryption::stream* cipher,
bstring& buf) {
const uint32_t size = in.read_vint(); // total number of entries in a block
if (!size) {
@ -3655,7 +3775,7 @@ class dense_block : util::noncopyable {
});
// read data
read_compact(in, decomp, buf, data_);
read_compact(in, cipher, decomp, buf, data_);
end_ = index_ + size;
}
@ -3805,7 +3925,10 @@ class dense_fixed_offset_block : util::noncopyable {
doc_id_t value_back_{}; // last valid doc id
}; // iterator
void load(index_input& in, decompressor& decomp, bstring& buf) {
void load(index_input& in,
compression::decompressor* decomp,
encryption::stream* cipher,
bstring& buf) {
size_ = in.read_vint(); // total number of entries in a block
if (!size_) {
@ -3830,7 +3953,7 @@ class dense_fixed_offset_block : util::noncopyable {
}
// read data
read_compact(in, decomp, buf, data_);
read_compact(in, cipher, decomp, buf, data_);
}
bool value(doc_id_t key, bytes_ref& out) const {
@ -3945,7 +4068,10 @@ class sparse_mask_block : util::noncopyable {
);
}
void load(index_input& in, decompressor& /*decomp*/, bstring& buf) {
void load(index_input& in,
compression::decompressor* /*decomp*/,
encryption::stream* /*cipher*/,
bstring& buf) {
size_ = in.read_vint(); // total number of entries in a block
if (!size_) {
@ -4062,7 +4188,10 @@ class dense_mask_block {
max_(doc_limits::invalid()) {
}
void load(index_input& in, decompressor& /*decomp*/, bstring& /*buf*/) {
void load(index_input& in,
compression::decompressor* /*decomp*/,
encryption::stream* /*cipher*/,
bstring& /*buf*/) {
const auto size = in.read_vint(); // total number of entries in a block
if (!size) {
@ -4115,7 +4244,7 @@ class read_context
public:
DECLARE_SHARED_PTR(read_context);
static ptr make(const index_input& stream) {
static ptr make(const index_input& stream, encryption::stream* cipher) {
auto clone = stream.reopen(); // reopen thread-safe stream
if (!clone) {
@ -4125,28 +4254,32 @@ class read_context
throw io_error("Failed to reopen columnstore input in");
}
return memory::make_shared<read_context>(std::move(clone));
return memory::make_shared<read_context>(std::move(clone), cipher);
}
read_context(index_input::ptr&& in = index_input::ptr(), const Allocator& alloc = Allocator())
read_context(
index_input::ptr&& in,
encryption::stream* cipher,
const Allocator& alloc = Allocator())
: block_cache_traits<sparse_block, Allocator>::cache_t(typename block_cache_traits<sparse_block, Allocator>::allocator_t(alloc)),
block_cache_traits<dense_block, Allocator>::cache_t(typename block_cache_traits<dense_block, Allocator>::allocator_t(alloc)),
block_cache_traits<dense_fixed_offset_block, Allocator>::cache_t(typename block_cache_traits<dense_fixed_offset_block, Allocator>::allocator_t(alloc)),
block_cache_traits<sparse_mask_block, Allocator>::cache_t(typename block_cache_traits<sparse_mask_block, Allocator>::allocator_t(alloc)),
block_cache_traits<dense_mask_block, Allocator>::cache_t(typename block_cache_traits<dense_mask_block, Allocator>::allocator_t(alloc)),
buf_(INDEX_BLOCK_SIZE*sizeof(uint32_t), 0),
stream_(std::move(in)) {
stream_(std::move(in)),
cipher_(cipher) {
}
template<typename Block, typename... Args>
Block& emplace_back(uint64_t offset, Args&&... args) {
Block& emplace_back(uint64_t offset, compression::decompressor* decomp, bool decrypt, Args&&... args) {
typename block_cache_traits<Block, Allocator>::cache_t& cache = *this;
// add cache entry
auto& block = cache.emplace_back(std::forward<Args>(args)...);
try {
load(block, offset);
load(block, decomp, decrypt, offset);
} catch (...) {
// failed to load block
pop_back<Block>();
@ -4158,9 +4291,9 @@ class read_context
}
template<typename Block>
void load(Block& block, uint64_t offset) {
void load(Block& block, compression::decompressor* decomp, bool decrypt, uint64_t offset) {
stream_->seek(offset); // seek to the offset
block.load(*stream_, decomp_, buf_);
block.load(*stream_, decomp, decrypt ? cipher_ : nullptr, buf_);
}
template<typename Block>
@ -4170,9 +4303,9 @@ class read_context
}
private:
decompressor decomp_; // decompressor
bstring buf_; // temporary buffer for decoding/unpacking
index_input::ptr stream_;
encryption::stream* cipher_; // optional cipher stream
}; // read_context
typedef read_context<> read_context_t;
@ -4183,16 +4316,20 @@ class context_provider: private util::noncopyable {
: pool_(std::max(size_t(1), max_pool_size)) {
}
void prepare(index_input::ptr&& stream) NOEXCEPT {
void prepare(index_input::ptr&& stream, encryption::stream::ptr&& cipher) NOEXCEPT {
assert(stream);
stream_ = std::move(stream);
cipher_ = std::move(cipher);
}
bounded_object_pool<read_context_t>::ptr get_context() const {
return pool_.emplace(*stream_);
return pool_.emplace(*stream_, cipher_.get());
}
private:
mutable bounded_object_pool<read_context_t> pool_;
encryption::stream::ptr cipher_;
index_input::ptr stream_;
}; // context_provider
@ -4201,6 +4338,8 @@ class context_provider: private util::noncopyable {
template<typename BlockRef>
const typename BlockRef::block_t& load_block(
const context_provider& ctxs,
compression::decompressor* decomp,
bool decrypt,
BlockRef& ref) {
typedef typename BlockRef::block_t block_t;
@ -4211,7 +4350,7 @@ const typename BlockRef::block_t& load_block(
assert(ctx);
// load block
const auto& block = ctx->template emplace_back<block_t>(ref.offset);
const auto& block = ctx->template emplace_back<block_t>(ref.offset, decomp, decrypt);
// mark block as loaded
if (ref.pblock.compare_exchange_strong(cached, &block)) {
@ -4232,6 +4371,8 @@ const typename BlockRef::block_t& load_block(
template<typename BlockRef>
const typename BlockRef::block_t& load_block(
const context_provider& ctxs,
compression::decompressor* decomp,
bool decrypt,
const BlockRef& ref,
typename BlockRef::block_t& block) {
const auto* cached = ref.pblock.load();
@ -4240,7 +4381,7 @@ const typename BlockRef::block_t& load_block(
auto ctx = ctxs.get_context();
assert(ctx);
ctx->load(block, ref.offset);
ctx->load(block, decomp, decrypt, ref.offset);
cached = &block;
}
@ -4258,12 +4399,13 @@ class column
DECLARE_UNIQUE_PTR(column);
explicit column(ColumnProperty props) NOEXCEPT
: props_(props) {
: props_(props),
encrypted_(0 != (props & CP_COLUMN_ENCRYPT)) {
}
virtual ~column() { }
virtual ~column() = default;
virtual void read(data_input& in, uint64_t* /*buf*/) {
virtual void read(data_input& in, uint64_t* /*buf*/, compression::decompressor::ptr decomp) {
count_ = in.read_vint();
max_ = in.read_vint();
avg_block_size_ = in.read_vint();
@ -4271,25 +4413,30 @@ class column
if (!avg_block_count_) {
avg_block_count_ = count_;
}
decomp_ = decomp;
}
bool encrypted() const NOEXCEPT { return encrypted_; }
doc_id_t max() const NOEXCEPT { return max_; }
virtual size_t size() const NOEXCEPT override { return count_; }
bool empty() const NOEXCEPT { return 0 == size(); }
uint32_t avg_block_size() const NOEXCEPT { return avg_block_size_; }
uint32_t avg_block_count() const NOEXCEPT { return avg_block_count_; }
ColumnProperty props() const NOEXCEPT { return props_; }
compression::decompressor* decompressor() const NOEXCEPT { return decomp_.get(); }
protected:
// same as size() but returns uint32_t to avoid type conversions
uint32_t count() const NOEXCEPT { return count_; }
private:
compression::decompressor::ptr decomp_;
doc_id_t max_{ doc_limits::eof() };
uint32_t count_{};
uint32_t avg_block_size_{};
uint32_t avg_block_count_{};
ColumnProperty props_{ CP_SPARSE };
bool encrypted_{ false }; // cached encryption mark
}; // column
template<typename Column>
@ -4363,7 +4510,7 @@ class column_iterator final: public irs::doc_iterator {
}
try {
const auto& cached = load_block(*column_->ctxs_, *begin_);
const auto& cached = load_block(*column_->ctxs_, column_->decompressor(), column_->encrypted(), *begin_);
if (block_ != cached) {
block_.reset(cached, payload_);
@ -4425,8 +4572,8 @@ class sparse_column final : public column {
: column(props), ctxs_(&ctxs) {
}
virtual void read(data_input& in, uint64_t* buf) override {
column::read(in, buf); // read common header
virtual void read(data_input& in, uint64_t* buf, compression::decompressor::ptr decomp) override {
column::read(in, buf, decomp); // read common header
uint32_t blocks_count = in.read_vint(); // total number of column index blocks
@ -4496,7 +4643,7 @@ class sparse_column final : public column {
return false;
}
const auto& cached = load_block(*ctxs_, *it);
const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), *it);
return cached.value(key, value);
};
@ -4506,7 +4653,7 @@ class sparse_column final : public column {
) const override {
block_t block; // don't cache new blocks
for (auto begin = refs_.begin(), end = refs_.end()-1; begin != end; ++begin) { // -1 for upper bound
const auto& cached = load_block(*ctxs_, *begin, block);
const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), *begin, block);
if (!cached.visit(visitor)) {
return false;
@ -4617,8 +4764,8 @@ class dense_fixed_offset_column final : public column {
: column(prop), ctxs_(&ctxs) {
}
virtual void read(data_input& in, uint64_t* buf) override {
column::read(in, buf); // read common header
virtual void read(data_input& in, uint64_t* buf, compression::decompressor::ptr decomp) override {
column::read(in, buf, decomp); // read common header
size_t blocks_count = in.read_vint(); // total number of column index blocks
@ -4677,17 +4824,15 @@ class dense_fixed_offset_column final : public column {
auto& ref = const_cast<block_ref&>(refs_[block_idx]);
const auto& cached = load_block(*ctxs_, ref);
const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), ref);
return cached.value(key, value);
}
virtual bool visit(
const columnstore_reader::values_visitor_f& visitor
) const override {
virtual bool visit(const columnstore_reader::values_visitor_f& visitor) const override {
block_t block; // don't cache new blocks
for (auto& ref : refs_) {
const auto& cached = load_block(*ctxs_, ref, block);
const auto& cached = load_block(*ctxs_, decompressor(), encrypted(), ref, block);
if (!cached.visit(visitor)) {
return false;
@ -4786,12 +4931,12 @@ class dense_fixed_offset_column<dense_mask_block> final : public column {
: column(prop) {
}
virtual void read(data_input& in, uint64_t* buf) override {
virtual void read(data_input& in, uint64_t* buf, compression::decompressor::ptr decomp) override {
// we treat data in blocks as "garbage" which could be
// potentially removed on merge, so we don't validate
// column properties using such blocks
column::read(in, buf); // read common header
column::read(in, buf, decomp); // read common header
uint32_t blocks_count = in.read_vint(); // total number of column index blocks
@ -4924,25 +5069,25 @@ irs::doc_iterator::ptr dense_fixed_offset_column<dense_mask_block>::iterator() c
typedef std::function<
column::ptr(const context_provider& ctxs, ColumnProperty prop)
> column_factory_f;
// Column | Blocks
const column_factory_f g_column_factories[] { // CP_DENSE | CP_MASK CP_FIXED CP_DENSE
&sparse_column<sparse_block>::make, // 0 | 0 0 0
&sparse_column<dense_block>::make, // 0 | 0 0 1
&sparse_column<sparse_block>::make, // 0 | 0 1 0
&sparse_column<dense_fixed_offset_block>::make, // 0 | 0 1 1
nullptr, /* invalid properties, should never happen */ // 0 | 1 0 0
nullptr, /* invalid properties, should never happen */ // 0 | 1 0 1
&sparse_column<sparse_mask_block>::make, // 0 | 1 1 0
&sparse_column<dense_mask_block>::make, // 0 | 1 1 1
// Column | Blocks
const column_factory_f COLUMN_FACTORIES[] { // CP_COLUMN_DENSE | CP_MASK CP_FIXED CP_DENSE
&sparse_column<sparse_block>::make, // 0 | 0 0 0
&sparse_column<dense_block>::make, // 0 | 0 0 1
&sparse_column<sparse_block>::make, // 0 | 0 1 0
&sparse_column<dense_fixed_offset_block>::make, // 0 | 0 1 1
nullptr, /* invalid properties, should never happen */ // 0 | 1 0 0
nullptr, /* invalid properties, should never happen */ // 0 | 1 0 1
&sparse_column<sparse_mask_block>::make, // 0 | 1 1 0
&sparse_column<dense_mask_block>::make, // 0 | 1 1 1
&sparse_column<sparse_block>::make, // 1 | 0 0 0
&sparse_column<dense_block>::make, // 1 | 0 0 1
&sparse_column<sparse_block>::make, // 1 | 0 1 0
&dense_fixed_offset_column<dense_fixed_offset_block>::make, // 1 | 0 1 1
nullptr, /* invalid properties, should never happen */ // 1 | 1 0 0
nullptr, /* invalid properties, should never happen */ // 1 | 1 0 1
&sparse_column<sparse_mask_block>::make, // 1 | 1 1 0
&dense_fixed_offset_column<dense_mask_block>::make // 1 | 1 1 1
&sparse_column<sparse_block>::make, // 1 | 0 0 0
&sparse_column<dense_block>::make, // 1 | 0 0 1
&sparse_column<sparse_block>::make, // 1 | 0 1 0
&dense_fixed_offset_column<dense_fixed_offset_block>::make, // 1 | 0 1 1
nullptr, /* invalid properties, should never happen */ // 1 | 1 0 0
nullptr, /* invalid properties, should never happen */ // 1 | 1 0 1
&sparse_column<sparse_mask_block>::make, // 1 | 1 1 0
&dense_fixed_offset_column<dense_mask_block>::make // 1 | 1 1 1
};
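Since encryption wraps a block without changing its layout, the reader (below) masks CP_COLUMN_ENCRYPT off the stored properties and uses the remaining bits (CP_DENSE, CP_FIXED, CP_MASK, CP_COLUMN_DENSE) as the index into this 16-entry table. A sketch of that lookup under the enum values above:

#include <cassert>
#include <cstdint>

enum ColumnProperty : uint32_t {
  CP_SPARSE = 0,
  CP_DENSE = 1,
  CP_FIXED = 1 << 1,
  CP_MASK = 1 << 2,
  CP_COLUMN_DENSE = 1 << 3,
  CP_COLUMN_ENCRYPT = 1 << 4
};

// encryption does not affect block layout, so it is masked off
uint32_t factory_id(uint32_t props) {
  return props & ~uint32_t(CP_COLUMN_ENCRYPT);
}

int main() {
  // an encrypted dense fixed-offset column selects the same factory
  // as its unencrypted counterpart
  assert(factory_id(CP_COLUMN_ENCRYPT | CP_COLUMN_DENSE | CP_FIXED | CP_DENSE)
         == (CP_COLUMN_DENSE | CP_FIXED | CP_DENSE));
}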
//////////////////////////////////////////////////////////////////////////////
@ -4969,10 +5114,7 @@ class reader final: public columnstore_reader, public context_provider {
std::vector<column::ptr> columns_;
}; // reader
bool reader::prepare(
const directory& dir,
const segment_meta& meta
) {
bool reader::prepare(const directory& dir, const segment_meta& meta) {
const auto filename = file_name<columnstore_writer>(meta);
bool exists;
@ -4999,18 +5141,28 @@ bool reader::prepare(
}
// check header
format_utils::check_header(
const auto version = format_utils::check_header(
*stream,
writer::FORMAT_NAME,
writer::FORMAT_MIN,
writer::FORMAT_MAX
);
encryption::stream::ptr cipher;
if (version > writer::FORMAT_MIN) {
auto* enc = get_encryption(dir.attributes());
if (irs::decrypt(filename, *stream, enc, cipher)) {
assert(cipher && cipher->block_size());
}
}
// since column data can be very large,
// it is too costly to verify the checksum
// of the entire file; here we perform cheap
// error detection which can recognize
// some forms of corruption. */
// some forms of corruption
format_utils::read_checksum(*stream);
// seek to data start
@ -5023,8 +5175,9 @@ bool reader::prepare(
for (size_t i = 0, size = columns.capacity(); i < size; ++i) {
// read column properties
const auto props = read_enum<ColumnProperty>(*stream);
const auto factory_id = (props & (~CP_COLUMN_ENCRYPT));
if (props >= IRESEARCH_COUNTOF(g_column_factories)) {
if (factory_id >= IRESEARCH_COUNTOF(COLUMN_FACTORIES)) {
throw index_error(string_utils::to_string(
"Failed to load column id=" IR_SIZE_T_SPECIFIER ", got invalid properties=%d",
i, static_cast<uint32_t>(props)
@ -5032,7 +5185,7 @@ bool reader::prepare(
}
// create column
const auto& factory = g_column_factories[props];
const auto& factory = COLUMN_FACTORIES[factory_id];
if (!factory) {
static_assert(
@ -5054,8 +5207,32 @@ bool reader::prepare(
));
}
compression::decompressor::ptr decomp;
if (version > writer::FORMAT_MIN) {
const auto compression_id = read_string<std::string>(*stream);
decomp = compression::get_decompressor(compression_id);
if (!decomp && !compression::exists(compression_id)) {
throw index_error(string_utils::to_string(
"Failed to load compression '%s' for column id=" IR_SIZE_T_SPECIFIER,
compression_id.c_str(), i));
}
if (decomp && !decomp->prepare(*stream)) {
throw index_error(string_utils::to_string(
"Failed to prepare compression '%s' for column id=" IR_SIZE_T_SPECIFIER,
compression_id.c_str(), i));
}
} else {
// we don't support encryption and custom
// compression for 'FORMAT_MIN' version
decomp = compression::get_decompressor(compression::lz4::type());
assert(decomp);
}
try {
column->read(*stream, buf);
column->read(*stream, buf, decomp);
} catch (...) {
IR_FRMT_ERROR("Failed to load column id=" IR_SIZE_T_SPECIFIER, i);
@ -5067,7 +5244,7 @@ bool reader::prepare(
}
// noexcept
context_provider::prepare(std::move(stream));
context_provider::prepare(std::move(stream), std::move(cipher));
columns_ = std::move(columns);
return true;
@ -5300,7 +5477,7 @@ class format10 : public irs::version10::format {
virtual column_meta_writer::ptr get_column_meta_writer() const override;
virtual column_meta_reader::ptr get_column_meta_reader() const override final;
virtual columnstore_writer::ptr get_columnstore_writer() const override final;
virtual columnstore_writer::ptr get_columnstore_writer() const override;
virtual columnstore_reader::ptr get_columnstore_reader() const override final;
virtual postings_writer::ptr get_postings_writer(bool volatile_state) const override;
@ -5378,7 +5555,9 @@ column_meta_reader::ptr format10::get_column_meta_reader() const {
}
columnstore_writer::ptr format10::get_columnstore_writer() const {
return memory::make_unique<columns::writer>();
return memory::make_unique<columns::writer>(
int32_t(columns::writer::FORMAT_MIN)
);
}
columnstore_reader::ptr format10::get_columnstore_reader() const {
@ -5407,7 +5586,7 @@ REGISTER_FORMAT(::format10);
// --SECTION-- format11
// ----------------------------------------------------------------------------
class format11 final : public format10 {
class format11 : public format10 {
public:
DECLARE_FORMAT_TYPE();
DECLARE_FACTORY();
@ -5421,6 +5600,11 @@ class format11 final : public format10 {
virtual segment_meta_writer::ptr get_segment_meta_writer() const override final;
virtual column_meta_writer::ptr get_column_meta_writer() const override final;
protected:
explicit format11(const irs::format::type_id& type) NOEXCEPT
: format10(type) {
}
}; // format11
index_meta_writer::ptr format11::get_index_meta_writer() const {
@ -5460,6 +5644,36 @@ column_meta_writer::ptr format11::get_column_meta_writer() const {
DEFINE_FORMAT_TYPE_NAMED(::format11, "1_1");
REGISTER_FORMAT(::format11);
// ----------------------------------------------------------------------------
// --SECTION-- format12
// ----------------------------------------------------------------------------
class format12 final : public format11 {
public:
DECLARE_FORMAT_TYPE();
DECLARE_FACTORY();
format12() NOEXCEPT : format11(format12::type()) { }
virtual columnstore_writer::ptr get_columnstore_writer() const override final;
}; // format12
columnstore_writer::ptr format12::get_columnstore_writer() const {
return memory::make_unique<columns::writer>(
int32_t(columns::writer::FORMAT_MAX)
);
}
/*static*/ irs::format::ptr format12::make() {
static const ::format12 INSTANCE;
// aliasing constructor
return irs::format::ptr(irs::format::ptr(), &INSTANCE);
}
DEFINE_FORMAT_TYPE_NAMED(::format12, "1_2");
REGISTER_FORMAT(::format12);
NS_END
NS_ROOT
@ -5469,6 +5683,7 @@ void init() {
#ifndef IRESEARCH_DLL
REGISTER_FORMAT(::format10);
REGISTER_FORMAT(::format11);
REGISTER_FORMAT(::format12);
#endif
}

View File

@ -0,0 +1,61 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////
#ifndef IRESEARCH_COLUMN_INFO_H
#define IRESEARCH_COLUMN_INFO_H
#include "utils/string.hpp"
#include "utils/compression.hpp"
#include <functional>
NS_ROOT
////////////////////////////////////////////////////////////////////////////////
/// @class column_info
////////////////////////////////////////////////////////////////////////////////
class column_info {
public:
column_info(const compression::type_id& compression,
const compression::options& options,
bool encryption) NOEXCEPT
: compression_(&compression),
options_(options),
encryption_(encryption) {
}
const compression::type_id& compression() const NOEXCEPT { return *compression_; }
const compression::options& options() const NOEXCEPT { return options_; }
bool encryption() const NOEXCEPT { return encryption_; }
private:
const compression::type_id* compression_;
const compression::options options_;
bool encryption_;
}; // column_info
typedef std::function<column_info(const string_ref)> column_info_provider_t;
NS_END
#endif // IRESEARCH_COLUMN_INFO_H
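column_info_provider_t is the hook that lets an application pick compression and encryption per column name; the columnstore writer consults it on every push_column. A hedged usage sketch (the lambda body is illustrative; the types and headers are the ones introduced elsewhere in this commit):

#include "index/column_info.hpp"
#include "utils/compression.hpp"
#include "utils/lz4compression.hpp"

// hypothetical policy: keep norm columns raw and unencrypted,
// lz4-compress and encrypt everything else
irs::column_info_provider_t provider = [](const irs::string_ref name) {
  if (name == "norm") {
    return irs::column_info{ irs::compression::raw::type(), {}, false };
  }
  return irs::column_info{
    irs::compression::lz4::type(), irs::compression::options(), true
  };
};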

View File

@ -292,6 +292,7 @@ directory_reader_impl::directory_reader_impl(
std::unordered_map<string_ref, size_t> reuse_candidates; // map by segment name to old segment id
for(size_t i = 0, count = cached_impl ? cached_impl->meta_.meta.size() : 0; i < count; ++i) {
assert(cached_impl); // ensured by loop condition above
auto itr = reuse_candidates.emplace(
cached_impl->meta_.meta.segment(i).meta.name, i
);
@ -366,4 +367,4 @@ NS_END
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------

View File

@ -38,6 +38,7 @@
#include "utils/bit_utils.hpp"
#include "utils/io_utils.hpp"
#include "utils/log.hpp"
#include "utils/lz4compression.hpp"
#include "utils/map_utils.hpp"
#include "utils/memory.hpp"
#include "utils/object_pool.hpp"
@ -55,6 +56,12 @@ using namespace irs;
const byte_block_pool EMPTY_POOL;
const column_info NORM_COLUMN{
compression::lz4::type(),
compression::options(),
false
};
// -----------------------------------------------------------------------------
// --SECTION-- helpers
// -----------------------------------------------------------------------------
@ -784,7 +791,9 @@ void field_data::reset(doc_id_t doc_id) {
data_output& field_data::norms(columnstore_writer& writer) {
if (!norms_) {
auto handle = writer.push_column();
// FIXME encoder for norms???
// do not encrypt norms
auto handle = writer.push_column(NORM_COLUMN);
norms_ = std::move(handle.second);
meta_.norm = handle.first;
}

View File

@ -27,8 +27,8 @@
#include "comparer.hpp"
#include "formats/format_utils.hpp"
#include "search/exclusion.hpp"
#include "utils/bitset.hpp"
#include "utils/bitvector.hpp"
#include "utils/compression.hpp"
#include "utils/directory_utils.hpp"
#include "utils/index_utils.hpp"
#include "utils/string_utils.hpp"
@ -47,6 +47,11 @@ typedef range<irs::segment_writer::update_context> update_contexts_ref;
const size_t NON_UPDATE_RECORD = irs::integer_traits<size_t>::const_max; // non-update
const irs::column_info_provider_t DEFAULT_COLUMN_INFO = [](const irs::string_ref&) {
// no compression, no encryption
return irs::column_info{ irs::compression::raw::type(), {}, false };
};
struct flush_segment_context {
const size_t doc_id_begin_; // starting doc_id to consider in 'segment.meta' (inclusive)
const size_t doc_id_end_; // ending doc_id to consider in 'segment.meta' (exclusive)
@ -383,8 +388,7 @@ bool map_removals(
const candidates_mapping_t& candidates_mapping,
const irs::merge_writer& merger,
irs::readers_cache& readers,
irs::document_mask& docs_mask
) {
irs::document_mask& docs_mask) {
assert(merger);
for (auto& mapping : candidates_mapping) {
@ -594,7 +598,14 @@ index_writer::active_segment_context::active_segment_context(
flush_ctx_(flush_ctx),
pending_segment_context_offset_(pending_segment_context_offset),
segments_active_(&segments_active) {
assert(!flush_ctx || flush_ctx->pending_segment_contexts_[pending_segment_context_offset_].segment_ == ctx_); // thread-safe because pending_segment_contexts_ is a deque
#ifdef IRESEARCH_DEBUG
if (flush_ctx) {
// ensure there are no active struct update operations (only needed for assert)
SCOPED_LOCK_NAMED(flush_ctx->mutex_, lock);
// assert that flush_ctx and ctx are compatible
assert(flush_ctx->pending_segment_contexts_[pending_segment_context_offset_].segment_ == ctx_);
}
#endif
if (ctx_) {
++*segments_active_; // track here since guaranteed to have 1 ref per active segment
@ -602,8 +613,7 @@ index_writer::active_segment_context::active_segment_context(
}
index_writer::active_segment_context::active_segment_context(
active_segment_context&& other
) NOEXCEPT
active_segment_context&& other) NOEXCEPT
: ctx_(std::move(other.ctx_)),
flush_ctx_(std::move(other.flush_ctx_)),
pending_segment_context_offset_(std::move(other.pending_segment_context_offset_)),
@ -723,6 +733,7 @@ index_writer::documents_context::~documents_context() NOEXCEPT {
void index_writer::documents_context::reset() NOEXCEPT {
tick_ = 0; // reset tick
auto& ctx = segment_.ctx();
if (!ctx) {
@ -1025,16 +1036,17 @@ void index_writer::flush_context::reset() NOEXCEPT {
index_writer::segment_context::segment_context(
directory& dir,
segment_meta_generator_t&& meta_generator,
const comparer* comparator
): active_count_(0),
buffered_docs_(0),
dirty_(false),
dir_(dir),
meta_generator_(std::move(meta_generator)),
uncomitted_doc_id_begin_(doc_limits::min()),
uncomitted_generation_offset_(0),
uncomitted_modification_queries_(0),
writer_(segment_writer::make(dir_, comparator)) {
const column_info_provider_t& column_info,
const comparer* comparator)
: active_count_(0),
buffered_docs_(0),
dirty_(false),
dir_(dir),
meta_generator_(std::move(meta_generator)),
uncomitted_doc_id_begin_(doc_limits::min()),
uncomitted_generation_offset_(0),
uncomitted_modification_queries_(0),
writer_(segment_writer::make(dir_, column_info, comparator)) {
assert(meta_generator_);
}
@ -1081,9 +1093,9 @@ uint64_t index_writer::segment_context::flush() {
index_writer::segment_context::ptr index_writer::segment_context::make(
directory& dir,
segment_meta_generator_t&& meta_generator,
const comparer* comparator
) {
return memory::make_shared<segment_context>(dir, std::move(meta_generator), comparator);
const column_info_provider_t& column_info,
const comparer* comparator) {
return memory::make_shared<segment_context>(dir, std::move(meta_generator), column_info, comparator);
}
segment_writer::update_context index_writer::segment_context::make_update_context() {
@ -1094,8 +1106,7 @@ segment_writer::update_context index_writer::segment_context::make_update_contex
}
segment_writer::update_context index_writer::segment_context::make_update_context(
const filter& filter
) {
const filter& filter) {
auto generation = ++uncomitted_generation_offset_; // increment generation due to removal
auto update_id = modification_queries_.size();
@ -1108,8 +1119,7 @@ segment_writer::update_context index_writer::segment_context::make_update_contex
}
segment_writer::update_context index_writer::segment_context::make_update_context(
const std::shared_ptr<filter>& filter
) {
const std::shared_ptr<filter>& filter) {
assert(filter);
auto generation = ++uncomitted_generation_offset_; // increment generation due to removal
auto update_id = modification_queries_.size();
@ -1123,8 +1133,7 @@ segment_writer::update_context index_writer::segment_context::make_update_contex
}
segment_writer::update_context index_writer::segment_context::make_update_context(
filter::ptr&& filter
) {
filter::ptr&& filter) {
assert(filter);
auto generation = ++uncomitted_generation_offset_; // increment generation due to removal
auto update_id = modification_queries_.size();
@ -1200,9 +1209,10 @@ index_writer::index_writer(
size_t segment_pool_size,
const segment_options& segment_limits,
const comparer* comparator,
const column_info_provider_t& column_info,
index_meta&& meta,
committed_state_t&& committed_state
) NOEXCEPT :
committed_state_t&& committed_state)
: column_info_(column_info),
comparator_(comparator),
cached_readers_(dir),
codec_(codec),
@ -1216,6 +1226,7 @@ index_writer::index_writer(
writer_(codec->get_index_meta_writer()),
write_lock_(std::move(lock)),
write_lock_file_ref_(std::move(lock_file_ref)) {
assert(column_info); // ensured by 'make'
assert(codec);
flush_context_.store(&flush_context_pool_[0]);
@ -1256,6 +1267,9 @@ void index_writer::clear() {
pending_meta.update_generation(meta_); // clone index metadata generation
pending_meta.seg_counter_.store(meta_.counter()); // ensure counter() >= max(seg#)
// rollback already opened transaction if any
writer_->rollback();
// write 1st phase of index_meta transaction
if (!writer_->prepare(dir, pending_meta)) {
throw illegal_state();
@ -1288,8 +1302,7 @@ index_writer::ptr index_writer::make(
directory& dir,
format::ptr codec,
OpenMode mode,
const init_options& opts /*= init_options()*/
) {
const init_options& opts /*= init_options()*/) {
std::vector<index_file_refs::ref_t> file_refs;
index_lock::ptr lock;
index_file_refs::ref_t lockfile_ref;
@ -1351,6 +1364,7 @@ index_writer::ptr index_writer::make(
opts.segment_pool_size,
segment_options(opts),
opts.comparator,
opts.column_info ? opts.column_info : DEFAULT_COLUMN_INFO,
std::move(meta),
std::move(comitted_state)
);
@ -1385,8 +1399,7 @@ uint64_t index_writer::buffered_docs() const {
bool index_writer::consolidate(
const consolidation_policy_t& policy,
format::ptr codec /*= nullptr*/,
const merge_writer::flush_progress_t& progress /*= {}*/
) {
const merge_writer::flush_progress_t& progress /*= {}*/) {
REGISTER_TIMER_DETAILED();
if (!codec) {
@ -1490,7 +1503,7 @@ bool index_writer::consolidate(
consolidation_segment.meta.name = file_name(meta_.increment()); // increment active meta, not fn arg
ref_tracking_directory dir(dir_); // track references for new segment
merge_writer merger(dir, comparator_);
merge_writer merger(dir, column_info_, comparator_);
merger.reserve(candidates.size());
// add consolidated segments to the merge_writer
@ -1669,8 +1682,7 @@ bool index_writer::consolidate(
bool index_writer::import(
const index_reader& reader,
format::ptr codec /*= nullptr*/,
const merge_writer::flush_progress_t& progress /*= {}*/
) {
const merge_writer::flush_progress_t& progress /*= {}*/) {
if (!reader.live_docs_count()) {
return true; // skip empty readers since no documents to import
}
@ -1685,7 +1697,7 @@ bool index_writer::import(
segment.meta.name = file_name(meta_.increment());
segment.meta.codec = codec;
merge_writer merger(dir);
merge_writer merger(dir, column_info_, comparator_);
merger.reserve(reader.size());
for (auto& segment : reader) {
@ -1773,11 +1785,12 @@ index_writer::flush_context_ptr index_writer::get_flush_context(bool shared /*=
}
index_writer::active_segment_context index_writer::get_segment_context(
flush_context& ctx
) {
auto segments_active_decrement =
irs::make_finally([this]()->void { --segments_active_; }); // release reservation (delcare before aquisition since operator++() is noexcept)
auto segments_active = ++segments_active_; // increment counter to aquire reservation, if another thread tries to reserve last context then it'll be over limit
flush_context& ctx) {
// release reservation (declare before acquisition since operator++() is noexcept)
auto segments_active_decrement = irs::make_finally([this]()->void { --segments_active_; });
// increment counter to acquire reservation, if another thread
// tries to reserve the last context then it'll be over the limit
auto segments_active = ++segments_active_;
auto segment_count_max = segment_limits_.segment_count_max.load();
// no free segment_context available and maximum number of segments reached
@ -1794,7 +1807,6 @@ index_writer::active_segment_context index_writer::get_segment_context(
); // only nodes of type 'pending_segment_context' are added to 'pending_segment_contexts_freelist_'
if (freelist_node) {
assert(ctx.pending_segment_contexts_[freelist_node->value].segment_ == freelist_node->segment_); // thread-safe because pending_segment_contexts_ is a deque
assert(freelist_node->segment_.use_count() == 1); // +1 for the reference in 'pending_segment_contexts_'
assert(!freelist_node->segment_->dirty_);
return active_segment_context(
@ -1809,14 +1821,16 @@ index_writer::active_segment_context index_writer::get_segment_context(
auto meta_generator = [this]()->segment_meta {
return segment_meta(file_name(meta_.increment()), codec_);
};
auto segment_ctx =
segment_writer_pool_.emplace(dir_, std::move(meta_generator), comparator_).release();
auto segment_ctx = segment_writer_pool_.emplace(
dir_, std::move(meta_generator),
column_info_, comparator_
).release();
auto segment_memory_max = segment_limits_.segment_memory_max.load();
// recreate writer if it reserved more memory than allowed by current limits
if (segment_memory_max &&
segment_memory_max < segment_ctx->writer_->memory_reserved()) {
segment_ctx->writer_ = segment_writer::make(segment_ctx->dir_, comparator_);
segment_ctx->writer_ = segment_writer::make(segment_ctx->dir_, column_info_, comparator_);
}
return active_segment_context(segment_ctx, segments_active_);
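Tying the thread together: init_options gains a column_info member, and when the caller leaves it empty index_writer::make falls back to DEFAULT_COLUMN_INFO (raw compression, no encryption). A hedged sketch of wiring a custom provider, assuming 'dir' and 'codec' are already in scope and OM_CREATE is one of the OpenMode values:

irs::index_writer::init_options opts;
opts.column_info = [](const irs::string_ref) {
  // lz4-compress every column, no encryption
  return irs::column_info{
    irs::compression::lz4::type(), irs::compression::options(), false
  };
};

auto writer = irs::index_writer::make(dir, codec, irs::OM_CREATE, opts);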

View File

@ -21,10 +21,11 @@
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#ifndef IRESEARCH_INDEXWRITER_H
#define IRESEARCH_INDEXWRITER_H
#ifndef IRESEARCH_INDEX_WRITER_H
#define IRESEARCH_INDEX_WRITER_H
#include "field_meta.hpp"
#include "column_info.hpp"
#include "index_meta.hpp"
#include "merge_writer.hpp"
#include "segment_reader.hpp"
@ -53,17 +54,21 @@ class directory_reader;
class readers_cache final : util::noncopyable {
public:
struct key_t {
std::string name;
uint64_t version;
key_t(const segment_meta& meta); // implicit constructor
bool operator<(const key_t& other) const NOEXCEPT {
return name < other.name
|| (name == other.name && version < other.version);
}
bool operator==(const key_t& other) const NOEXCEPT {
return name == other.name && version == other.version;
}
std::string name;
uint64_t version;
};
struct key_hash_t {
size_t operator()(const key_t& key) const NOEXCEPT {
return std::hash<std::string>()(key.name);
@ -108,11 +113,12 @@ ENABLE_BITMASK_ENUM(OpenMode);
/// the same directory simultaneously.
/// Thread safe.
////////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API index_writer:
private atomic_shared_ptr_helper<std::pair<
std::shared_ptr<index_meta>, std::vector<index_file_refs::ref_t>
>>,
private util::noncopyable {
class IRESEARCH_API index_writer
: private atomic_shared_ptr_helper<
std::pair<
std::shared_ptr<index_meta>, std::vector<index_file_refs::ref_t>
>>,
private util::noncopyable {
private:
struct flush_context; // forward declaration
struct segment_context; // forward declaration
@ -122,9 +128,7 @@ class IRESEARCH_API index_writer:
void(*)(flush_context*) // sizeof(std::function<void(flush_context*)>) > sizeof(void(*)(flush_context*))
> flush_context_ptr; // unique pointer required since a pointer declaration is needed before the class declaration, e.g. for 'documents_context'
typedef std::shared_ptr<
segment_context
> segment_context_ptr; // declaration from segment_context::ptr below
typedef std::shared_ptr<segment_context> segment_context_ptr; // declaration from segment_context::ptr below
//////////////////////////////////////////////////////////////////////////////
/// @brief segment references given out by flush_context to allow tracking
@ -427,7 +431,12 @@ class IRESEARCH_API index_writer:
//////////////////////////////////////////////////////////////////////////////
/// @brief options the writer should use after creation
//////////////////////////////////////////////////////////////////////////////
struct init_options: public segment_options {
struct init_options : public segment_options {
////////////////////////////////////////////////////////////////////////////
/// @brief column info the writer should use for the columnstore
////////////////////////////////////////////////////////////////////////////
column_info_provider_t column_info;
////////////////////////////////////////////////////////////////////////////
/// @brief comparator defines physical order of documents in each segment
/// produced by an index_writer.
@ -457,18 +466,14 @@ class IRESEARCH_API index_writer:
};
struct segment_hash {
size_t operator()(
const segment_meta* segment
) const NOEXCEPT {
size_t operator()(const segment_meta* segment) const NOEXCEPT {
return hash_utils::hash(segment->name);
}
}; // segment_hash
struct segment_equal {
size_t operator()(
const segment_meta* lhs,
const segment_meta* rhs
) const NOEXCEPT {
size_t operator()(const segment_meta* lhs,
const segment_meta* rhs) const NOEXCEPT {
return lhs->name == rhs->name;
}
}; // segment_equal
@ -517,6 +522,7 @@ class IRESEARCH_API index_writer:
////////////////////////////////////////////////////////////////////////////
/// @brief Clears the existing index repository by starting an empty index.
/// Previously opened readers still remain valid.
/// @note call will rollback any opened transaction
////////////////////////////////////////////////////////////////////////////
void clear();
@ -801,8 +807,8 @@ class IRESEARCH_API index_writer:
segment_writer::ptr writer_;
index_meta::index_segment_t writer_meta_; // the segment_meta this writer was initialized with
DECLARE_FACTORY(directory& dir, segment_meta_generator_t&& meta_generator, const comparer* comparator);
segment_context(directory& dir, segment_meta_generator_t&& meta_generator, const comparer* comparator);
DECLARE_FACTORY(directory& dir, segment_meta_generator_t&& meta_generator, const column_info_provider_t& column_info, const comparer* comparator);
segment_context(directory& dir, segment_meta_generator_t&& meta_generator, const column_info_provider_t& column_info, const comparer* comparator);
////////////////////////////////////////////////////////////////////////////
/// @brief flush current writer state into a materialized segment
@ -1000,7 +1006,11 @@ class IRESEARCH_API index_writer:
committed_state_t commit; // meta + references of next commit
operator bool() const NOEXCEPT { return ctx && commit; }
void reset() NOEXCEPT { ctx.reset(), commit.reset(); }
void reset() NOEXCEPT {
ctx.reset();
commit.reset();
}
}; // pending_state_t
index_writer(
@ -1011,9 +1021,10 @@ class IRESEARCH_API index_writer:
size_t segment_pool_size,
const segment_options& segment_limits,
const comparer* comparator,
index_meta&& meta,
const column_info_provider_t& column_info,
index_meta&& meta,
committed_state_t&& committed_state
) NOEXCEPT;
);
pending_context_t flush_all(const before_commit_f& before_commit);
@ -1025,6 +1036,7 @@ class IRESEARCH_API index_writer:
void abort(); // aborts transaction
IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
column_info_provider_t column_info_;
const comparer* comparator_;
readers_cache cached_readers_; // readers by segment name
format::ptr codec_;
@ -1048,4 +1060,4 @@ class IRESEARCH_API index_writer:
NS_END
#endif
#endif // IRESEARCH_INDEX_WRITER_H

View File

@ -33,6 +33,7 @@
#include "index/comparer.hpp"
#include "utils/directory_utils.hpp"
#include "utils/log.hpp"
#include "utils/lz4compression.hpp"
#include "utils/type_limits.hpp"
#include "utils/version_utils.hpp"
#include "store/store_utils.hpp"
@ -43,6 +44,12 @@
NS_LOCAL
const irs::column_info NORM_COLUMN{
irs::compression::lz4::type(),
irs::compression::options(),
false
};
// mapping of old doc_id to new doc_id (reader doc_ids are sequential 0 based)
// masked doc_ids have value of MASKED_DOC_ID
typedef std::vector<irs::doc_id_t> doc_id_map_t;
@ -958,8 +965,7 @@ class columnstore {
bool insert(
const irs::sub_reader& reader,
irs::field_id column,
const doc_map_f& doc_map
) {
const doc_map_f& doc_map) {
const auto* column_reader = reader.column_reader(column);
if (!column_reader) {
@ -1010,9 +1016,9 @@ class columnstore {
return true;
}
void reset() {
void reset(const irs::column_info& info) {
if (!empty_) {
column_ = writer_->push_column();
column_ = writer_->push_column(info);
empty_ = true;
}
}
@ -1128,10 +1134,10 @@ bool write_columns(
columnstore& cs,
CompoundIterator& columns,
irs::directory& dir,
const irs::column_info_provider_t& column_info,
const irs::segment_meta& meta,
compound_column_meta_iterator_t& column_meta_itr,
const irs::merge_writer::flush_progress_t& progress
) {
const irs::merge_writer::flush_progress_t& progress) {
REGISTER_TIMER_DETAILED();
assert(cs);
assert(progress);
@ -1159,7 +1165,8 @@ bool write_columns(
column_meta_writer->prepare(dir, meta);
while (column_meta_itr.next()) {
cs.reset();
const auto& column_name = (*column_meta_itr).name;
cs.reset(column_info(column_name));
// visit matched columns from merging segments and
// write all survived values to the new segment
@ -1172,7 +1179,7 @@ bool write_columns(
}
if (!cs.empty()) {
column_meta_writer->write((*column_meta_itr).name, cs.id());
column_meta_writer->write(column_name, cs.id());
}
}
@ -1187,10 +1194,10 @@ bool write_columns(
bool write_columns(
columnstore& cs,
irs::directory& dir,
const irs::column_info_provider_t& column_info,
const irs::segment_meta& meta,
compound_column_meta_iterator_t& column_itr,
const irs::merge_writer::flush_progress_t& progress
) {
const irs::merge_writer::flush_progress_t& progress) {
REGISTER_TIMER_DETAILED();
assert(cs);
assert(progress);
@ -1207,7 +1214,8 @@ bool write_columns(
cmw->prepare(dir, meta);
while (column_itr.next()) {
cs.reset();
const auto& column_name = (*column_itr).name;
cs.reset(column_info(column_name));
// visit matched columns from merging segments and
// write all survived values to the new segment
@ -1216,7 +1224,7 @@ bool write_columns(
}
if (!cs.empty()) {
cmw->write((*column_itr).name, cs.id());
cmw->write(column_name, cs.id());
}
}
@ -1262,7 +1270,7 @@ bool write_fields(
};
while (field_itr.next()) {
cs.reset();
cs.reset(NORM_COLUMN); // FIXME encoder for norms???
auto& field_meta = field_itr.meta();
auto& field_features = field_meta.features;
@ -1339,7 +1347,7 @@ bool write_fields(
};
while (field_itr.next()) {
cs.reset();
cs.reset(NORM_COLUMN); // FIXME encoder for norms???
auto& field_meta = field_itr.meta();
auto& field_features = field_meta.features;
@ -1419,7 +1427,9 @@ merge_writer::reader_ctx::reader_ctx(irs::sub_reader::ptr reader) NOEXCEPT
}
merge_writer::merge_writer() NOEXCEPT
: dir_(noop_directory::instance()) {
: dir_(noop_directory::instance()),
column_info_(nullptr),
comparator_(nullptr) {
}
merge_writer::operator bool() const NOEXCEPT {
@ -1429,8 +1439,7 @@ merge_writer::operator bool() const NOEXCEPT {
bool merge_writer::flush(
tracking_directory& dir,
index_meta::index_segment_t& segment,
const flush_progress_t& progress
) {
const flush_progress_t& progress) {
REGISTER_TIMER_DETAILED();
assert(progress);
assert(!comparator_);
@ -1500,7 +1509,7 @@ bool merge_writer::flush(
}
// write columns
if (!write_columns(cs, dir, segment.meta, columns_meta_itr, progress)) {
if (!write_columns(cs, dir, *column_info_, segment.meta, columns_meta_itr, progress)) {
return false; // flush failure
}
@ -1525,11 +1534,11 @@ bool merge_writer::flush(
bool merge_writer::flush_sorted(
tracking_directory& dir,
index_meta::index_segment_t& segment,
const flush_progress_t& progress
) {
const flush_progress_t& progress) {
REGISTER_TIMER_DETAILED();
assert(progress);
assert(comparator_);
assert(column_info_ && *column_info_);
field_meta_map_t field_meta_map;
compound_column_meta_iterator_t columns_meta_itr;
@ -1617,7 +1626,9 @@ bool merge_writer::flush_sorted(
auto writer = segment.meta.codec->get_columnstore_writer();
writer->prepare(dir, segment.meta);
auto column = writer->push_column();
// get column info for sorted column
const auto info = (*column_info_)(string_ref::NIL);
auto column = writer->push_column(info);
irs::doc_id_t next_id = irs::doc_limits::min();
while (columns_it.next()) {
@ -1674,7 +1685,7 @@ bool merge_writer::flush_sorted(
}
// write columns
if (!write_columns(cs, sorting_doc_it, dir, segment.meta, columns_meta_itr, progress)) {
if (!write_columns(cs, sorting_doc_it, dir, *column_info_, segment.meta, columns_meta_itr, progress)) {
return false; // flush failure
}

View File

@ -26,6 +26,7 @@
#include <vector>
#include "column_info.hpp"
#include "index_meta.hpp"
#include "utils/memory.hpp"
#include "utils/noncopyable.hpp"
@ -55,13 +56,18 @@ class IRESEARCH_API merge_writer: public util::noncopyable {
explicit merge_writer(
directory& dir,
const column_info_provider_t& column_info,
const comparer* comparator = nullptr) NOEXCEPT
: dir_(dir), comparator_(comparator) {
: dir_(dir),
column_info_(&column_info),
comparator_(comparator) {
assert(column_info);
}
merge_writer(merge_writer&& rhs) NOEXCEPT
: dir_(rhs.dir_),
readers_(std::move(rhs.readers_)),
column_info_(rhs.column_info_),
comparator_(rhs.comparator_){
}
@ -118,7 +124,8 @@ class IRESEARCH_API merge_writer: public util::noncopyable {
IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
directory& dir_;
std::vector<reader_ctx> readers_;
const comparer* comparator_{};
const column_info_provider_t* column_info_;
const comparer* comparator_;
IRESEARCH_API_PRIVATE_VARIABLES_END
}; // merge_writer

View File

@ -29,6 +29,7 @@
#include "analysis/token_attributes.hpp"
#include "utils/index_utils.hpp"
#include "utils/log.hpp"
#include "utils/lz4compression.hpp"
#include "utils/map_utils.hpp"
#include "utils/timer_utils.hpp"
#include "utils/type_limits.hpp"
@ -42,10 +43,13 @@ NS_ROOT
segment_writer::stored_column::stored_column(
const string_ref& name,
columnstore_writer& columnstore,
bool cache
) : name(name.c_str(), name.size()) {
const column_info_provider_t& column_info,
bool cache)
: name(name.c_str(), name.size()),
stream(column_info(name)) {
if (!cache) {
std::tie(id, writer) = columnstore.push_column();
auto& info = stream.info();
std::tie(id, writer) = columnstore.push_column(info);
} else {
writer = [this](irs::doc_id_t doc)->columnstore_writer::column_output& {
this->stream.prepare(doc);
@ -77,8 +81,11 @@ doc_id_t segment_writer::begin(
return doc_id_t(docs_cached() + doc_limits::min() - 1); // -1 for 0-based offset
}
segment_writer::ptr segment_writer::make(directory& dir, const comparer* comparator) {
return memory::maker<segment_writer>::make(dir, comparator);
segment_writer::ptr segment_writer::make(
directory& dir,
const column_info_provider_t& column_info,
const comparer* comparator) {
return memory::maker<segment_writer>::make(dir, column_info, comparator);
}
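For context, a minimal sketch of wiring a provider into the new factory signature; the provider shape (a callable mapping a column name to a column_info) and irs::memory_directory are assumptions based on the call sites in this change, not part of the change itself:

// hypothetical provider: same settings for every column, mirroring the
// NORM_COLUMN initializer used elsewhere in this change
irs::column_info_provider_t provider = [](const irs::string_ref& /*name*/) {
  return irs::column_info{
    irs::compression::lz4::type(), irs::compression::options(), false
  };
};

irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, provider, nullptr); // unsorted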
size_t segment_writer::memory_active() const NOEXCEPT {
@ -130,9 +137,11 @@ bool segment_writer::remove(doc_id_t doc_id) {
segment_writer::segment_writer(
directory& dir,
const comparer* comparator
) NOEXCEPT
: fields_(comparator),
const column_info_provider_t& column_info,
const comparer* comparator) NOEXCEPT
: sort_(column_info),
fields_(comparator),
column_info_(&column_info),
dir_(dir),
initialized_(false) {
}
@ -170,6 +179,7 @@ columnstore_writer::column_output& segment_writer::stream(
const hashed_string_ref& name,
const doc_id_t doc_id) {
REGISTER_TIMER_DETAILED();
assert(column_info_);
auto generator = [](
const hashed_string_ref& key,
@ -181,10 +191,10 @@ columnstore_writer::column_output& segment_writer::stream(
// replace original reference to 'name' provided by the caller
// with a reference to the cached copy in 'value'
return map_utils::try_emplace_update_key(
columns_, // container
generator, // key generator
name, // key
name, *col_writer_, nullptr != fields_.comparator() // value // FIXME
columns_, // container
generator, // key generator
name, // key
name, *col_writer_, *column_info_, nullptr != fields_.comparator() // value // FIXME
).first->second.writer(doc_id);
}
@ -206,8 +216,7 @@ void segment_writer::flush_column_meta(const segment_meta& meta) {
struct less_t {
bool operator()(
const stored_column* lhs,
const stored_column* rhs
) const NOEXCEPT {
const stored_column* rhs) const NOEXCEPT {
return lhs->name < rhs->name;
}
};

View File

@ -24,11 +24,13 @@
#ifndef IRESEARCH_TL_DOC_WRITER_H
#define IRESEARCH_TL_DOC_WRITER_H
#include "column_info.hpp"
#include "field_data.hpp"
#include "sorted_column.hpp"
#include "analysis/token_stream.hpp"
#include "formats/formats.hpp"
#include "utils/bitvector.hpp"
#include "utils/compression.hpp"
#include "utils/directory_utils.hpp"
#include "utils/noncopyable.hpp"
#include "utils/type_limits.hpp"
@ -164,7 +166,11 @@ class IRESEARCH_API segment_writer: util::noncopyable {
}; // document
DECLARE_UNIQUE_PTR(segment_writer);
DECLARE_FACTORY(directory& dir, const comparer* comparator);
DECLARE_FACTORY(
directory& dir,
const column_info_provider_t& column_info,
const comparer* comparator
);
struct update_context {
size_t generation;
@ -238,6 +244,7 @@ class IRESEARCH_API segment_writer: util::noncopyable {
stored_column(
const string_ref& name,
columnstore_writer& columnstore,
const column_info_provider_t& column_info,
bool cache
);
@ -248,13 +255,20 @@ class IRESEARCH_API segment_writer: util::noncopyable {
}; // stored_column
struct sorted_column : util::noncopyable {
sorted_column() = default;
explicit sorted_column(
const column_info_provider_t& column_info) NOEXCEPT
: stream(column_info(string_ref::NIL)) { // get compression for sorted column
}
irs::sorted_column stream;
field_id id{ field_limits::invalid() };
}; // sorted_column
segment_writer(directory& dir, const comparer* comparator) NOEXCEPT;
segment_writer(
directory& dir,
const column_info_provider_t& column_info,
const comparer* comparator
) NOEXCEPT;
bool index(
const hashed_string_ref& name,
@ -399,6 +413,7 @@ class IRESEARCH_API segment_writer: util::noncopyable {
std::unordered_set<field_data*> norm_fields_; // document fields for normalization
std::string seg_name_;
field_writer::ptr field_writer_;
const column_info_provider_t* column_info_;
column_meta_writer::ptr col_meta_writer_;
columnstore_writer::ptr col_writer_;
tracking_directory dir_;

View File

@ -26,6 +26,7 @@
#include "comparer.hpp"
#include "utils/type_limits.hpp"
#include "utils/misc.hpp"
#include "utils/lz4compression.hpp"
NS_ROOT
@ -93,7 +94,7 @@ std::pair<doc_map, field_id> sorted_column::flush(
}
// flush sorted data
auto column = writer.push_column();
auto column = writer.push_column(info_);
auto& column_writer = column.second;
new_doc_id = doc_limits::min();
@ -186,15 +187,14 @@ void sorted_column::flush_sparse(
field_id sorted_column::flush(
columnstore_writer& writer,
const doc_map& docmap,
std::vector<std::pair<doc_id_t, doc_id_t>>& buffer
) {
std::vector<std::pair<doc_id_t, doc_id_t>>& buffer) {
assert(docmap.size() < irs::doc_limits::eof());
if (index_.empty()) {
return field_limits::invalid();
}
auto column = writer.push_column();
auto column = writer.push_column(info_);
auto& column_writer = column.second;
// temporarily push sentinel

View File

@ -24,6 +24,7 @@
#ifndef IRESEARCH_SORTED_COLUMN_H
#define IRESEARCH_SORTED_COLUMN_H
#include "column_info.hpp"
#include "formats/formats.hpp"
#include "store/store_utils.hpp"
@ -39,7 +40,9 @@ class sorted_column final : public irs::columnstore_writer::column_output {
public:
typedef std::vector<std::pair<doc_id_t, doc_id_t>> flush_buffer_t;
sorted_column() = default;
explicit sorted_column(const column_info& info)
: info_(info) {
}
void prepare(doc_id_t key) {
assert(index_.empty() || key >= index_.back().first);
@ -54,11 +57,11 @@ class sorted_column final : public irs::columnstore_writer::column_output {
}
virtual void write_byte(byte_type b) override {
data_buf_.write_byte(b);
data_buf_ += b;
}
virtual void write_bytes(const byte_type* b, size_t size) override {
data_buf_.write_bytes(b, size);
data_buf_.append(b, size);
}
virtual void reset() override {
@ -66,7 +69,7 @@ class sorted_column final : public irs::columnstore_writer::column_output {
return;
}
data_buf_.reset(index_.back().second);
data_buf_.resize(index_.back().second);
index_.pop_back();
}
@ -79,7 +82,7 @@ class sorted_column final : public irs::columnstore_writer::column_output {
}
void clear() NOEXCEPT {
data_buf_.reset();
data_buf_.clear();
index_.clear();
}
@ -105,6 +108,10 @@ class sorted_column final : public irs::columnstore_writer::column_output {
return data_buf_.capacity() + index_.capacity()*sizeof(decltype(index_)::value_type);
}
const column_info& info() const NOEXCEPT {
return info_;
}
private:
void write_value(data_output& out, const size_t idx) {
assert(idx + 1 < index_.size());
@ -131,8 +138,9 @@ class sorted_column final : public irs::columnstore_writer::column_output {
flush_buffer_t& buffer
);
bytes_output data_buf_; // FIXME use memory_file or block_pool instead
bstring data_buf_; // FIXME use memory_file or block_pool instead
std::vector<std::pair<irs::doc_id_t, size_t>> index_; // doc_id + offset in 'data_buf_'
column_info info_;
}; // sorted_column
NS_END // ROOT

View File

@ -159,7 +159,7 @@
|| ((_MSC_FULL_VER >= 191326128) && (_MSC_FULL_VER <= 191326132)) \
|| ((_MSC_FULL_VER >= 191426430) && (_MSC_FULL_VER <= 191426433)) \
|| ((_MSC_FULL_VER >= 191526726) && (_MSC_FULL_VER <= 191526732)) \
|| ((_MSC_FULL_VER >= 191627023) && (_MSC_FULL_VER <= 191627031)))
|| ((_MSC_FULL_VER >= 191627023) && (_MSC_FULL_VER <= 191627032)))
#define MSVC2017_3456789_OPTIMIZED_WORKAROUND(...) __VA_ARGS__
#else
#define MSVC2017_3456789_OPTIMIZED_WORKAROUND(...)
@ -336,6 +336,17 @@
////////////////////////////////////////////////////////////////////////////////
// likely/unlikely branch indicator
// macro definitions similar to the ones at
// https://kernelnewbies.org/FAQ/LikelyUnlikely
#if defined(__GNUC__) || defined(__GNUG__)
#define IRS_LIKELY(v) __builtin_expect(!!(v), 1)
#define IRS_UNLIKELY(v) __builtin_expect(!!(v), 0)
#else
#define IRS_LIKELY(v) v
#define IRS_UNLIKELY(v) v
#endif
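A short usage sketch of the new macros (the same pattern appears in the lz4 code below): a branch marked cold lets GCC/Clang lay out the hot path first, and degrades to a plain expression on other compilers:

if (IRS_UNLIKELY(lz4_size < 0)) {
  return bytes_ref::NIL; // rarely-taken error path
}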
#ifdef IRESEARCH_DEBUG
#define IRS_ASSERT(CHECK) \
( (CHECK) ? void(0) : []{assert(!#CHECK);}() )

View File

@ -299,28 +299,6 @@ uint32_t write_block(
NS_END // bitpack
NS_END // encode
// ----------------------------------------------------------------------------
// --SECTION-- I/O streams
// ----------------------------------------------------------------------------
/* bytes_output */
bytes_output::bytes_output(size_t capacity) {
buf_.reserve(capacity);
}
bytes_output::bytes_output(bytes_output&& other) NOEXCEPT
: buf_(std::move(other.buf_)) {
}
bytes_output& bytes_output::operator=(bytes_output&& other) NOEXCEPT {
if (this != &other) {
buf_ = std::move(other.buf_);
}
return *this;
}
// ----------------------------------------------------------------------------
// --SECTION-- bytes_ref_input implementation
// ----------------------------------------------------------------------------

View File

@ -356,44 +356,23 @@ FORCE_INLINE bool shift_unpack_32(uint32_t in, uint32_t& out) NOEXCEPT {
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API bytes_output final : public data_output {
public:
bytes_output() = default;
explicit bytes_output(size_t capacity);
bytes_output(bytes_output&& rhs) NOEXCEPT;
bytes_output& operator=(bytes_output&& rhs) NOEXCEPT;
void reset(size_t size = 0) {
buf_.resize(size);
explicit bytes_output(bstring& buf) NOEXCEPT
: buf_(&buf) {
}
virtual void write_byte(byte_type b) override {
buf_ += b;
(*buf_) += b;
}
virtual void write_bytes(const byte_type* b, size_t size) override {
buf_.append(b, size);
}
const byte_type* c_str() const NOEXCEPT {
return buf_.c_str();
}
size_t size() const NOEXCEPT {
return buf_.size();
}
size_t capacity() const NOEXCEPT {
return buf_.capacity();
}
operator bytes_ref() const NOEXCEPT {
return buf_;
buf_->append(b, size);
}
virtual void close() override { }
private:
IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
bstring buf_;
bstring* buf_;
IRESEARCH_API_PRIVATE_VARIABLES_END
}; // bytes_output
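A minimal sketch of the reworked class: bytes_output now borrows a caller-owned bstring instead of owning its buffer, so the written bytes outlive the writer:

irs::bstring buf;
irs::bytes_output out(buf); // 'out' appends into 'buf'
out.write_byte(42);
out.write_bytes(reinterpret_cast<const irs::byte_type*>("abc"), 3);
assert(4 == buf.size()); // data lands directly in the caller's buffer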
@ -641,13 +620,11 @@ inline void decode(Iterator begin, Iterator end) {
const auto second = begin+1;
std::transform(second, end, begin, second, std::plus<value_type>());
assert(std::is_sorted(begin, end));
}
template<typename Iterator>
inline void encode(Iterator begin, Iterator end) {
assert(std::distance(begin, end) > 0 && std::is_sorted(begin, end));
assert(std::distance(begin, end) > 0);
typedef typename std::iterator_traits<Iterator>::value_type value_type;
const auto rend = irstd::make_reverse_iterator(begin);

View File

@ -29,14 +29,14 @@
NS_LOCAL
static std::thread::id INVALID;
const auto RW_MUTEX_WAIT_TIMEOUT = std::chrono::milliseconds(100);
NS_END
NS_ROOT
NS_BEGIN(async_utils)
busywait_mutex::busywait_mutex(): owner_(INVALID) {}
busywait_mutex::busywait_mutex(): owner_(std::thread::id()) {}
busywait_mutex::~busywait_mutex() {
assert(try_lock()); // ensure destroying an unlocked mutex
@ -45,18 +45,17 @@ busywait_mutex::~busywait_mutex() {
void busywait_mutex::lock() {
auto this_thread_id = std::this_thread::get_id();
for (auto expected = INVALID;
for (auto expected = std::thread::id();
!owner_.compare_exchange_strong(expected, this_thread_id);
expected = INVALID
) {
assert(this_thread_id != expected); // recursive lock aquisition attempted
expected = std::thread::id()) {
assert(this_thread_id != expected); // recursive lock acquisition attempted
std::this_thread::yield();
}
}
bool busywait_mutex::try_lock() {
auto this_thread_id = std::this_thread::get_id();
auto expected = INVALID;
auto expected = std::thread::id();
return owner_.compare_exchange_strong(expected, this_thread_id);
}
@ -64,9 +63,9 @@ bool busywait_mutex::try_lock() {
void busywait_mutex::unlock() {
auto expected = std::this_thread::get_id();
if (!owner_.compare_exchange_strong(expected, INVALID)) {
if (!owner_.compare_exchange_strong(expected, std::thread::id())) {
// try again since std::thread::id is guaranteed to be '==' but may not be bitwise equal
if (expected == std::this_thread::get_id() && owner_.compare_exchange_strong(expected, INVALID)) {
if (expected == std::this_thread::get_id() && owner_.compare_exchange_strong(expected, std::thread::id())) {
return;
}
@ -77,6 +76,7 @@ void busywait_mutex::unlock() {
read_write_mutex::read_write_mutex() NOEXCEPT
: concurrent_count_(0),
exclusive_count_(0),
exclusive_owner_(std::thread::id()),
exclusive_owner_recursion_count_(0) {
}
@ -100,8 +100,7 @@ void read_write_mutex::lock_read() {
// yield if there is already a writer waiting
// wait for notification (possibly with writers waiting) or no more writers waiting
while (exclusive_count_ && std::cv_status::timeout == reader_cond_.wait_for(lock, std::chrono::milliseconds(100))) {
}
while (exclusive_count_ && std::cv_status::timeout == reader_cond_.wait_for(lock, RW_MUTEX_WAIT_TIMEOUT)) {}
++concurrent_count_;
}
@ -120,7 +119,7 @@ void read_write_mutex::lock_write() {
// wait until lock is held exclusively by the current thread
while (concurrent_count_) {
try {
writer_cond_.wait_for(lock, std::chrono::milliseconds(100));
writer_cond_.wait_for(lock, RW_MUTEX_WAIT_TIMEOUT);
} catch (...) {
// 'wait_for' may throw according to specification
}
@ -218,11 +217,11 @@ void read_write_mutex::unlock(bool exclusive_only /*= false*/) {
--concurrent_count_;
#endif // IRESEARCH_DEBUG
// TODO: this should be changed to SCOPED_LOCK_NAMED, as right now it is not
// FIXME: this should be changed to SCOPED_LOCK_NAMED, as right now it is not
// guaranteed that we can successfully acquire the mutex here. and if we don't,
// there is no guarantee that the notify_all will wake up queued waiter.
TRY_SCOPED_LOCK_NAMED(mutex_, lock); // try to aquire mutex for use with cond
TRY_SCOPED_LOCK_NAMED(mutex_, lock); // try to acquire mutex for use with cond
// wake only writers since this is a reader
// wake even without lock since writer may be waiting in lock_write() on cond

View File

@ -39,7 +39,7 @@ NS_BEGIN(util)
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API const_attribute_store_provider {
public:
virtual ~const_attribute_store_provider() {}
virtual ~const_attribute_store_provider() = default;
virtual const irs::attribute_store& attributes() const NOEXCEPT = 0;
};
@ -49,7 +49,6 @@ class IRESEARCH_API const_attribute_store_provider {
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API attribute_store_provider: public const_attribute_store_provider {
public:
virtual ~attribute_store_provider() {}
virtual irs::attribute_store& attributes() NOEXCEPT = 0;
virtual const irs::attribute_store& attributes() const NOEXCEPT override final {
return const_cast<attribute_store_provider*>(this)->attributes();
@ -62,7 +61,7 @@ class IRESEARCH_API attribute_store_provider: public const_attribute_store_provi
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API const_attribute_view_provider {
public:
virtual ~const_attribute_view_provider() {}
virtual ~const_attribute_view_provider() = default;
virtual const irs::attribute_view& attributes() const NOEXCEPT = 0;
};
@ -72,7 +71,6 @@ class IRESEARCH_API const_attribute_view_provider {
//////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API attribute_view_provider: public const_attribute_view_provider {
public:
virtual ~attribute_view_provider() {}
virtual irs::attribute_view& attributes() NOEXCEPT = 0;
virtual const irs::attribute_view& attributes() const NOEXCEPT override final {
return const_cast<attribute_view_provider*>(this)->attributes();

View File

@ -36,7 +36,7 @@
NS_ROOT
template<typename Alloc>
class dynamic_bitset_base : irs::compact<0, Alloc>, util::noncopyable {
class dynamic_bitset_base : irs::compact<0, Alloc> {
public:
typedef size_t word_t;
typedef typename std::allocator_traits<Alloc>::template rebind_alloc<word_t> allocator_type;

View File

@ -1,7 +1,7 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 by EMC Corporation, All Rights Reserved
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
@ -15,104 +15,193 @@
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is EMC Corporation
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#include "shared.hpp"
#include "error/error.hpp"
#include "utils/register.hpp"
#include "compression.hpp"
#include "utils/string_utils.hpp"
#include "utils/type_limits.hpp"
#include <lz4.h>
// list of statically loaded scorers via init()
#ifndef IRESEARCH_DLL
#include "lz4compression.hpp"
#include "delta_compression.hpp"
#endif
NS_ROOT
NS_LOCAL
void compressor::deleter::operator()(void *p) NOEXCEPT {
LZ4_freeStream(reinterpret_cast<LZ4_stream_t*>(p));
}
compressor::compressor(unsigned int chunk_size):
dict_size_(0),
stream_(LZ4_createStream()) {
string_utils::oversize(buf_, LZ4_COMPRESSBOUND(chunk_size));
}
void compressor::compress(const char* src, size_t size) {
assert(size <= std::numeric_limits<int>::max()); // LZ4 API uses int
auto src_size = static_cast<int>(size);
auto* stream = reinterpret_cast<LZ4_stream_t*>(stream_.get());
// ensure LZ4 dictionary from the previous run is at the start of buf_
{
auto* dict_store = dict_size_ ? &(buf_[0]) : nullptr;
// move the LZ4 dictionary from the previous run to the start of buf_
if (dict_store) {
dict_size_ = LZ4_saveDict(stream, dict_store, dict_size_);
assert(dict_size_ >= 0);
}
string_utils::oversize(buf_, LZ4_compressBound(src_size) + dict_size_);
// reload the LZ4 dictionary if buf_ has changed
if (&(buf_[0]) != dict_store) {
dict_size_ = LZ4_loadDict(stream, &(buf_[0]), dict_size_);
assert(dict_size_ >= 0);
}
struct value{
explicit value(
irs::compression::compressor_factory_f compressor_factory = nullptr,
irs::compression::decompressor_factory_f decompressor_factory = nullptr)
: compressor_factory_(compressor_factory),
decompressor_factory_(decompressor_factory) {
}
auto* buf = &(buf_[dict_size_]);
auto buf_size = static_cast<int>(std::min(
buf_.size() - dict_size_,
static_cast<size_t>(std::numeric_limits<int>::max())) // LZ4 API uses int
);
#if defined(LZ4_VERSION_NUMBER) && (LZ4_VERSION_NUMBER >= 10700)
auto lz4_size = LZ4_compress_fast_continue(stream, src, buf, src_size, buf_size, 0); // 0 == use default acceleration
#else
auto lz4_size = LZ4_compress_limitedOutput_continue(stream, src, buf, src_size, buf_size); // use for LZ4 <= v1.6.0
#endif
if (lz4_size < 0) {
this->size_ = 0;
throw index_error("while compressing, error: LZ4 returned negative size");
bool empty() const NOEXCEPT {
return !compressor_factory_ || !decompressor_factory_;
}
this->data_ = reinterpret_cast<const byte_type*>(buf);
this->size_ = lz4_size;
}
bool operator==(const value& other) const NOEXCEPT {
return compressor_factory_ == other.compressor_factory_ &&
decompressor_factory_ == other.decompressor_factory_;
}
void decompressor::deleter::operator()(void *p) NOEXCEPT {
LZ4_freeStreamDecode(reinterpret_cast<LZ4_streamDecode_t*>(p));
}
bool operator!=(const value& other) const NOEXCEPT {
return !(*this == other);
}
decompressor::decompressor()
: stream_(LZ4_createStreamDecode()) {
}
size_t decompressor::deflate(
const char* src, size_t src_size,
char* dst, size_t dst_size) const {
assert(src_size <= integer_traits<int>::const_max); // LZ4 API uses int
auto& stream = *reinterpret_cast<LZ4_streamDecode_t*>(stream_.get());
const irs::compression::compressor_factory_f compressor_factory_;
const irs::compression::decompressor_factory_f decompressor_factory_;
};
const auto lz4_size = LZ4_decompress_safe_continue(
&stream,
src,
dst,
static_cast<int>(src_size), // LZ4 API uses int
static_cast<int>(std::min(dst_size, static_cast<size_t>(integer_traits<int>::const_max))) // LZ4 API uses int
);
const std::string FILENAME_PREFIX("libcompression-");
return lz4_size < 0
? type_limits<type_t::address_t>::invalid() // corrupted index
: lz4_size;
}
class compression_register
: public irs::tagged_generic_register<irs::string_ref, value,
irs::string_ref, compression_register> {
protected:
virtual std::string key_to_filename(const key_type& key) const override {
std::string filename(FILENAME_PREFIX.size() + key.size(), 0);
std::memcpy(
&filename[0],
FILENAME_PREFIX.c_str(),
FILENAME_PREFIX.size()
);
irs::string_ref::traits_type::copy(
&filename[0] + FILENAME_PREFIX.size(),
key.c_str(),
key.size()
);
return filename;
}
};
NS_END
NS_ROOT
NS_BEGIN(compression)
compression_registrar::compression_registrar(
const type_id& type,
compressor_factory_f compressor_factory,
decompressor_factory_f decompressor_factory,
const char* source /*= nullptr*/) {
string_ref const source_ref(source);
const auto new_entry = ::value(compressor_factory, decompressor_factory);
auto entry = compression_register::instance().set(
type.name(),
new_entry,
source_ref.null() ? nullptr : &source_ref);
registered_ = entry.second;
if (!registered_ && new_entry != entry.first) {
auto* registered_source = compression_register::instance().tag(type.name());
if (source && registered_source) {
IR_FRMT_WARN(
"type name collision detected while registering compression, ignoring: type '%s' from %s, previously from %s",
type.name().c_str(),
source,
registered_source->c_str()
);
} else if (source) {
IR_FRMT_WARN(
"type name collision detected while registering compression, ignoring: type '%s' from %s",
type.name().c_str(),
source
);
} else if (registered_source) {
IR_FRMT_WARN(
"type name collision detected while registering compression, ignoring: type '%s', previously from %s",
type.name().c_str(),
registered_source->c_str()
);
} else {
IR_FRMT_WARN(
"type name collision detected while registering compression, ignoring: type '%s'",
type.name().c_str()
);
}
IR_LOG_STACK_TRACE();
}
}
bool exists(const string_ref& name, bool load_library /*= true*/ ) {
return !compression_register::instance().get(name, load_library).empty();
}
compressor::ptr get_compressor(
const string_ref& name,
const options& opts,
bool load_library /*= true*/) NOEXCEPT {
try {
auto* factory = compression_register::instance().get(name, load_library).compressor_factory_;
return factory ? factory(opts) : nullptr;
} catch (...) {
IR_FRMT_ERROR("Caught exception while getting a compressor instance");
IR_LOG_EXCEPTION();
}
return nullptr;
}
decompressor::ptr get_decompressor(const string_ref& name, bool load_library /*= true*/) NOEXCEPT {
try {
auto* factory = compression_register::instance().get(name, load_library).decompressor_factory_;
return factory ? factory() : nullptr;
} catch (...) {
IR_FRMT_ERROR("Caught exception while getting a decompressor instance");
IR_LOG_EXCEPTION();
}
return nullptr;
}
void init() {
#ifndef IRESEARCH_DLL
lz4::init();
delta::init();
raw::init();
#endif
}
void load_all(const std::string& path) {
load_libraries(path, FILENAME_PREFIX, "");
}
bool visit(const std::function<bool(const string_ref&)>& visitor) {
compression_register::visitor_t wrapper = [&visitor](const string_ref& key)->bool {
return visitor(key);
};
return compression_register::instance().visit(wrapper);
}
// -----------------------------------------------------------------------------
// --SECTION-- raw implementation
// -----------------------------------------------------------------------------
/*static*/ void raw::init() {
#ifndef IRESEARCH_DLL
// match registration below
REGISTER_COMPRESSION(raw, &raw::compressor, &raw::decompressor);
#endif
}
DEFINE_COMPRESSION_TYPE(iresearch::compression::raw);
REGISTER_COMPRESSION(raw, &raw::compressor, &raw::decompressor);
NS_END // compression
NS_END

View File

@ -1,7 +1,7 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 by EMC Corporation, All Rights Reserved
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
@ -15,67 +15,216 @@
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is EMC Corporation
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#ifndef IRESEARCH_COMPRESSION_H
#define IRESEARCH_COMPRESSION_H
#include "string.hpp"
#include "type_id.hpp"
#include "memory.hpp"
#include "noncopyable.hpp"
#include <memory>
#include <map>
// -----------------------------------------------------------------------------
// --SECTION-- compression definition
// -----------------------------------------------------------------------------
#define DECLARE_COMPRESSION_TYPE() DECLARE_TYPE_ID(iresearch::compression::type_id)
#define DEFINE_COMPRESSION_TYPE_NAMED(class_type, class_name) \
DEFINE_TYPE_ID(class_type, iresearch::compression::type_id) { \
static iresearch::compression::type_id type(class_name); \
return type; \
}
#define DEFINE_COMPRESSION_TYPE(class_type) DEFINE_COMPRESSION_TYPE_NAMED(class_type, #class_type)
// -----------------------------------------------------------------------------
// --SECTION-- compression registration
// -----------------------------------------------------------------------------
#define REGISTER_COMPRESSION__(compression_name, compressor_factory, decompressor_factory, line, source) \
static iresearch::compression::compression_registrar compression_registrar ## _ ## line(compression_name::type(), compressor_factory, decompressor_factory, source)
#define REGISTER_COMPRESSION_EXPANDER__(compression_name, compressor_factory, decompressor_factory, file, line) \
REGISTER_COMPRESSION__(compression_name, compressor_factory, decompressor_factory, line, file ":" TOSTRING(line))
#define REGISTER_COMPRESSION(compression_name, compressor_factory, decompressor_factory) \
REGISTER_COMPRESSION_EXPANDER__(compression_name, compressor_factory, decompressor_factory, __FILE__, __LINE__)
NS_ROOT
class IRESEARCH_API compressor: public bytes_ref, private util::noncopyable {
struct data_output;
struct data_input;
NS_BEGIN(compression)
struct options {
enum class Hint : byte_type {
/// @brief use default compressor parameters
DEFAULT = 0,
/// @brief prefer speed over compression ratio
SPEED,
/// @brief prefer compression ratio over speed
COMPRESSION
};
/// @brief hint for the codec to trade speed against compression ratio
Hint hint{ Hint::DEFAULT };
options(Hint hint = Hint::DEFAULT)
: hint(hint) {
}
};
////////////////////////////////////////////////////////////////////////////////
/// @class compressor
////////////////////////////////////////////////////////////////////////////////
struct IRESEARCH_API compressor {
DECLARE_SHARED_PTR(compressor);
virtual ~compressor() = default;
/// @note caller is allowed to modify data pointed to by 'in' up to 'size'
virtual bytes_ref compress(byte_type* in, size_t size, bstring& buf) = 0;
/// @brief flush arbitrary payload relevant to compression
virtual void flush(data_output& /*out*/) { /*NOOP*/ }
};
////////////////////////////////////////////////////////////////////////////////
/// @class decompressor
////////////////////////////////////////////////////////////////////////////////
struct IRESEARCH_API decompressor {
DECLARE_SHARED_PTR(decompressor);
virtual ~decompressor() = default;
/// @note caller is allowed to modify data pointed to by 'src' up to 'src_size'
/// @note caller is allowed to modify data pointed to by 'dst' up to 'dst_size'
virtual bytes_ref decompress(byte_type* src, size_t src_size,
byte_type* dst, size_t dst_size) = 0;
virtual bool prepare(data_input& /*in*/) {
// NOOP
return true;
}
};
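A minimal sketch of a user-defined codec against the two interfaces above; 'rot13' is a hypothetical example, not part of this change:

class rot13_compressor final : public irs::compression::compressor {
 public:
  virtual irs::bytes_ref compress(irs::byte_type* in, size_t size,
                                  irs::bstring& /*buf*/) override {
    // the contract above allows modifying 'in' in place
    for (size_t i = 0; i < size; ++i) in[i] = irs::byte_type(in[i] + 13);
    return irs::bytes_ref(in, size);
  }
};

class rot13_decompressor final : public irs::compression::decompressor {
 public:
  virtual irs::bytes_ref decompress(irs::byte_type* src, size_t src_size,
                                    irs::byte_type* dst, size_t dst_size) override {
    if (dst_size < src_size) {
      return irs::bytes_ref::NIL; // mirror the library's error convention
    }
    for (size_t i = 0; i < src_size; ++i) dst[i] = irs::byte_type(src[i] - 13);
    return irs::bytes_ref(dst, src_size);
  }
};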
////////////////////////////////////////////////////////////////////////////////
/// @class type_id
////////////////////////////////////////////////////////////////////////////////
class IRESEARCH_API type_id : public irs::type_id, private util::noncopyable {
public:
explicit compressor(unsigned int chunk_size);
type_id(const string_ref& name) NOEXCEPT
: name_(name) {
}
operator const type_id*() const NOEXCEPT { return this; }
const string_ref& name() const NOEXCEPT { return name_; }
void compress(const char* src, size_t size);
private:
string_ref name_;
};
inline void compress(const bytes_ref& src) {
compress(ref_cast<char>(src).c_str(), src.size());
typedef irs::compression::compressor::ptr(*compressor_factory_f)(const options&);
typedef irs::compression::decompressor::ptr(*decompressor_factory_f)();
// -----------------------------------------------------------------------------
// --SECTION-- compression registration
// -----------------------------------------------------------------------------
class IRESEARCH_API compression_registrar {
public:
compression_registrar(const compression::type_id& type,
compressor_factory_f compressor_factory,
decompressor_factory_f decompressor_factory,
const char* source = nullptr);
operator bool() const NOEXCEPT {
return registered_;
}
private:
struct IRESEARCH_API deleter {
void operator()(void* p) NOEXCEPT;
};
bool registered_;
};
IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
std::string buf_;
int dict_size_; // the size of the LZ4 dictionary from the previous call
std::unique_ptr<void, deleter> stream_; // hide internal LZ4 implementation
IRESEARCH_API_PRIVATE_VARIABLES_END
}; // compressor
////////////////////////////////////////////////////////////////////////////////
/// @brief checks whether a compression with the specified name is registered
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API bool exists(const string_ref& name, bool load_library = true);
class IRESEARCH_API decompressor {
public:
decompressor();
decompressor(const decompressor&) = default;
decompressor& operator=(const decompressor&) = default;
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a compressor by name, or nullptr if not found
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API compressor::ptr get_compressor(
const string_ref& name,
const options& opts,
bool load_library = true) NOEXCEPT;
// returns number of decompressed bytes,
// or integer_traits<size_t>::const_max in case of error
size_t deflate(
const char* src, size_t src_size,
char* dst, size_t dst_size
) const;
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a compressor by type, or nullptr if not found
////////////////////////////////////////////////////////////////////////////////
inline compressor::ptr get_compressor(
const type_id& type,
const options& opts,
bool load_library = true) NOEXCEPT {
return get_compressor(type.name(), opts, load_library);
}
private:
struct IRESEARCH_API deleter {
void operator()(void* p) NOEXCEPT;
};
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a decompressor by name, or nullptr if not found
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API decompressor::ptr get_decompressor(
const string_ref& name,
bool load_library = true) NOEXCEPT;
IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
std::unique_ptr<void, deleter> stream_; // hide internal LZ4 implementation
IRESEARCH_API_PRIVATE_VARIABLES_END
}; // decompressor
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a decompressor by type, or nullptr if not found
////////////////////////////////////////////////////////////////////////////////
inline decompressor::ptr get_decompressor(
const type_id& type,
bool load_library = true) NOEXCEPT {
return get_decompressor(type.name(), load_library);
}
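A hedged usage sketch of the lookup API above; since DEFINE_COMPRESSION_TYPE registers the fully qualified class name as the key, the type-based overloads are the safer entry point (assumes the lz4 type from lz4compression.hpp below is linked in and registered):

irs::compression::options opts(irs::compression::options::Hint::SPEED);

auto comp = irs::compression::get_compressor(irs::compression::lz4::type(), opts);
auto decomp = irs::compression::get_decompressor(irs::compression::lz4::type());
// either may be nullptr: e.g. 'raw' deliberately registers nullptr
// factories to denote "no compression"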
NS_END // NS_ROOT
////////////////////////////////////////////////////////////////////////////////
/// @brief for a static lib, references all known compressions in the lib;
///        for a shared lib this is a NOOP
///        no explicit call of this function is required, its existence is sufficient
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API void init();
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief load all compressions from plugins directory
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API void load_all(const std::string& path);
////////////////////////////////////////////////////////////////////////////////
/// @brief visit all loaded compressions, terminate early if visitor returns false
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API bool visit(const std::function<bool(const string_ref&)>& visitor);
////////////////////////////////////////////////////////////////////////////////
/// @class raw
/// @brief no compression
////////////////////////////////////////////////////////////////////////////////
struct IRESEARCH_API raw {
DECLARE_COMPRESSION_TYPE();
static void init();
static compression::compressor::ptr compressor(const options& /*opts*/) {
return nullptr;
}
static compression::decompressor::ptr decompressor() {
return nullptr;
}
}; // raw
NS_END // compression
NS_END
#endif // IRESEARCH_COMPRESSION_H

View File

@ -0,0 +1,88 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////
#include "shared.hpp"
#include "delta_compression.hpp"
#include "store/store_utils.hpp"
NS_LOCAL
irs::compression::delta_compressor COMPRESSOR;
irs::compression::delta_decompressor DECOMPRESSOR;
NS_END
NS_ROOT
NS_BEGIN(compression)
bytes_ref delta_compressor::compress(byte_type* src, size_t size, bstring& buf) {
auto* begin = reinterpret_cast<uint64_t*>(src);
auto* end = reinterpret_cast<uint64_t*>(src + size);
encode::delta::encode(begin, end);
// ensure we have enough space in the worst case
assert(end >= begin);
buf.resize(size_t(std::distance(begin, end))*bytes_io<uint64_t>::const_max_vsize);
auto* out = const_cast<byte_type*>(buf.data());
for (;begin != end; ++begin) {
vwrite(out, zig_zag_encode64(int64_t(*begin)));
}
assert(out >= buf.data());
return { buf.c_str(), size_t(out - buf.data()) };
}
bytes_ref delta_decompressor::decompress(
byte_type* src, size_t src_size,
byte_type* dst, size_t dst_size) {
auto* dst_end = reinterpret_cast<uint64_t*>(dst);
for (const auto* src_end = src + src_size; src != src_end; ++dst_end) {
*dst_end = uint64_t(zig_zag_decode64(vread<uint64_t>(src)));
}
encode::delta::decode(reinterpret_cast<uint64_t*>(dst), dst_end);
return bytes_ref(dst, dst_size);
}
compressor::ptr delta::compressor(const options& /*opts*/) {
return compressor::ptr(compressor::ptr(), &COMPRESSOR);
}
decompressor::ptr delta::decompressor() {
return decompressor::ptr(decompressor::ptr(), &DECOMPRESSOR);
}
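A round-trip sketch of the scheme above (delta, then zig-zag, then variable-length encoding); the payload must be a whole number of uint64_t values since compress() reinterprets its input, and the input array is delta-encoded in place:

uint64_t values[] { 100, 105, 110, 120 }; // stored as deltas: 100, 5, 5, 10
irs::bstring encoded;
auto comp = irs::compression::delta::compressor(irs::compression::options());
const auto packed = comp->compress(
  reinterpret_cast<irs::byte_type*>(values), sizeof values, encoded);

uint64_t restored[4];
auto decomp = irs::compression::delta::decompressor();
decomp->decompress(&encoded[0], packed.size(),
                   reinterpret_cast<irs::byte_type*>(restored), sizeof restored);
// restored now holds the original 100, 105, 110, 120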
void delta::init() {
// match registration below
REGISTER_COMPRESSION(delta, &delta::compressor, &delta::decompressor);
}
DEFINE_COMPRESSION_TYPE(iresearch::compression::delta);
REGISTER_COMPRESSION(delta, &delta::compressor, &delta::decompressor);
NS_END // compression
NS_END

View File

@ -0,0 +1,57 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////
#ifndef IRESEARCH_DELTA_COMPRESSION_H
#define IRESEARCH_DELTA_COMPRESSION_H
#include "string.hpp"
#include "compression.hpp"
#include "noncopyable.hpp"
NS_ROOT
NS_BEGIN(compression)
class IRESEARCH_API delta_compressor : public compressor, private util::noncopyable {
public:
virtual bytes_ref compress(byte_type* src, size_t size, bstring& out) override final;
}; // delta_compressor
class IRESEARCH_API delta_decompressor : public decompressor, private util::noncopyable {
public:
/// @returns bytes_ref::NIL in case of error
virtual bytes_ref decompress(byte_type* src, size_t src_size,
byte_type* dst, size_t dst_size) override final;
}; // delta_decompressor
struct IRESEARCH_API delta {
DECLARE_COMPRESSION_TYPE();
static void init();
static compression::compressor::ptr compressor(const options& opts);
static compression::decompressor::ptr decompressor();
}; // delta
NS_END // compression
NS_END // NS_ROOT
#endif

View File

@ -28,9 +28,6 @@
NS_LOCAL
// FIXME
// - FIX segment_consolidate_clear_commit
/// @returns fraction of live documents
inline double_t fill_factor(const irs::segment_meta& segment) NOEXCEPT {
return double(segment.live_docs_count)/segment.docs_count;
@ -41,6 +38,8 @@ inline size_t size_without_removals(const irs::segment_meta& segment) NOEXCEPT{
return size_t(segment.size * fill_factor(segment));
}
NS_BEGIN(tier)
struct segment_stat {
segment_stat(const irs::segment_meta& meta) NOEXCEPT
: meta(&meta),
@ -64,6 +63,10 @@ struct segment_stat {
return lhs.size < rhs.size;
}
operator const irs::segment_meta*() const NOEXCEPT {
return meta;
}
const irs::segment_meta* meta;
size_t size; // approximate size of segment without removals
double_t fill_factor;
@ -104,32 +107,6 @@ struct consolidation_candidate {
double_t score{ DBL_MIN }; // how good this permutation is
};
struct consolidation {
explicit consolidation(
const consolidation_candidate& candidate
) : size(candidate.size),
score(candidate.score) {
segments.reserve(candidate.count);
for (const auto& segment : candidate) {
segments.emplace_back(segment);
}
}
bool operator<(const consolidation& rhs) const NOEXCEPT {
if (score < rhs.score) {
return true;
} else if (score > rhs.score) {
return false;
}
return segments.size() > rhs.segments.size();
}
std::vector<segment_stat> segments;
size_t size{ 0 }; // estimated size of the level
double_t score{ DBL_MIN }; // how good this permutation is
};
/// @returns score of the consolidation bucket
double_t consolidation_score(
const consolidation_candidate& consolidation,
@ -197,14 +174,14 @@ double_t consolidation_score(
return score;
}
NS_END // tier
NS_END
NS_ROOT
NS_BEGIN(index_utils)
index_writer::consolidation_policy_t consolidation_policy(
const consolidate_bytes& options
) {
const consolidate_bytes& options) {
return [options](
std::set<const segment_meta*>& candidates,
const index_meta& meta,
@ -233,8 +210,7 @@ index_writer::consolidation_policy_t consolidation_policy(
}
index_writer::consolidation_policy_t consolidation_policy(
const consolidate_bytes_accum& options
) {
const consolidate_bytes_accum& options) {
return [options](
std::set<const segment_meta*>& candidates,
const index_meta& meta,
@ -280,8 +256,7 @@ index_writer::consolidation_policy_t consolidation_policy(
}
index_writer::consolidation_policy_t consolidation_policy(
const consolidate_count& options
) {
const consolidate_count& options) {
return [options](
std::set<const segment_meta*>& candidates,
const index_meta& meta,
@ -297,8 +272,7 @@ index_writer::consolidation_policy_t consolidation_policy(
}
index_writer::consolidation_policy_t consolidation_policy(
const consolidate_docs_fill& options
) {
const consolidate_docs_fill& options) {
return [options](
std::set<const segment_meta*>& candidates,
const index_meta& meta,
@ -318,8 +292,7 @@ index_writer::consolidation_policy_t consolidation_policy(
}
index_writer::consolidation_policy_t consolidation_policy(
const consolidate_docs_live& options
) {
const consolidate_docs_live& options) {
return [options](
std::set<const segment_meta*>& candidates,
const index_meta& meta,
@ -347,8 +320,8 @@ index_writer::consolidation_policy_t consolidation_policy(
}
index_writer::consolidation_policy_t consolidation_policy(
const consolidate_tier& options
) {
const consolidate_tier& options) {
// validate input
const auto max_segments_per_tier = (std::max)(size_t(1), options.max_segments); // can't merge less than 1 segment
auto min_segments_per_tier = (std::max)(size_t(1), options.min_segments); // can't merge less than 1 segment
@ -360,8 +333,7 @@ index_writer::consolidation_policy_t consolidation_policy(
return [max_segments_per_tier, min_segments_per_tier, floor_segment_bytes, max_segments_bytes, min_score](
std::set<const segment_meta*>& candidates,
const index_meta& meta,
const index_writer::consolidating_segments_t& consolidating_segments
)->void {
const index_writer::consolidating_segments_t& consolidating_segments) -> void {
size_t consolidating_size = 0; // size of segments in bytes that are currently under consolidation
size_t min_segment_size = integer_traits<size_t>::const_max; // the smallest segment
size_t total_index_size = 0; // total size in bytes of all segments in index
@ -373,14 +345,19 @@ index_writer::consolidation_policy_t consolidation_policy(
/// get sorted list of segments
///////////////////////////////////////////////////////////////////////////
std::set<segment_stat> sorted_segments;
std::set<tier::segment_stat> sorted_segments;
// get sorted segments from index meta
auto push_segments = [&sorted_segments](
const std::string& /*filename*/,
const irs::segment_meta& segment
) {
sorted_segments.insert(segment);
const irs::segment_meta& segment) {
if (segment.live_docs_count) {
// skip empty segments, they'll be
// removed from index by index_writer
// during 'commit'
sorted_segments.insert(segment);
}
return true;
};
@ -408,6 +385,11 @@ index_writer::consolidation_policy_t consolidation_policy(
}
}
if (!total_docs_count) {
// nothing to consolidate
return;
}
///////////////////////////////////////////////////////////////////////////
/// Stage 2
/// filter out "too large segments", segment is meant to be treated as large if
@ -434,16 +416,14 @@ index_writer::consolidation_policy_t consolidation_policy(
/// find candidates
///////////////////////////////////////////////////////////////////////////
consolidation_candidate best(sorted_segments.begin());
tier::consolidation_candidate best(sorted_segments.begin());
if (sorted_segments.size() >= min_segments_per_tier) {
for (auto i = sorted_segments.begin(), end = sorted_segments.end(); i != end; ++i) {
consolidation_candidate candidate(i);
tier::consolidation_candidate candidate(i);
while (
candidate.segments.second != end
&& candidate.count < max_segments_per_tier
) {
while (candidate.segments.second != end
&& candidate.count < max_segments_per_tier) {
candidate.size += candidate.segments.second->size;
if (candidate.size > max_segments_bytes) {
@ -459,7 +439,7 @@ index_writer::consolidation_policy_t consolidation_policy(
continue;
}
candidate.score = ::consolidation_score(
candidate.score = tier::consolidation_score(
candidate, max_segments_per_tier, floor_segment_bytes
);
@ -480,9 +460,7 @@ index_writer::consolidation_policy_t consolidation_policy(
/// pick the best candidate
///////////////////////////////////////////////////////////////////////////
for (auto& candidate : best) {
candidates.insert(candidate.meta);
}
candidates.insert(best.begin(), best.end());
};
}

View File

@ -1919,7 +1919,7 @@ bool codecvtw_facet::append(
// convert 'BUFFER_SIZE' at a time
do {
auto offset = buf.size();
UErrorCode status = U_ZERO_ERROR;
status = U_ZERO_ERROR;
buf.resize(buf.size() + BUFFER_SIZE);
@ -2897,8 +2897,9 @@ typename num_put_facet<CharType, CvtType>::iter_type num_put_facet<CharType, Cvt
}
}
icu_buf = ctx->icu_buf1_.length() < ctx->icu_buf1_.length()
? &ctx->icu_buf1_ : &ctx->icu_buf0_;
icu_buf = ctx->icu_buf1_.length() < ctx->icu_buf0_.length()
? &ctx->icu_buf1_
: &ctx->icu_buf0_;
}
// ensure all letters are uppercased/lowercased
@ -3754,4 +3755,4 @@ NS_END
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------

View File

@ -0,0 +1,142 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////
#include "shared.hpp"
#include "lz4compression.hpp"
#include "error/error.hpp"
#include "store/store_utils.hpp"
#include "utils/string_utils.hpp"
#include "utils/misc.hpp"
#include "utils/type_limits.hpp"
#include <lz4.h>
NS_LOCAL
// can reuse stateless instances
irs::compression::lz4::lz4compressor LZ4_BASIC_COMPRESSOR;
irs::compression::lz4::lz4decompressor LZ4_BASIC_DECOMPRESSOR;
inline int acceleration(const irs::compression::options::Hint hint) NOEXCEPT {
static const int FACTORS[] { 0, 2, 0 };
assert(static_cast<size_t>(hint) < IRESEARCH_COUNTOF(FACTORS));
return FACTORS[static_cast<size_t>(hint)];
}
NS_END
NS_ROOT
static_assert(
sizeof(char) == sizeof(byte_type),
"sizeof(char) != sizeof(byte_type)"
);
NS_BEGIN(compression)
void LZ4_streamDecode_deleter::operator()(void *p) NOEXCEPT {
if (p) {
LZ4_freeStreamDecode(reinterpret_cast<LZ4_streamDecode_t*>(p));
}
}
void LZ4_stream_deleter::operator()(void *p) NOEXCEPT {
if (p) {
LZ4_freeStream(reinterpret_cast<LZ4_stream_t*>(p));
}
}
lz4stream lz4_make_stream() {
return lz4stream(LZ4_createStream());
}
lz4stream_decode lz4_make_stream_decode() {
return lz4stream_decode(LZ4_createStreamDecode());
}
// -----------------------------------------------------------------------------
// --SECTION-- lz4 compression
// -----------------------------------------------------------------------------
bytes_ref lz4::lz4compressor::compress(byte_type* src, size_t size, bstring& out) {
assert(size <= integer_traits<int>::const_max); // LZ4 API uses int
const auto src_size = static_cast<int>(size);
// ensure we have enough space to store compressed data
string_utils::oversize(out, size_t(LZ4_COMPRESSBOUND(src_size)));
const auto* src_data = reinterpret_cast<const char*>(src);
auto* buf = reinterpret_cast<char*>(&out[0]);
const auto buf_size = static_cast<int>(out.size());
const auto lz4_size = LZ4_compress_fast(src_data, buf, src_size, buf_size, acceleration_);
if (IRS_UNLIKELY(lz4_size < 0)) {
throw index_error("while compressing, error: LZ4 returned negative size");
}
return bytes_ref(reinterpret_cast<const byte_type*>(buf), size_t(lz4_size));
}
bytes_ref lz4::lz4decompressor::decompress(
byte_type* src, size_t src_size,
byte_type* dst, size_t dst_size) {
assert(src_size <= integer_traits<int>::const_max); // LZ4 API uses int
const auto lz4_size = LZ4_decompress_safe(
reinterpret_cast<const char*>(src),
reinterpret_cast<char*>(dst),
static_cast<int>(src_size), // LZ4 API uses int
static_cast<int>(std::min(dst_size, static_cast<size_t>(integer_traits<int>::const_max))) // LZ4 API uses int
);
if (IRS_UNLIKELY(lz4_size < 0)) {
return bytes_ref::NIL; // corrupted index
}
return bytes_ref(dst, size_t(lz4_size));
}
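A matching round-trip sketch for the stateless lz4 path above; the caller has to track the uncompressed size, since decompress() only knows the destination capacity:

irs::byte_type src[] = "repeated repeated repeated repeated";
irs::bstring encoded;
irs::compression::lz4::lz4compressor comp; // 0 == default acceleration
const auto packed = comp.compress(src, sizeof src, encoded);

irs::byte_type dst[sizeof src];
irs::compression::lz4::lz4decompressor decomp;
const auto restored = decomp.decompress(&encoded[0], packed.size(), dst, sizeof dst);
// restored == bytes_ref::NIL signals corrupted input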
compressor::ptr lz4::compressor(const options& opts) {
const auto acceleration = ::acceleration(opts.hint);
if (0 == acceleration) {
return compressor::ptr(compressor::ptr(), &LZ4_BASIC_COMPRESSOR);
}
return std::make_shared<lz4compressor>(acceleration);
}
decompressor::ptr lz4::decompressor() {
return decompressor::ptr(decompressor::ptr(), &LZ4_BASIC_DECOMPRESSOR);
}
void lz4::init() {
// match registration below
REGISTER_COMPRESSION(lz4, &lz4::compressor, &lz4::decompressor);
}
DEFINE_COMPRESSION_TYPE(iresearch::compression::lz4);
REGISTER_COMPRESSION(lz4, &lz4::compressor, &lz4::decompressor);
NS_END // compression
NS_END

View File

@ -0,0 +1,80 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////
#ifndef IRESEARCH_LZ4COMPRESSION_H
#define IRESEARCH_LZ4COMPRESSION_H
#include "string.hpp"
#include "compression.hpp"
#include "noncopyable.hpp"
#include <memory>
NS_ROOT
NS_BEGIN(compression)
struct LZ4_stream_deleter {
void operator()(void* p) NOEXCEPT;
};
struct LZ4_streamDecode_deleter {
void operator()(void* p) NOEXCEPT;
};
typedef std::unique_ptr<void, LZ4_stream_deleter> lz4stream;
typedef std::unique_ptr<void, LZ4_streamDecode_deleter> lz4stream_decode;
lz4stream lz4_make_stream();
lz4stream_decode lz4_make_stream_decode();
struct IRESEARCH_API lz4 {
DECLARE_COMPRESSION_TYPE();
class IRESEARCH_API lz4compressor final : public compression::compressor {
public:
explicit lz4compressor(int acceleration = 0) NOEXCEPT
: acceleration_(acceleration) {
}
int acceleration() const NOEXCEPT { return acceleration_; }
virtual bytes_ref compress(byte_type* src, size_t size, bstring& out) override;
private:
const int acceleration_{0}; // 0 - default acceleration
};
class IRESEARCH_API lz4decompressor final : public compression::decompressor {
public:
virtual bytes_ref decompress(byte_type* src, size_t src_size,
byte_type* dst, size_t dst_size) override;
};
static void init();
static compression::compressor::ptr compressor(const options& opts);
static compression::decompressor::ptr decompressor();
}; // lz4
NS_END // compression
NS_END // NS_ROOT
#endif
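
// The lz4stream/lz4stream_decode aliases above tie the raw LZ4 stream handles
// to unique_ptr deleters, so a handle cannot leak on an early return. A
// minimal, hedged acquisition sketch (LZ4_resetStream is the stock LZ4 C API,
// not something introduced by this commit):
irs::compression::lz4stream stream = irs::compression::lz4_make_stream();
assert(stream); // LZ4_createStream() may return nullptr on allocation failure
auto* raw = reinterpret_cast<LZ4_stream_t*>(stream.get());
LZ4_resetStream(raw); // reuse the same handle across consecutive blocks
// stream going out of scope calls LZ4_freeStream() through LZ4_stream_deleter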

View File

@ -38,8 +38,13 @@ NS_ROOT
NS_BEGIN(memory)
inline CONSTEXPR size_t align_up(size_t size, size_t alignment) NOEXCEPT {
#if defined(_MSC_VER) && (_MSC_VER < 1900)
assert(math::is_power2(alignment));
return (size + alignment - 1) & (0 - alignment);
#else
return IRS_ASSERT(math::is_power2(alignment)),
(size + alignment - 1) & (0 - alignment);
#endif
}
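
// For a power-of-two alignment the expression above is the usual mask trick:
// 0 - alignment is the two's-complement mask that clears the low bits. A few
// hedged sanity checks, assuming a build where CONSTEXPR expands to constexpr:
static_assert(irs::memory::align_up(13, 8) == 16, "rounds up to the next block");
static_assert(irs::memory::align_up(16, 8) == 16, "already aligned stays put");
static_assert(irs::memory::align_up(1, 64) == 64, "at least one full block");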
///////////////////////////////////////////////////////////////////////////////
@ -419,6 +424,7 @@ template<
free_ = std::move(rhs.free_);
blocks_ = std::move(rhs.blocks_);
}
return *this;
}
~memory_pool() NOEXCEPT {

View File

@ -62,7 +62,7 @@ class generic_register: public singleton<RegisterType> {
typedef std::unordered_map<key_type, entry_type> register_map_t;
typedef std::function<bool(const key_type& key)> visitor_t;
virtual ~generic_register() { }
virtual ~generic_register() = default;
// @return the entry registered under the key and if an insertion took place
std::pair<entry_type, bool> set(
@ -181,8 +181,6 @@ class tagged_generic_register: public generic_register<KeyType, EntryType, Regis
typedef typename parent_type::entry_type entry_type;
typedef TagType tag_type;
virtual ~tagged_generic_register() { }
// @return the entry registered under the key and if an insertion took place
std::pair<entry_type, bool> set(
const key_type& key,
@ -214,4 +212,4 @@ class tagged_generic_register: public generic_register<KeyType, EntryType, Regis
NS_END
#endif
#endif

View File

@ -7,7 +7,7 @@ export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:$(pwd)/bin"
ulimit -c unlimited
for i in `seq 1 1`; do
for j in 1 ; do
for j in 1 5 10 15 20 25; do
MAX_LINES=${j}000000
rm -r iresearch.data || {

View File

@ -7,7 +7,7 @@ export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:$(pwd)/build/bin"
ulimit -c unlimited
for i in `seq 1 1`; do
for j in 25 ; do
for j in 1 5 10 15 20 25; do
MAX_LINES=${j}000000
# search

View File

@ -88,6 +88,7 @@ set(IReSearch_tests_sources
./utils/async_utils_tests.cpp
./utils/bitvector_tests.cpp
./utils/container_utils_tests.cpp
./utils/compression_test.cpp
./utils/crc_test.cpp
./utils/file_utils_tests.cpp
./utils/map_utils_tests.cpp
@ -155,6 +156,7 @@ add_executable(${IResearchTests_TARGET_NAME}-static
./analysis/text_analyzer_tests.cpp
./formats/formats_10_tests.cpp
./formats/formats_11_tests.cpp
./formats/formats_12_tests.cpp
./iql/parser_test.cpp
)

View File

@ -237,7 +237,7 @@ TEST_P(format_11_test_case, open_ecnrypted_with_non_encrypted) {
ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());
// write segment with format10
// write segment with format11
{
auto codec = irs::formats::get("1_1");
ASSERT_NE(nullptr, codec);
@ -269,7 +269,7 @@ TEST_P(format_11_test_case, open_non_ecnrypted_with_encrypted) {
ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
// write segment with format10
// write segment with format11
{
auto codec = irs::formats::get("1_1");
ASSERT_NE(nullptr, codec);

View File

@ -0,0 +1,514 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#include "tests_shared.hpp"
#include "formats_test_case_base.hpp"
#include "store/directory_attributes.hpp"
NS_LOCAL
// -----------------------------------------------------------------------------
// --SECTION-- format 12 specific tests
// -----------------------------------------------------------------------------
class format_12_test_case : public tests::directory_test_case_base {
};
TEST_P(format_12_test_case, read_zero_block_encryption) {
tests::json_doc_generator gen(
resource("simple_sequential.json"),
&tests::generic_json_field_factory
);
tests::document const* doc1 = gen.next();
// verify encryption is enabled
ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());
// write segment with format12
{
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
ASSERT_NE(nullptr, writer);
ASSERT_TRUE(insert(*writer,
doc1->indexed.begin(), doc1->indexed.end(),
doc1->stored.begin(), doc1->stored.end()
));
writer->commit();
}
// replace encryption
ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
dir().attributes().emplace<tests::rot13_encryption>(6);
// can't open encrypted index without encryption
ASSERT_THROW(irs::directory_reader::open(dir()), irs::index_error);
}
TEST_P(format_12_test_case, write_zero_block_encryption) {
tests::json_doc_generator gen(
resource("simple_sequential.json"),
&tests::generic_json_field_factory
);
tests::document const* doc1 = gen.next();
// replace encryption
ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
dir().attributes().emplace<tests::rot13_encryption>(0);
// write segment with format12
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
ASSERT_NE(nullptr, writer);
ASSERT_THROW(insert(*writer,
doc1->indexed.begin(), doc1->indexed.end(),
doc1->stored.begin(), doc1->stored.end()
), irs::index_error);
}
TEST_P(format_12_test_case, fields_read_write_wrong_encryption) {
// create sorted && unsorted terms
typedef std::set<irs::bytes_ref> sorted_terms_t;
typedef std::vector<irs::bytes_ref> unsorted_terms_t;
sorted_terms_t sorted_terms;
unsorted_terms_t unsorted_terms;
tests::json_doc_generator gen(
resource("fst_prefixes.json"),
[&sorted_terms, &unsorted_terms] (tests::document& doc, const std::string& name, const tests::json_doc_generator::json_value& data) {
doc.insert(std::make_shared<tests::templates::string_field>(
irs::string_ref(name),
data.str
));
auto ref = irs::ref_cast<irs::byte_type>((doc.indexed.end() - 1).as<tests::templates::string_field>().value());
sorted_terms.emplace(ref);
unsorted_terms.emplace_back(ref);
});
// define field
irs::field_meta field;
field.name = "field";
field.norm = 5;
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());
// write fields
{
irs::flush_state state;
state.dir = &dir();
state.doc_count = 100;
state.name = "segment_name";
state.features = &field.features;
// should use sorted terms on write
tests::format_test_case::terms<sorted_terms_t::iterator> terms(
sorted_terms.begin(), sorted_terms.end()
);
auto writer = codec->get_field_writer(false);
ASSERT_NE(nullptr, writer);
writer->prepare(state);
writer->write(field.name, field.norm, field.features, terms);
writer->end();
}
irs::segment_meta meta;
meta.name = "segment_name";
irs::document_mask docs_mask;
auto reader = codec->get_field_reader();
ASSERT_NE(nullptr, reader);
// can't open encrypted index without encryption
ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
ASSERT_THROW(reader->prepare(dir(), meta, docs_mask), irs::index_error);
// can't open encrypted index with wrong encryption
dir().attributes().emplace<tests::rot13_encryption>(6);
ASSERT_THROW(reader->prepare(dir(), meta, docs_mask), irs::index_error);
}
TEST_P(format_12_test_case, column_meta_read_write_wrong_encryption) {
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());
irs::segment_meta meta;
meta.name = "_1";
// write meta
{
auto writer = codec->get_column_meta_writer();
irs::segment_meta meta1;
// write segment _1
writer->prepare(dir(), meta);
writer->write("_1_column1", 1);
writer->write("_1_column2", 2);
writer->write("_1_column0", 0);
writer->flush();
}
size_t count = 0;
irs::field_id max_id = 0;
auto reader = codec->get_column_meta_reader();
ASSERT_NE(nullptr, reader);
// can't open encrypted index without encryption
ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
ASSERT_THROW(reader->prepare(dir(), meta, count, max_id), irs::index_error);
// can't open encrypted index with wrong encryption
dir().attributes().emplace<tests::rot13_encryption>(6);
ASSERT_THROW(reader->prepare(dir(), meta, count, max_id), irs::index_error);
}
TEST_P(format_12_test_case, open_ecnrypted_with_wrong_encryption) {
tests::json_doc_generator gen(
resource("simple_sequential.json"),
&tests::generic_json_field_factory
);
tests::document const* doc1 = gen.next();
ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());
// write segment with format12
{
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
ASSERT_NE(nullptr, writer);
ASSERT_TRUE(insert(*writer,
doc1->indexed.begin(), doc1->indexed.end(),
doc1->stored.begin(), doc1->stored.end()
));
writer->commit();
}
// can't open encrypted index with wrong encryption
ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
dir().attributes().emplace<tests::rot13_encryption>(6);
ASSERT_THROW(irs::directory_reader::open(dir()), irs::index_error);
}
TEST_P(format_12_test_case, open_ecnrypted_with_non_encrypted) {
tests::json_doc_generator gen(
resource("simple_sequential.json"),
&tests::generic_json_field_factory
);
tests::document const* doc1 = gen.next();
ASSERT_TRUE(dir().attributes().contains<tests::rot13_encryption>());
// write segment with format12
{
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
ASSERT_NE(nullptr, writer);
ASSERT_TRUE(insert(*writer,
doc1->indexed.begin(), doc1->indexed.end(),
doc1->stored.begin(), doc1->stored.end()
));
writer->commit();
}
// remove encryption
dir().attributes().remove<tests::rot13_encryption>();
// can't open encrypted index without encryption
ASSERT_THROW(irs::directory_reader::open(dir()), irs::index_error);
}
TEST_P(format_12_test_case, open_non_ecnrypted_with_encrypted) {
tests::json_doc_generator gen(
resource("simple_sequential.json"),
&tests::generic_json_field_factory
);
tests::document const* doc1 = gen.next();
ASSERT_TRUE(dir().attributes().remove<tests::rot13_encryption>());
// write segment with format12
{
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
ASSERT_NE(nullptr, writer);
ASSERT_TRUE(insert(*writer,
doc1->indexed.begin(), doc1->indexed.end(),
doc1->stored.begin(), doc1->stored.end()
));
writer->commit();
}
// add cipher
dir().attributes().emplace<tests::rot13_encryption>(7);
// check index
auto index = irs::directory_reader::open(dir());
ASSERT_TRUE(index);
ASSERT_EQ(1, index->size());
ASSERT_EQ(1, index->docs_count());
ASSERT_EQ(1, index->live_docs_count());
// check segment 0
{
auto& segment = index[0];
ASSERT_EQ(1, segment.size());
ASSERT_EQ(1, segment.docs_count());
ASSERT_EQ(1, segment.live_docs_count());
std::unordered_set<irs::string_ref> expectedName = { "A" };
const auto* column = segment.column_reader("name");
ASSERT_NE(nullptr, column);
auto values = column->values();
ASSERT_EQ(expectedName.size(), segment.docs_count()); // total count of documents
auto terms = segment.field("same");
ASSERT_NE(nullptr, terms);
auto termItr = terms->iterator();
ASSERT_TRUE(termItr->next());
irs::bytes_ref actual_value;
for (auto docsItr = termItr->postings(iresearch::flags()); docsItr->next();) {
ASSERT_TRUE(values(docsItr->value(), actual_value));
ASSERT_EQ(1, expectedName.erase(irs::to_string<irs::string_ref>(actual_value.c_str())));
}
ASSERT_TRUE(expectedName.empty());
}
}
TEST_P(format_12_test_case, open_10_with_12) {
tests::json_doc_generator gen(
resource("simple_sequential.json"),
&tests::generic_json_field_factory
);
tests::document const* doc1 = gen.next();
// write segment with format10
{
auto codec = irs::formats::get("1_0");
ASSERT_NE(nullptr, codec);
auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
ASSERT_NE(nullptr, writer);
ASSERT_TRUE(insert(*writer,
doc1->indexed.begin(), doc1->indexed.end(),
doc1->stored.begin(), doc1->stored.end()
));
writer->commit();
}
// check index
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
auto index = irs::directory_reader::open(dir(), codec);
ASSERT_TRUE(index);
ASSERT_EQ(1, index->size());
ASSERT_EQ(1, index->docs_count());
ASSERT_EQ(1, index->live_docs_count());
// check segment 0
{
auto& segment = index[0];
ASSERT_EQ(1, segment.size());
ASSERT_EQ(1, segment.docs_count());
ASSERT_EQ(1, segment.live_docs_count());
std::unordered_set<irs::string_ref> expectedName = { "A" };
const auto* column = segment.column_reader("name");
ASSERT_NE(nullptr, column);
auto values = column->values();
ASSERT_EQ(expectedName.size(), segment.docs_count()); // total count of documents
auto terms = segment.field("same");
ASSERT_NE(nullptr, terms);
auto termItr = terms->iterator();
ASSERT_TRUE(termItr->next());
irs::bytes_ref actual_value;
for (auto docsItr = termItr->postings(iresearch::flags()); docsItr->next();) {
ASSERT_TRUE(values(docsItr->value(), actual_value));
ASSERT_EQ(1, expectedName.erase(irs::to_string<irs::string_ref>(actual_value.c_str())));
}
ASSERT_TRUE(expectedName.empty());
}
}
TEST_P(format_12_test_case, formats_10_12) {
tests::json_doc_generator gen(
resource("simple_sequential.json"),
&tests::generic_json_field_factory
);
tests::document const* doc1 = gen.next();
tests::document const* doc2 = gen.next();
// write segment with format10
{
auto codec = irs::formats::get("1_0");
ASSERT_NE(nullptr, codec);
auto writer = irs::index_writer::make(dir(), codec, irs::OM_CREATE);
ASSERT_NE(nullptr, writer);
ASSERT_TRUE(insert(*writer,
doc1->indexed.begin(), doc1->indexed.end(),
doc1->stored.begin(), doc1->stored.end()
));
writer->commit();
}
// write segment with format12
{
auto codec = irs::formats::get("1_2");
ASSERT_NE(nullptr, codec);
auto writer = irs::index_writer::make(dir(), codec, irs::OM_APPEND);
ASSERT_NE(nullptr, writer);
ASSERT_TRUE(insert(*writer,
doc2->indexed.begin(), doc2->indexed.end(),
doc2->stored.begin(), doc2->stored.end()
));
writer->commit();
}
// check index
auto index = irs::directory_reader::open(dir());
ASSERT_TRUE(index);
ASSERT_EQ(2, index->size());
ASSERT_EQ(2, index->docs_count());
ASSERT_EQ(2, index->live_docs_count());
// check segment 0
{
auto& segment = index[0];
ASSERT_EQ(1, segment.size());
ASSERT_EQ(1, segment.docs_count());
ASSERT_EQ(1, segment.live_docs_count());
std::unordered_set<irs::string_ref> expectedName = { "A" };
const auto* column = segment.column_reader("name");
ASSERT_NE(nullptr, column);
auto values = column->values();
ASSERT_EQ(expectedName.size(), segment.docs_count()); // total count of documents
auto terms = segment.field("same");
ASSERT_NE(nullptr, terms);
auto termItr = terms->iterator();
ASSERT_TRUE(termItr->next());
irs::bytes_ref actual_value;
for (auto docsItr = termItr->postings(iresearch::flags()); docsItr->next();) {
ASSERT_TRUE(values(docsItr->value(), actual_value));
ASSERT_EQ(1, expectedName.erase(irs::to_string<irs::string_ref>(actual_value.c_str())));
}
ASSERT_TRUE(expectedName.empty());
}
// check segment 1
{
auto& segment = index[1];
ASSERT_EQ(1, segment.size());
ASSERT_EQ(1, segment.docs_count());
ASSERT_EQ(1, segment.live_docs_count());
std::unordered_set<irs::string_ref> expectedName = { "B" };
const auto* column = segment.column_reader("name");
ASSERT_NE(nullptr, column);
auto values = column->values();
ASSERT_EQ(expectedName.size(), segment.docs_count()); // total count of documents
auto terms = segment.field("same");
ASSERT_NE(nullptr, terms);
auto termItr = terms->iterator();
ASSERT_TRUE(termItr->next());
irs::bytes_ref actual_value;
for (auto docsItr = termItr->postings(iresearch::flags()); docsItr->next();) {
ASSERT_TRUE(values(docsItr->value(), actual_value));
ASSERT_EQ(1, expectedName.erase(irs::to_string<irs::string_ref>(actual_value.c_str())));
}
ASSERT_TRUE(expectedName.empty());
}
}
INSTANTIATE_TEST_CASE_P(
format_12_test,
format_12_test_case,
::testing::Values(
&tests::rot13_cipher_directory<&tests::memory_directory, 16>,
&tests::rot13_cipher_directory<&tests::fs_directory, 16>,
&tests::rot13_cipher_directory<&tests::mmap_directory, 16>
),
tests::directory_test_case_base::to_string
);
// -----------------------------------------------------------------------------
// --SECTION-- generic tests
// -----------------------------------------------------------------------------
using tests::format_test_case;
INSTANTIATE_TEST_CASE_P(
format_12_test,
format_test_case,
::testing::Combine(
::testing::Values(
&tests::rot13_cipher_directory<&tests::memory_directory, 16>,
&tests::rot13_cipher_directory<&tests::fs_directory, 16>,
&tests::rot13_cipher_directory<&tests::mmap_directory, 16>,
&tests::rot13_cipher_directory<&tests::memory_directory, 7>,
&tests::rot13_cipher_directory<&tests::fs_directory, 7>,
&tests::rot13_cipher_directory<&tests::mmap_directory, 7>
),
::testing::Values("1_2")
),
tests::to_string
);
NS_END

View File

@ -22,6 +22,7 @@
////////////////////////////////////////////////////////////////////////////////
#include "formats_test_case_base.hpp"
#include "utils/lz4compression.hpp"
namespace tests {
@ -796,10 +797,13 @@ TEST_P(format_test_case, columns_rw_sparse_column_dense_block) {
// write docs
{
auto writer = codec()->get_columnstore_writer();
writer->prepare(dir(), seg);
auto column = writer->push_column();
auto column = writer->push_column({
irs::compression::lz4::type(),
irs::compression::options(),
bool(irs::get_encryption(dir().attributes()))
});
column_id = column.first;
auto& column_handler = column.second;
@ -859,7 +863,11 @@ TEST_P(format_test_case, columns_rw_dense_mask) {
{
auto writer = codec()->get_columnstore_writer();
writer->prepare(dir(), seg);
auto column = writer->push_column();
auto column = writer->push_column({
irs::compression::lz4::type(),
irs::compression::options(),
bool(irs::get_encryption(dir().attributes()))
});
column_id = column.first;
auto& column_handler = column.second;
@ -902,7 +910,11 @@ TEST_P(format_test_case, columns_rw_bit_mask) {
auto writer = codec()->get_columnstore_writer();
writer->prepare(dir(), segment);
auto column = writer->push_column();
auto column = writer->push_column({
irs::compression::lz4::type(),
irs::compression::options(),
bool(irs::get_encryption(dir().attributes()))
});
id = column.first;
auto& handle = column.second;
@ -1125,9 +1137,10 @@ TEST_P(format_test_case, columns_rw_empty) {
{
auto writer = codec()->get_columnstore_writer();
writer->prepare(dir(), meta0);
column0_id = writer->push_column().first;
column0_id = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) }).first;
ASSERT_EQ(0, column0_id);
column1_id = writer->push_column().first;
column1_id = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) }).first;
ASSERT_EQ(1, column1_id);
ASSERT_FALSE(writer->commit()); // flush empty columns
}
@ -1192,7 +1205,11 @@ TEST_P(format_test_case, columns_rw_same_col_empty_repeat) {
);
if (res.second) {
res.first->second = writer->push_column();
res.first->second = writer->push_column({
irs::compression::lz4::type(),
irs::compression::options(),
bool(irs::get_encryption(dir().attributes()))
});
}
auto& column = res.first->second.second;
@ -1268,7 +1285,11 @@ TEST_P(format_test_case, columns_rw_big_document) {
auto writer = codec()->get_columnstore_writer();
writer->prepare(dir(), segment);
auto column = writer->push_column();
auto column = writer->push_column({
irs::compression::lz4::type(),
irs::compression::options(),
bool(irs::get_encryption(dir().attributes()))
});
id = column.first;
{
@ -1432,7 +1453,7 @@ TEST_P(format_test_case, columns_rw_writer_reuse) {
);
if (res.second) {
res.first->second = writer->push_column();
res.first->second = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
}
auto& column = res.first->second.second;
@ -1461,7 +1482,7 @@ TEST_P(format_test_case, columns_rw_writer_reuse) {
);
if (res.second) {
res.first->second = writer->push_column();
res.first->second = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
}
auto& column = res.first->second.second;
@ -1488,7 +1509,7 @@ TEST_P(format_test_case, columns_rw_writer_reuse) {
);
if (res.second) {
res.first->second = writer->push_column();
res.first->second = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
}
auto& column = res.first->second.second;
@ -1677,7 +1698,7 @@ TEST_P(format_test_case, columns_rw_typed) {
);
if (res.second) {
res.first->second = writer->push_column();
res.first->second = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
}
auto& column = res.first->second.second;
@ -1923,8 +1944,8 @@ TEST_P(format_test_case, columns_rw_sparse_dense_offset_column_border_case) {
auto writer = codec()->get_columnstore_writer();
writer->prepare(dir(), meta0);
dense_fixed_offset_column = writer->push_column();
sparse_fixed_offset_column = writer->push_column();
dense_fixed_offset_column = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
sparse_fixed_offset_column = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
irs::doc_id_t doc = irs::type_limits<irs::type_t::doc_id_t>::min();
@ -2120,26 +2141,26 @@ TEST_P(format_test_case, columns_rw) {
{
writer->prepare(dir(), meta0);
auto field0 = writer->push_column();
auto field0 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
segment0_field0_id = field0.first;
auto& field0_writer = field0.second;
ASSERT_EQ(0, segment0_field0_id);
auto field1 = writer->push_column();
auto field1 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
segment0_field1_id = field1.first;
auto& field1_writer = field1.second;
ASSERT_EQ(1, segment0_field1_id);
auto empty_field = writer->push_column(); // gap between filled columns
auto empty_field = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) }); // gap between filled columns
segment0_empty_column_id = empty_field.first;
ASSERT_EQ(2, segment0_empty_column_id);
auto field2 = writer->push_column();
auto field2 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
segment0_field2_id = field2.first;
auto& field2_writer = field2.second;
ASSERT_EQ(3, segment0_field2_id);
auto field3 = writer->push_column();
auto field3 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
segment0_field3_id = field3.first;
auto& field3_writer = field3.second;
ASSERT_EQ(4, segment0_field3_id);
auto field4 = writer->push_column();
auto field4 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
segment0_field4_id = field4.first;
auto& field4_writer = field4.second;
ASSERT_EQ(5, segment0_field4_id);
@ -2219,15 +2240,15 @@ TEST_P(format_test_case, columns_rw) {
{
writer->prepare(dir(), meta1);
auto field0 = writer->push_column();
auto field0 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
segment1_field0_id = field0.first;
auto& field0_writer = field0.second;
ASSERT_EQ(0, segment1_field0_id);
auto field1 = writer->push_column();
auto field1 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
segment1_field1_id = field1.first;
auto& field1_writer = field1.second;
ASSERT_EQ(1, segment1_field1_id);
auto field2 = writer->push_column();
auto field2 = writer->push_column({ irs::compression::lz4::type(), {}, bool(irs::get_encryption(dir().attributes())) });
segment1_field2_id = field2.first;
auto& field2_writer = field2.second;
ASSERT_EQ(2, segment1_field2_id);
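
// All of the push_column() call sites in this file change the same way: the
// writer no longer picks defaults, the caller supplies a column_info triple.
// A hedged sketch of the new call shape; the variable names are illustrative:
auto column = writer->push_column({
  irs::compression::lz4::type(),   // codec applied to the column's blocks
  irs::compression::options(),     // hint forwarded to the compressor factory
  bool(irs::get_encryption(dir().attributes())) // encrypt iff a cipher is attached
});
const auto id = column.first;      // field id assigned to the new column
auto& handle = column.second;      // per-document value writer, as used above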

View File

@ -137,9 +137,10 @@ index_segment& index_segment::operator=(index_segment&& rhs) NOEXCEPT {
}
void index_segment::add_sorted(const ifield& f) {
irs::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
if (f.write(out)) {
const irs::bytes_ref value = out;
const irs::bytes_ref value = buf;
const auto doc_id = irs::doc_id_t((irs::doc_limits::min)() + count_);
sort_.emplace_back(std::make_pair(irs::bstring(value.c_str(), value.size()), doc_id));
}
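
// The hunk above reflects the new bytes_output contract: it no longer owns a
// buffer but appends into a caller-supplied bstring, so the written bytes stay
// addressable after the writer is gone. A hedged sketch of the pattern:
irs::bstring buf;
irs::bytes_output out(buf);
out.write_vlong(42);                // data_output interface, appends to buf
const irs::bytes_ref written = buf; // stable view over everything written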

View File

@ -147,6 +147,55 @@ TEST(consolidation_test_tier, test_max_consolidation_size) {
}
}
TEST(consolidation_test_tier, empty_meta) {
irs::index_meta meta;
irs::index_utils::consolidate_tier options;
options.floor_segment_bytes = 1;
options.max_segments = 10;
options.min_segments = 1;
options.max_segments_bytes = irs::integer_traits<size_t>::const_max;
irs::index_writer::consolidating_segments_t consolidating_segments;
auto policy = irs::index_utils::consolidation_policy(options);
std::set<const irs::segment_meta*> candidates;
policy(candidates, meta, consolidating_segments);
ASSERT_TRUE(candidates.empty());
}
TEST(consolidation_test_tier, empty_consolidating_segment) {
irs::index_meta meta;
meta.add(irs::segment_meta("empty", nullptr, 1, 0, false, irs::segment_meta::file_set(), 1));
irs::index_utils::consolidate_tier options;
options.floor_segment_bytes = 1;
options.max_segments = 10;
options.min_segments = 1;
options.max_segments_bytes = irs::integer_traits<size_t>::const_max;
irs::index_writer::consolidating_segments_t consolidating_segments { &meta[0].meta };
auto policy = irs::index_utils::consolidation_policy(options);
std::set<const irs::segment_meta*> candidates;
policy(candidates, meta, consolidating_segments);
ASSERT_TRUE(candidates.empty()); // skip empty consolidating segments
}
TEST(consolidation_test_tier, empty_segment) {
irs::index_meta meta;
meta.add(irs::segment_meta("empty", nullptr, 0, 0, false, irs::segment_meta::file_set(), 1));
irs::index_utils::consolidate_tier options;
options.floor_segment_bytes = 1;
options.max_segments = 10;
options.min_segments = 1;
options.max_segments_bytes = irs::integer_traits<size_t>::const_max;
irs::index_writer::consolidating_segments_t consolidating_segments { &meta[0].meta };
auto policy = irs::index_utils::consolidation_policy(options);
std::set<const irs::segment_meta*> candidates;
policy(candidates, meta, consolidating_segments);
ASSERT_TRUE(candidates.empty()); // skip empty segments
}
TEST(consolidation_test_tier, test_max_consolidation_count) {
// generate meta
irs::index_meta meta;
@ -1304,4 +1353,4 @@ TEST(consolidation_test_tier, test_skewed_segments) {
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------

View File

@ -419,8 +419,8 @@ class json_doc_generator: public doc_generator_base {
return *this;
}
operator irs::string_ref() const { return irs::string_ref(data, size); };
operator std::string() const { return std::string(data, size); };
operator irs::string_ref() const { return irs::string_ref(data, size); }
operator std::string() const { return std::string(data, size); }
};
struct json_value {

File diff suppressed because it is too large

View File

@ -186,15 +186,13 @@ class index_test_base : public virtual test_param_base<index_test_context> {
irs::index_writer::ptr open_writer(
irs::directory& dir,
irs::OpenMode mode = irs::OM_CREATE,
const irs::index_writer::init_options& options = {}
) {
const irs::index_writer::init_options& options = {}) {
return irs::index_writer::make(dir, codec_, mode, options);
}
irs::index_writer::ptr open_writer(
irs::OpenMode mode = irs::OM_CREATE,
const irs::index_writer::init_options& options = {}
) {
const irs::index_writer::init_options& options = {}) {
return irs::index_writer::make(*dir_, codec_, mode, options);
}

View File

@ -26,6 +26,7 @@
#include "iql/query_builder.hpp"
#include "store/memory_directory.hpp"
#include "utils/type_limits.hpp"
#include "utils/lz4compression.hpp"
#include "index/merge_writer.hpp"
namespace tests {
@ -165,8 +166,12 @@ TEST_F(merge_writer_tests, test_merge_writer_columns_remove) {
writer->commit();
}
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
auto reader = iresearch::directory_reader::open(dir, codec_ptr);
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);
ASSERT_EQ(2, reader.size());
ASSERT_EQ(2, reader[0].docs_count());
@ -566,8 +571,12 @@ TEST_F(merge_writer_tests, test_merge_writer_columns) {
writer->commit();
}
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
auto reader = iresearch::directory_reader::open(dir, codec_ptr);
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);
ASSERT_EQ(2, reader.size());
ASSERT_EQ(2, reader[0].docs_count());
@ -1056,8 +1065,12 @@ TEST_F(merge_writer_tests, test_merge_writer) {
return reader ? reader->docs_count() : 0;
};
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
auto reader = iresearch::directory_reader::open(dir, codec_ptr);
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);
ASSERT_EQ(2, reader.size());
ASSERT_EQ(2, reader[0].docs_count());
@ -2207,9 +2220,13 @@ TEST_F(merge_writer_tests, test_merge_writer_add_segments) {
// merge 33 segments to writer (segments > 32 to trigger GCC 8.2.0 optimizer bug)
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
irs::index_meta::index_segment_t index_segment;
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);
for (auto& sub_reader: reader) {
writer.add(sub_reader);
@ -2263,10 +2280,14 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {
// test default progress (false)
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
irs::index_meta::index_segment_t index_segment;
irs::merge_writer::flush_progress_t progress;
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);
index_segment.meta.codec = codec_ptr;
writer.add(reader[0]);
@ -2285,10 +2306,14 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {
// test always-false progress
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
irs::index_meta::index_segment_t index_segment;
irs::merge_writer::flush_progress_t progress = []()->bool { return false; };
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);
index_segment.meta.codec = codec_ptr;
writer.add(reader[0]);
@ -2311,11 +2336,15 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {
// test always-true progress
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
irs::index_meta::index_segment_t index_segment;
irs::merge_writer::flush_progress_t progress =
[&progress_call_count]()->bool { ++progress_call_count; return true; };
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);
index_segment.meta.codec = codec_ptr;
writer.add(reader[0]);
@ -2334,6 +2363,10 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {
ASSERT_TRUE(progress_call_count); // there should have been at least some calls
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
// test limited-true progress
for (size_t i = 1; i < progress_call_count; ++i) { // +1 for pre-decrement in 'progress'
size_t call_count = i;
@ -2341,7 +2374,7 @@ TEST_F(merge_writer_tests, test_merge_writer_flush_progress) {
irs::index_meta::index_segment_t index_segment;
irs::merge_writer::flush_progress_t progress =
[&call_count]()->bool { return --call_count; };
irs::merge_writer writer(dir);
irs::merge_writer writer(dir, column_info);
index_segment.meta.codec = codec_ptr;
index_segment.meta.name = "merged";
@ -2405,7 +2438,11 @@ TEST_F(merge_writer_tests, test_merge_writer_field_features) {
// test merge existing with feature subset (success)
{
irs::merge_writer writer(dir);
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::merge_writer writer(dir, column_info);
writer.add(reader[1]); // assume 1 is segment with text field
writer.add(reader[0]); // assume 0 is segment with string field
@ -2417,7 +2454,11 @@ TEST_F(merge_writer_tests, test_merge_writer_field_features) {
// test merge existing with feature superset (fail)
{
irs::merge_writer writer(dir);
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::merge_writer writer(dir, column_info);
writer.add(reader[0]); // assume 0 is segment with text field
writer.add(reader[1]); // assume 1 is segment with string field
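
// merge_writer now takes a column_info_provider_t alongside the directory;
// because the provider receives the column name, compression can differ per
// column. A hedged sketch (the "norm" special case is illustrative only, not
// something this commit prescribes):
irs::column_info_provider_t column_info = [](const irs::string_ref& name) {
  if (name == "norm") { // hypothetical: keep short norm columns uncompressed
    return irs::column_info(
      irs::compression::raw::type(), irs::compression::options(), false);
  }
  return irs::column_info(
    irs::compression::lz4::type(), irs::compression::options(), true);
};
irs::merge_writer writer(dir, column_info); // dir: any irs::directory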

View File

@ -28,6 +28,7 @@
#include "index/index_tests.hpp"
#include "store/memory_directory.hpp"
#include "store/store_utils.hpp"
#include "utils/lz4compression.hpp"
NS_LOCAL
@ -74,8 +75,12 @@ TEST_F(segment_writer_tests, invalid_actions) {
stream.reset(true);
field_t field(stream);
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info( irs::compression::lz4::type(), {}, true );
};
irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer->memory_active());
// store + store sorted
@ -126,10 +131,14 @@ TEST_F(segment_writer_tests, memory_sorted_vs_unsorted) {
}
} less;
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info( irs::compression::lz4::type(), {}, true );
};
irs::memory_directory dir;
auto writer_sorted = irs::segment_writer::make(dir, &less);
auto writer_sorted = irs::segment_writer::make(dir, column_info, &less);
ASSERT_EQ(0, writer_sorted->memory_active());
auto writer_unsorted = irs::segment_writer::make(dir, nullptr);
auto writer_unsorted = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer_unsorted->memory_active());
irs::segment_meta segment;
@ -181,8 +190,12 @@ TEST_F(segment_writer_tests, insert_sorted_without_comparator) {
}
} field;
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info( irs::compression::lz4::type(), irs::compression::options(irs::compression::options::Hint::SPEED), true );
};
irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer->memory_active());
irs::segment_meta segment;
@ -227,8 +240,12 @@ TEST_F(segment_writer_tests, memory_store_sorted_field) {
}
} less;
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info(irs::compression::lz4::type(), irs::compression::options{}, true);
};
irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, &less);
auto writer = irs::segment_writer::make(dir, column_info, &less);
ASSERT_EQ(0, writer->memory_active());
irs::segment_meta segment;
@ -273,8 +290,12 @@ TEST_F(segment_writer_tests, memory_store_field_sorted) {
}
} less;
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info( irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, &less);
auto writer = irs::segment_writer::make(dir, column_info, &less);
ASSERT_EQ(0, writer->memory_active());
irs::segment_meta segment;
@ -313,8 +334,12 @@ TEST_F(segment_writer_tests, memory_store_field_unsorted) {
}
} field;
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info( irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer->memory_active());
irs::segment_meta segment;
@ -360,8 +385,12 @@ TEST_F(segment_writer_tests, memory_index_field) {
stream.reset(true);
field_t field(stream);
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info( irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
ASSERT_EQ(0, writer->memory_active());
for (size_t i = 0; i < 100; ++i) {
@ -399,8 +428,12 @@ TEST_F(segment_writer_tests, index_field) {
// test missing token_stream attributes (increment)
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info( irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
irs::segment_writer::update_context ctx;
token_stream_t stream;
field_t field(stream);
@ -418,8 +451,12 @@ TEST_F(segment_writer_tests, index_field) {
// test missing token_stream attributes (term_attribute)
{
irs::column_info_provider_t column_info = [](const irs::string_ref&) {
return irs::column_info( irs::compression::lz4::type(), irs::compression::options{}, true );
};
irs::memory_directory dir;
auto writer = irs::segment_writer::make(dir, nullptr);
auto writer = irs::segment_writer::make(dir, column_info, nullptr);
irs::segment_writer::update_context ctx;
token_stream_t stream;
field_t field(stream);

View File

@ -30,12 +30,13 @@
#include "store/memory_directory.hpp"
#include "utils/bitvector.hpp"
#include "utils/bytes_utils.hpp"
#include "utils/lz4compression.hpp"
#include "utils/type_limits.hpp"
// FIXME check gaps && deleted docs
TEST(sorted_column_test, ctor) {
irs::sorted_column col;
irs::sorted_column col({ irs::compression::lz4::type(), {}, false });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());
@ -43,7 +44,7 @@ TEST(sorted_column_test, ctor) {
}
TEST(sorted_column_test, flush_empty) {
irs::sorted_column col;
irs::sorted_column col({ irs::compression::lz4::type(), {}, false });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());
@ -159,7 +160,7 @@ TEST(sorted_column_test, insert_duplicates) {
writer->prepare(dir, segment);
irs::sorted_column col;
irs::sorted_column col({ irs::compression::raw::type(), {}, true });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());
@ -270,7 +271,7 @@ TEST(sorted_column_test, sort) {
writer->prepare(dir, segment);
irs::sorted_column col;
irs::sorted_column col({ irs::compression::lz4::type(), {}, true });
ASSERT_TRUE(col.empty());
ASSERT_EQ(0, col.size());
ASSERT_EQ(0, col.memory_active());

View File

@ -143,7 +143,7 @@ TEST_P(sorted_index_test_case, simple_sequential) {
// check sorted column
{
std::vector<irs::bytes_output> column_payload;
std::vector<irs::bstring> column_payload;
gen.reset();
while (auto* doc = gen.next()) {
@ -151,14 +151,15 @@ TEST_P(sorted_index_test_case, simple_sequential) {
ASSERT_NE(nullptr, field);
column_payload.emplace_back();
field->write(column_payload.back());
irs::bytes_output out(column_payload.back());
field->write(out);
}
ASSERT_EQ(column_payload.size(), segment.docs_count());
std::sort(
column_payload.begin(), column_payload.end(),
[&less](const irs::bytes_output& lhs, const irs::bytes_output& rhs) {
[&less](const irs::bstring& lhs, const irs::bstring& rhs) {
return less(lhs, rhs);
});
@ -189,8 +190,8 @@ TEST_P(sorted_index_test_case, simple_sequential) {
for (auto& column_name : column_names) {
struct doc {
irs::doc_id_t id{ irs::doc_limits::eof() };
irs::bytes_output order;
irs::bytes_output value;
irs::bstring order;
irs::bstring value;
};
std::vector<doc> column_docs;
@ -207,11 +208,13 @@ TEST_P(sorted_index_test_case, simple_sequential) {
auto* column = doc->stored.get(column_name);
auto& value = column_docs.back();
sorted->write(value.order);
irs::bytes_output order_out(value.order);
sorted->write(order_out);
if (column) {
value.id = id++;
column->write(value.value);
irs::bytes_output value_out(value.value);
column->write(value_out);
}
}
@ -325,21 +328,22 @@ TEST_P(sorted_index_test_case, simple_sequential_consolidate) {
// check sorted column
{
segment_gen.reset();
std::vector<irs::bytes_output> column_payload;
std::vector<irs::bstring> column_payload;
while (auto* doc = segment_gen.next()) {
auto* field = doc->stored.get(sorted_column);
ASSERT_NE(nullptr, field);
column_payload.emplace_back();
field->write(column_payload.back());
irs::bytes_output out(column_payload.back());
field->write(out);
}
ASSERT_EQ(column_payload.size(), segment.docs_count());
std::sort(
column_payload.begin(), column_payload.end(),
[&less](const irs::bytes_output& lhs, const irs::bytes_output& rhs) {
[&less](const irs::bstring& lhs, const irs::bstring& rhs) {
return less(lhs, rhs);
});
@ -370,8 +374,8 @@ TEST_P(sorted_index_test_case, simple_sequential_consolidate) {
for (auto& column_name : column_names) {
struct doc {
irs::doc_id_t id{ irs::doc_limits::eof() };
irs::bytes_output order;
irs::bytes_output value;
irs::bstring order;
irs::bstring value;
};
std::vector<doc> column_docs;
@ -388,11 +392,13 @@ TEST_P(sorted_index_test_case, simple_sequential_consolidate) {
auto* column = doc->stored.get(column_name);
auto& value = column_docs.back();
sorted->write(value.order);
irs::bytes_output order_out(value.order);
sorted->write(order_out);
if (column) {
value.id = id++;
column->write(value.value);
irs::bytes_output value_out(value.value);
column->write(value_out);
}
}
@ -475,21 +481,22 @@ TEST_P(sorted_index_test_case, simple_sequential_consolidate) {
// check sorted column
{
gen.reset();
std::vector<irs::bytes_output> column_payload;
std::vector<irs::bstring> column_payload;
while (auto* doc = gen.next()) {
auto* field = doc->stored.get(sorted_column);
ASSERT_NE(nullptr, field);
column_payload.emplace_back();
field->write(column_payload.back());
irs::bytes_output out(column_payload.back());
field->write(out);
}
ASSERT_EQ(column_payload.size(), segment.docs_count());
std::sort(
column_payload.begin(), column_payload.end(),
[&less](const irs::bytes_output& lhs, const irs::bytes_output& rhs) {
[&less](const irs::bstring& lhs, const irs::bstring& rhs) {
return less(lhs, rhs);
});
@ -520,8 +527,8 @@ TEST_P(sorted_index_test_case, simple_sequential_consolidate) {
for (auto& column_name : column_names) {
struct doc {
irs::doc_id_t id{ irs::doc_limits::eof() };
irs::bytes_output order;
irs::bytes_output value;
irs::bstring order;
irs::bstring value;
};
std::vector<doc> column_docs;
@ -538,11 +545,13 @@ TEST_P(sorted_index_test_case, simple_sequential_consolidate) {
auto* column = doc->stored.get(column_name);
auto& value = column_docs.back();
sorted->write(value.order);
irs::bytes_output order_out(value.order);
sorted->write(order_out);
if (column) {
value.id = id++;
column->write(value.value);
irs::bytes_output value_out(value.value);
column->write(value_out);
}
}
@ -630,7 +639,7 @@ TEST_P(sorted_index_test_case, simple_sequential_already_sorted) {
// check sorted column
{
std::vector<irs::bytes_output> column_payload;
std::vector<irs::bstring> column_payload;
gen.reset();
while (auto* doc = gen.next()) {
@ -638,14 +647,15 @@ TEST_P(sorted_index_test_case, simple_sequential_already_sorted) {
ASSERT_NE(nullptr, field);
column_payload.emplace_back();
field->write(column_payload.back());
irs::bytes_output out(column_payload.back());
field->write(out);
}
ASSERT_EQ(column_payload.size(), segment.docs_count());
std::sort(
column_payload.begin(), column_payload.end(),
[&less](const irs::bytes_output& lhs, const irs::bytes_output& rhs) {
[&less](const irs::bstring& lhs, const irs::bstring& rhs) {
return less(lhs, rhs);
});
@ -676,8 +686,8 @@ TEST_P(sorted_index_test_case, simple_sequential_already_sorted) {
for (auto& column_name : column_names) {
struct doc {
irs::doc_id_t id{ irs::doc_limits::eof() };
irs::bytes_output order;
irs::bytes_output value;
irs::bstring order;
irs::bstring value;
};
std::vector<doc> column_docs;
@ -694,11 +704,13 @@ TEST_P(sorted_index_test_case, simple_sequential_already_sorted) {
auto* column = doc->stored.get(column_name);
auto& value = column_docs.back();
sorted->write(value.order);
irs::bytes_output order_out(value.order);
sorted->write(order_out);
if (column) {
value.id = id++;
column->write(value.value);
irs::bytes_output value_out(value.value);
column->write(value_out);
}
}
@ -1354,7 +1366,7 @@ INSTANTIATE_TEST_CASE_P(
&tests::fs_directory,
&tests::mmap_directory
),
::testing::Values("1_1")
::testing::Values("1_1", "1_2")
),
tests::to_string
);

View File

@ -25,7 +25,7 @@
#include "store/store_utils.hpp"
#include "utils/bytes_utils.hpp"
using namespace iresearch;
using namespace irs;
namespace tests {
namespace detail {
@ -86,7 +86,8 @@ void packed_read_write_core(const std::vector<uint32_t> &src) {
assert(blocks);
// compress data to stream
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
// write first n compressed blocks
{
@ -99,7 +100,7 @@ void packed_read_write_core(const std::vector<uint32_t> &src) {
// decompress data from stream
std::vector<uint32_t> read(src.size());
iresearch::bytes_ref_input in(out);
irs::bytes_ref_input in(buf);
// read first n compressed blocks
{
@ -113,21 +114,22 @@ void packed_read_write_core(const std::vector<uint32_t> &src) {
ASSERT_EQ(src, read);
}
using iresearch::data_input;
using iresearch::data_output;
using irs::data_input;
using irs::data_output;
template<typename T>
void read_write_core(
const std::vector<T>& src,
const std::function<T(data_input&)>& reader,
const std::function<void(data_output&,T)>& writer) {
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
std::for_each(
src.begin(), src.end(),
[&out,&writer](const T& v){ writer(out, v); }
);
iresearch::bytes_input in( out);
irs::bytes_input in(buf);
std::for_each(
src.begin(), src.end(),
[&in,&reader](const T& v){ ASSERT_EQ(v, reader(in)); }
@ -139,11 +141,12 @@ template<typename T>
void read_write_core_nan(
const std::function<T(data_input&)>& reader,
const std::function<void(data_output&,T)>& writer) {
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
writer(out, std::numeric_limits<T>::quiet_NaN());
writer(out, std::numeric_limits<T>::signaling_NaN());
iresearch::bytes_input in(out);
irs::bytes_input in(buf);
ASSERT_TRUE(std::isnan(reader(in)));
ASSERT_TRUE(std::isnan(reader(in)));
}
@ -153,21 +156,23 @@ void read_write_core_container(
const Cont& src,
const std::function<Cont(data_input&)>& reader,
const std::function<data_output&(data_output&,const Cont&)>& writer) {
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
writer(out, src);
iresearch::bytes_input in(out);
irs::bytes_input in(buf);
const Cont read = reader( in);
ASSERT_EQ(src, read);
}
void read_write_block(const std::vector<uint32_t>& source, std::vector<uint32_t>& enc_dec_buf) {
// write block
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
irs::encode::bitpack::write_block(out, &source[0], source.size(), &enc_dec_buf[0]);
// read block
iresearch::bytes_input in(out);
irs::bytes_input in(buf);
std::vector<uint32_t> read(source.size());
irs::encode::bitpack::read_block(in, source.size(), &enc_dec_buf[0], read.data());
@ -271,14 +276,14 @@ TEST(store_utils_tests, zvfloat_read_write) {
-21532764.631984f,
-9847.23427f
},
iresearch::read_zvfloat,
iresearch::write_zvfloat
irs::read_zvfloat,
irs::write_zvfloat
);
/* NaN case */
tests::detail::read_write_core_nan<float_t>(
iresearch::read_zvfloat,
iresearch::write_zvfloat
irs::read_zvfloat,
irs::write_zvfloat
);
}
@ -300,14 +305,14 @@ TEST(store_utils_tests, zvdouble_read_write) {
-19274316.123,
-98743098097.34352532
},
iresearch::read_zvdouble,
iresearch::write_zvdouble
irs::read_zvdouble,
irs::write_zvdouble
);
/* NaN case */
tests::detail::read_write_core_nan<double_t>(
iresearch::read_zvdouble,
iresearch::write_zvdouble
irs::read_zvdouble,
irs::write_zvdouble
);
}
@ -322,8 +327,8 @@ TEST( store_utils_tests, size_read_write) {
size_t(12371792192121),
size_t(9719496156)
},
iresearch::read_size,
iresearch::write_size);
irs::read_size,
irs::write_size);
}
TEST(store_utils_tests, zvint_read_write) {
@ -337,8 +342,8 @@ TEST(store_utils_tests, zvint_read_write) {
-911728376,
-10725017
},
iresearch::read_zvint,
iresearch::write_zvint);
irs::read_zvint,
irs::write_zvint);
}
TEST(store_utils_tests, zvlong_read_write) {
@ -352,8 +357,8 @@ TEST(store_utils_tests, zvlong_read_write) {
-9184236868362391274LL,
-91724962191921979LL
},
iresearch::read_zvlong,
iresearch::write_zvlong);
irs::read_zvlong,
irs::write_zvlong);
}
TEST(store_utils_tests, std_string_read_write) {
@ -366,21 +371,21 @@ TEST(store_utils_tests, std_string_read_write) {
std::string("lazy p1230142hlds"),
std::string("dob sdofjasoufdsa")
},
iresearch::read_string<std::string>,
iresearch::write_string<std::string>);
irs::read_string<std::string>,
irs::write_string<std::string>);
}
TEST(store_utils_tests, bytes_read_write) {
tests::detail::read_write_core<bstring>(
{
bstring(),
bstring(iresearch::ref_cast<byte_type>(iresearch::string_ref("qalsdflsajfd"))),
bstring(iresearch::ref_cast<byte_type>(iresearch::string_ref("jfdldsflaflj"))),
bstring(iresearch::ref_cast<byte_type>(iresearch::string_ref("102174174010"))),
bstring(iresearch::ref_cast<byte_type>(iresearch::string_ref("0182ljdskfaof")))
bstring(irs::ref_cast<byte_type>(irs::string_ref("qalsdflsajfd"))),
bstring(irs::ref_cast<byte_type>(irs::string_ref("jfdldsflaflj"))),
bstring(irs::ref_cast<byte_type>(irs::string_ref("102174174010"))),
bstring(irs::ref_cast<byte_type>(irs::string_ref("0182ljdskfaof")))
},
iresearch::read_string<bstring>,
iresearch::write_string<bstring>);
irs::read_string<bstring>,
irs::write_string<bstring>);
}
TEST( store_utils_tests, string_vector_read_write) {
@ -392,10 +397,11 @@ TEST( store_utils_tests, string_vector_read_write) {
"lazy", "dog", "mustard"
};
iresearch::bytes_output out;
irs::bstring buf;
irs::bytes_output out(buf);
write_strings(out, src);
iresearch::bytes_input in(out);
irs::bytes_input in(buf);
const container_t readed = read_strings<container_t>(in);
ASSERT_EQ(src, readed);
@ -674,18 +680,19 @@ TEST(store_utils_tests, avg_encode_block_read_write) {
std::vector<uint64_t> buf; // temporary buffer for bit packing
buf.resize(values.size());
irs::bytes_output out;
irs::bstring out_buf;
irs::bytes_output out(out_buf);
irs::encode::avg::write_block(
out, stats.first, stats.second, avg_encoded.data(), avg_encoded.size(), buf.data()
);
ASSERT_EQ(
irs::bytes_io<uint64_t>::vsize(step) + irs::bytes_io<uint64_t>::vsize(step) + irs::bytes_io<uint32_t>::vsize(irs::encode::bitpack::ALL_EQUAL) + irs::bytes_io<uint64_t>::vsize(0), // base + avg + bits + single value
out.size()
out_buf.size()
);
{
irs::bytes_input in(out);
irs::bytes_input in(out_buf);
const uint64_t base = in.read_vlong();
const uint64_t avg= in.read_vlong();
const uint64_t bits = in.read_vint();
@ -695,20 +702,20 @@ TEST(store_utils_tests, avg_encode_block_read_write) {
}
{
irs::bytes_input in(out);
irs::bytes_input in(out_buf);
ASSERT_TRUE(irs::encode::avg::check_block_rl64(in, step));
}
{
uint64_t base, avg;
irs::bytes_input in(out);
irs::bytes_input in(out_buf);
ASSERT_TRUE(irs::encode::avg::read_block_rl64(in, base, avg));
ASSERT_EQ(step, base);
ASSERT_EQ(step, avg);
}
{
irs::bytes_input in(out);
irs::bytes_input in(out_buf);
const uint64_t base = in.read_vlong();
const uint64_t avg = in.read_vlong();

View File

@ -0,0 +1,194 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2019 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
////////////////////////////////////////////////////////////////////////////////
#include "tests_shared.hpp"
#include "store/store_utils.hpp"
#include "utils/lz4compression.hpp"
#include "utils/delta_compression.hpp"
#include <numeric>
#include <random>
NS_LOCAL
struct dummy_compressor final : irs::compression::compressor {
virtual irs::bytes_ref compress(irs::byte_type* in, size_t size, irs::bstring& /*buf*/) {
return irs::bytes_ref::NIL;
}
virtual void flush(data_output&) { }
};
struct dummy_decompressor final : irs::compression::decompressor {
virtual irs::bytes_ref decompress(
irs::byte_type* src, size_t src_size,
irs::byte_type* dst, size_t dst_size) {
return irs::bytes_ref::NIL;
}
virtual bool prepare(data_input&) { return true; }
};
NS_END
TEST(compression_test, registration) {
const irs::compression::type_id type("dummy_compression");
// check absent
{
ASSERT_FALSE(irs::compression::exists(type.name()));
ASSERT_EQ(nullptr, irs::compression::get_compressor(type.name(), {}));
ASSERT_EQ(nullptr, irs::compression::get_decompressor(type.name(), {}));
auto visitor = [&type](const irs::string_ref& name) { return name != type.name(); };
ASSERT_TRUE(irs::compression::visit(visitor));
}
static size_t calls_count;
irs::compression::compression_registrar initial(
type,
[](const irs::compression::options&) -> irs::compression::compressor::ptr {
++calls_count;
return std::make_shared<dummy_compressor>();
},
[]() -> irs::compression::decompressor::ptr {
++calls_count;
return std::make_shared<dummy_decompressor>();
}
);
ASSERT_TRUE(initial); // registered
// check registered
{
ASSERT_TRUE(irs::compression::exists(type.name()));
ASSERT_EQ(0, calls_count);
ASSERT_NE(nullptr, irs::compression::get_compressor(type.name(), {}));
ASSERT_EQ(1, calls_count);
ASSERT_NE(nullptr, irs::compression::get_decompressor(type.name(), {}));
ASSERT_EQ(2, calls_count);
auto visitor = [&type](const irs::string_ref& name) { return name != type.name(); };
ASSERT_FALSE(irs::compression::visit(visitor));
}
irs::compression::compression_registrar duplicate(
type,
[](const irs::compression::options&) -> irs::compression::compressor::ptr { return nullptr; },
[]() -> irs::compression::decompressor::ptr { return nullptr; }
);
ASSERT_FALSE(duplicate); // not registered
// check registered
{
ASSERT_TRUE(irs::compression::exists(type.name()));
ASSERT_EQ(2, calls_count);
ASSERT_NE(nullptr, irs::compression::get_compressor(type.name(), {}));
ASSERT_EQ(3, calls_count);
ASSERT_NE(nullptr, irs::compression::get_decompressor(type.name(), {}));
ASSERT_EQ(4, calls_count);
auto visitor = [&type](const irs::string_ref& name) { return name != type.name(); };
ASSERT_FALSE(irs::compression::visit(visitor));
}
}
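As a usage note, the same visit() hook asserted above can also enumerate every registered compression; a minimal sketch, assuming irs::string_ref exposes c_str()/size() as elsewhere in the library:

std::vector<std::string> list_compressions() {
  std::vector<std::string> names;
  irs::compression::visit([&names](const irs::string_ref& name) {
    names.emplace_back(name.c_str(), name.size());
    return true; // returning false would stop the enumeration early
  });
  return names;
}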
TEST(compression_test, lz4) {
using namespace iresearch;
std::vector<size_t> data(2047, 0);
std::random_device rnd_device;
std::mt19937 mersenne_engine {rnd_device()};
std::uniform_int_distribution<size_t> dist {1, 2142152};
auto generator = [&dist, &mersenne_engine](){ return dist(mersenne_engine); };
compression::lz4::lz4decompressor decompressor;
compression::lz4::lz4compressor compressor;
ASSERT_EQ(0, compressor.acceleration());
for (size_t i = 0; i < 10; ++i) {
std::generate(data.begin(), data.end(), generator);
bstring compression_buf;
bstring data_buf(data.size()*sizeof(size_t), 0);
std::memcpy(&data_buf[0], data.data(), data_buf.size());
ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(size_t)),
bytes_ref(data_buf)
);
const auto compressed = compressor.compress(&data_buf[0], data_buf.size(), compression_buf);
ASSERT_EQ(compressed, bytes_ref(compression_buf.c_str(), compressed.size()));
// lz4 doesn't modify data_buf
ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(size_t)),
bytes_ref(data_buf)
);
bstring decompression_buf(data_buf.size(), 0); // ensure we have enough space in buffer
const auto decompressed = decompressor.decompress(&compression_buf[0], compressed.size(),
&decompression_buf[0], decompression_buf.size());
ASSERT_EQ(data_buf, decompression_buf);
ASSERT_EQ(data_buf, decompressed);
}
}
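Note that decompression_buf above must be pre-sized to the exact decompressed length: LZ4 block decompression cannot recover the original size from the compressed stream, so the caller has to track it. The same contract is visible in the stock lz4 C API, sketched below (LZ4_compressBound, LZ4_compress_default and LZ4_decompress_safe are the standard lz4.h entry points):

#include <lz4.h>
#include <cassert>
#include <string>

int main() {
  const std::string src = "the quick brown fox jumps over the lazy dog";
  std::string dst(LZ4_compressBound(int(src.size())), '\0');
  const int csize = LZ4_compress_default(
    src.data(), &dst[0], int(src.size()), int(dst.size()));
  assert(csize > 0);
  std::string restored(src.size(), '\0'); // caller supplies the original size
  const int dsize = LZ4_decompress_safe(
    dst.data(), &restored[0], csize, int(restored.size()));
  assert(dsize == int(src.size()) && restored == src);
}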
TEST(compression_test, delta) {
using namespace iresearch;
std::vector<uint64_t> data(2047, 0);
std::random_device rnd_device;
std::mt19937 mersenne_engine {rnd_device()};
std::uniform_int_distribution<uint64_t> dist {1, 52};
auto generator = [&dist, &mersenne_engine](){ return dist(mersenne_engine); };
compression::delta_decompressor decompressor;
compression::delta_compressor compressor;
for (size_t i = 0; i < 10; ++i) {
std::generate(data.begin(), data.end(), generator);
bstring compression_buf;
bstring data_buf(data.size()*sizeof(uint64_t), 0);
std::memcpy(&data_buf[0], data.data(), data_buf.size());
ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(uint64_t)),
bytes_ref(data_buf)
);
const auto compressed = compressor.compress(&data_buf[0], data_buf.size(), compression_buf);
ASSERT_EQ(compressed, bytes_ref(compression_buf.c_str(), compressed.size()));
bstring decompression_buf(data_buf.size(), 0); // ensure we have enough space in buffer
const auto decompressed = decompressor.decompress(&compression_buf[0], compressed.size(),
&decompression_buf[0], decompression_buf.size());
ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(uint64_t)),
bytes_ref(decompression_buf)
);
ASSERT_EQ(
bytes_ref(reinterpret_cast<const byte_type*>(data.data()), data.size()*sizeof(uint64_t)),
bytes_ref(decompressed)
);
}
}
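For reference, the idea behind the delta codec exercised here can be sketched independently: store each value as the difference from its predecessor, turning slowly-changing sequences into small integers that bit-pack well. The helper names below are illustrative, not part of the library:

#include <cstddef>
#include <cstdint>
#include <vector>

void delta_encode(std::vector<uint64_t>& v) {
  for (size_t i = v.size(); i-- > 1; ) {
    v[i] -= v[i - 1]; // v[0] stays as the base value
  }
}

void delta_decode(std::vector<uint64_t>& v) {
  for (size_t i = 1; i < v.size(); ++i) {
    v[i] += v[i - 1]; // running prefix sum restores the originals
  }
}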

View File

@ -21,10 +21,6 @@
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#include "store/mmap_directory.hpp"
#include "store/store_utils.hpp"
#include "utils/singleton.hpp"
#include "IResearchCommon.h"
#include "IResearchFeature.h"
#include "IResearchLinkHelper.h"
@ -49,6 +45,13 @@
#include "IResearchLink.h"
#include "index/column_info.hpp"
#include "store/mmap_directory.hpp"
#include "store/store_utils.hpp"
#include "utils/lz4compression.hpp"
#include "utils/encryption.hpp"
#include "utils/singleton.hpp"
using namespace std::literals;
namespace {
@ -56,7 +59,7 @@ namespace {
////////////////////////////////////////////////////////////////////////////////
/// @brief the storage format used with IResearch writers
////////////////////////////////////////////////////////////////////////////////
const irs::string_ref IRESEARCH_STORE_FORMAT("1_1");
const irs::string_ref IRESEARCH_STORE_FORMAT("1_2");
typedef irs::async_utils::read_write_mutex::read_mutex ReadMutex;
typedef irs::async_utils::read_write_mutex::write_mutex WriteMutex;
@ -1090,10 +1093,24 @@ Result IResearchLink::initDataStore(InitCallback const& initCallback, bool sorte
_lastCommittedTick = _dataStore._recoveryTick;
_flushSubscription.reset(new IResearchFlushSubscription(_dataStore._recoveryTick));
irs::index_writer::init_options options;
options.lock_repository = false; // do not lock index, ArangoDB has its own lock
options.comparator = sorted ? &_comparer : nullptr; // set comparator if requested
// setup columnstore compression/encryption if requested by storage engine
auto const encrypt = (nullptr != irs::get_encryption(_dataStore._directory->attributes()));
if (encrypt) {
options.column_info = [](const irs::string_ref& name) -> irs::column_info {
// do not waste resources to encrypt primary key column
return { irs::compression::lz4::type(), {}, DocumentPrimaryKey::PK() != name };
};
} else {
options.column_info = [](const irs::string_ref& /*name*/) -> irs::column_info {
return { irs::compression::lz4::type(), {}, false };
};
}
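// NOTE: column_info is assumed here to bundle { compression type, compression
// options, encrypt flag }; with an encrypted directory every column except the
// primary-key column is encrypted, otherwise encryption is skipped entirely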
auto openFlags = irs::OM_APPEND;
if (!_dataStore._reader) {
openFlags |= irs::OM_CREATE;