1
0
Fork 0

update codebase (#7125)

This commit is contained in:
Andrey Abramov 2018-10-29 16:59:39 +03:00 committed by GitHub
parent c7c838ed30
commit d30cf315fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 311 additions and 82 deletions

View File

@ -60,13 +60,19 @@ struct IRESEARCH_API error_base: std::exception {
// -----------------------------------------------------------------------------
// detailed_error_base
// -----------------------------------------------------------------------------
class IRESEARCH_API detailed_error_base: public error_base {
class IRESEARCH_API detailed_error_base : public error_base {
public:
explicit detailed_error_base(const char* error): error_(error) {}
explicit detailed_error_base(const char* error)
: error_(error) {
}
explicit detailed_error_base(std::string&& error) NOEXCEPT
: error_(std::move(error)) {
}
virtual const char* what() const NOEXCEPT final { return error_.c_str(); }
virtual const char* what() const NOEXCEPT override final {
return error_.c_str();
}
private:
IRESEARCH_API_PRIVATE_VARIABLES_BEGIN
@ -190,4 +196,4 @@ struct IRESEARCH_API illegal_state: error_base {
NS_END
#endif
#endif

View File

@ -158,7 +158,11 @@ using namespace iresearch;
class features {
public:
enum Mask : uint32_t {
POS = 3, POS_OFFS = 7, POS_PAY = 11, POS_OFFS_PAY = 15
DOCS = 0,
FREQ = 1,
POS = 2,
OFFS = 4,
PAY = 8
};
features() = default;
@ -188,10 +192,20 @@ class features {
bool payload() const NOEXCEPT { return irs::check_bit<3>(mask_); }
operator Mask() const NOEXCEPT { return static_cast<Mask>(mask_); }
// Returns true when at least one of the bits requested in 'mask'
// is also set in the stored feature mask.
bool any(Mask mask) const NOEXCEPT {
  const auto common = mask_ & mask; // bits present in both masks
  return common != Mask(0);
}
// Returns true only when every bit requested in 'mask' is also set in the
// stored feature mask.
bool all(Mask mask) const NOEXCEPT {
  // the intersection equals 'mask' iff none of the requested bits is missing
  // (the comparison was previously '!=', which reported the opposite)
  return mask == (mask_ & mask);
}
private:
irs::byte_type mask_{};
}; // features
ENABLE_BITMASK_ENUM(features::Mask);
// ----------------------------------------------------------------------------
// --SECTION-- forward declarations
// ----------------------------------------------------------------------------
@ -492,7 +506,7 @@ void postings_writer::prepare(index_output& out, const iresearch::flush_state& s
std::memset(doc.freqs.get(), 0, sizeof(uint32_t) * BLOCK_SIZE);
}
if (features.check< position >()) {
if (features.check<position>()) {
// prepare proximity stream
if (!pos_) {
pos_ = memory::make_unique< pos_stream >();
@ -501,7 +515,7 @@ void postings_writer::prepare(index_output& out, const iresearch::flush_state& s
pos_->reset();
prepare_output(name, pos_->out, state, POS_EXT, POS_FORMAT_NAME, FORMAT_MAX);
if (features.check< payload >() || features.check< offset >()) {
if (features.check<payload>() || features.check<offset>()) {
// prepare payload stream
if (!pay_) {
pay_ = memory::make_unique<pay_stream>();
@ -629,11 +643,11 @@ void postings_writer::begin_term() {
doc.start = doc.out->file_pointer();
std::fill_n(doc.skip_ptr, MAX_SKIP_LEVELS, doc.start);
if (features_.position()) {
assert(pos_);
assert(pos_ && pos_->out);
pos_->start = pos_->out->file_pointer();
std::fill_n(pos_->skip_ptr, MAX_SKIP_LEVELS, pos_->start);
if (features_.payload() || features_.offset()) {
assert(pay_);
if (features_.any(features::OFFS | features::PAY)) {
assert(pay_ && pay_->out);
pay_->start = pay_->out->file_pointer();
std::fill_n(pay_->skip_ptr, MAX_SKIP_LEVELS, pay_->start);
}
@ -674,7 +688,7 @@ void postings_writer::begin_doc(doc_id_t id, const frequency* freq) {
void postings_writer::add_position(uint32_t pos, const offset* offs, const payload* pay) {
assert(!offs || offs->start <= offs->end);
assert(pos_); /* at least positions stream should be created */
assert(features_.position() && pos_ && pos_->out); /* at least positions stream should be created */
pos_->pos(pos - pos_->last);
if (pay) pay_->payload(pos_->size, pay->value);
@ -686,26 +700,29 @@ void postings_writer::add_position(uint32_t pos, const offset* offs, const paylo
pos_->flush(buf);
if (pay) {
assert(features_.payload() && pay_ && pay_->out);
pay_->flush_payload(buf);
}
if (offs) {
  // offsets are flushed into the payload stream, which 'prepare' creates
  // when either the payload or the offset feature is present; the feature
  // that must be enabled for this branch is 'offset', not 'payload'
  assert(features_.offset() && pay_ && pay_->out);
  pay_->flush_offsets(buf);
}
}
}
void postings_writer::end_doc() {
if ( doc.full() ) {
if (doc.full()) {
doc.block_last = doc.last;
doc.end = doc.out->file_pointer();
if ( pos_ ) {
assert( pos_ );
if (features_.position()) {
assert(pos_ && pos_->out);
pos_->end = pos_->out->file_pointer();
// documents stream is full, but positions stream is not
// save number of positions to skip before the next block
pos_->block_last = pos_->size;
if ( pay_ ) {
if (features_.any(features::OFFS | features::PAY)) {
assert(pay_ && pay_->out);
pay_->end = pay_->out->file_pointer();
pay_->block_last = pay_->pay_buf_.size();
}
@ -751,6 +768,8 @@ void postings_writer::end_term(version10::term_meta& meta, const uint32_t* tfreq
/* write remaining position using
* variable length encoding */
if (features_.position()) {
assert(pos_ && pos_->out);
if (meta.freq > BLOCK_SIZE) {
meta.pos_end = pos_->out->file_pointer() - pos_->start;
}
@ -763,6 +782,8 @@ void postings_writer::end_term(version10::term_meta& meta, const uint32_t* tfreq
for (uint32_t i = 0; i < pos_->size; ++i) {
const uint32_t pos_delta = pos_->buf[i];
if (features_.payload()) {
assert(pay_ && pay_->out);
const uint32_t size = pay_->pay_sizes[i];
if (last_pay_size != size) {
last_pay_size = size;
@ -781,6 +802,8 @@ void postings_writer::end_term(version10::term_meta& meta, const uint32_t* tfreq
}
if (features_.offset()) {
assert(pay_ && pay_->out);
const uint32_t pay_offs_delta = pay_->offs_start_buf[i];
const uint32_t len = pay_->offs_len_buf[i];
if (len == last_offs_len) {
@ -794,6 +817,7 @@ void postings_writer::end_term(version10::term_meta& meta, const uint32_t* tfreq
}
if (features_.payload()) {
assert(pay_ && pay_->out);
pay_->pay_buf_.clear();
}
}
@ -850,8 +874,8 @@ void postings_writer::write_skip(size_t level, index_output& out) {
pos_->skip_ptr[level] = pos_ptr;
if (features_.payload() || features_.offset()) {
assert(pay_);
if (features_.any(features::OFFS | features::PAY)) {
assert(pay_ && pay_->out);
if (features_.payload()) {
out.write_vint(static_cast<uint32_t>(pay_->block_last));
@ -886,7 +910,7 @@ void postings_writer::encode(
if (type_limits<type_t::address_t>::valid(meta.pos_end)) {
out.write_vlong(meta.pos_end);
}
if (features_.payload() || features_.offset()) {
if (features_.any(features::OFFS | features::PAY)) {
out.write_vlong(meta.pay_start - last_state.pay_start);
}
}
@ -5053,6 +5077,12 @@ void postings_reader::decode(
}
}
#if defined(_MSC_VER)
#elif defined (__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch"
#endif
irs::doc_iterator::ptr postings_reader::iterator(
const flags& field,
const attribute_view& attrs,
@ -5064,17 +5094,24 @@ irs::doc_iterator::ptr postings_reader::iterator(
const auto enabled = features & req;
doc_iterator::ptr it;
switch(enabled) {
case features::POS_OFFS_PAY:
// MSVC 2013 doesn't support constexpr, can't use
// 'operator|' in the following switch statement
CONSTEXPR const auto FREQ_POS_OFFS_PAY = features::FREQ | features::POS | features::OFFS | features::PAY;
CONSTEXPR const auto FREQ_POS_OFFS = features::FREQ | features::POS | features::OFFS;
CONSTEXPR const auto FREQ_POS_PAY = features::FREQ | features::POS | features::PAY;
CONSTEXPR const auto FREQ_POS = features::FREQ | features::POS;
switch (enabled) {
case FREQ_POS_OFFS_PAY:
it = doc_iterator::make<pos_doc_iterator<offs_pay_iterator>>();
break;
case features::POS_OFFS:
case FREQ_POS_OFFS:
it = doc_iterator::make<pos_doc_iterator<offs_iterator>>();
break;
case features::POS_PAY:
case FREQ_POS_PAY:
it = doc_iterator::make<pos_doc_iterator<pay_iterator>>();
break;
case features::POS:
case FREQ_POS:
it = doc_iterator::make<pos_doc_iterator<pos_iterator>>();
break;
default:
@ -5089,6 +5126,11 @@ irs::doc_iterator::ptr postings_reader::iterator(
return IMPLICIT_MOVE_WORKAROUND(it);
}
#if defined(_MSC_VER)
#elif defined (__GNUC__)
#pragma GCC diagnostic pop
#endif
// actual implementation
class format : public irs::version10::format {
public:
@ -5231,4 +5273,4 @@ NS_END // root
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------

View File

@ -2156,4 +2156,4 @@ NS_END
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------

View File

@ -26,6 +26,10 @@
#include "shared.hpp"
#if defined(_MSC_VER) && _MSC_VER < 1900 // before MSVC2015
#define snprintf _snprintf
#endif
NS_ROOT
NS_BEGIN(string_utils)
@ -49,8 +53,7 @@ inline std::basic_string<T>& oversize(
////////////////////////////////////////////////////////////////////////////////
template <typename... Args>
inline int to_string(std::string& buf, const char* format, Args&&... args) {
char ch;
auto result = snprintf(&ch, 0, format, std::forward<Args>(args)...);
auto result = snprintf(nullptr, 0, format, std::forward<Args>(args)...); // MSVC requires 'nullptr' buffer and '0' size to get expected size
if (result <= 0) {
return result;
@ -63,7 +66,7 @@ inline int to_string(std::string& buf, const char* format, Args&&... args) {
try {
result = snprintf(&buf[start], result, format, std::forward<Args>(args)...);
buf.resize(start + std::max(0, result));
buf.resize(start + (std::max)(0, result));
} catch (...) {
buf.resize(start);
@ -84,7 +87,6 @@ inline std::string to_string(const char* format, Args&&... args) {
assert(result >= 0);
assert(size_t(result) == buf.size());
UNUSED(result);
return buf;
@ -93,4 +95,4 @@ inline std::string to_string(const char* format, Args&&... args) {
NS_END // string_utils
NS_END
#endif
#endif

View File

@ -21,11 +21,13 @@
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#include <mutex>
#include <unordered_map>
#include "singleton.hpp"
#include "timer_utils.hpp"
#include <mutex>
#include <unordered_map>
#include <map>
NS_LOCAL
class timer_states: public iresearch::singleton<timer_states> {
@ -138,9 +140,45 @@ bool visit(
return timer_states::instance().visit(visitor);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief write one report line per registered timer to 'out', ordered by the
///        normalized timer name
/// @note timer names are normalized before being reported:
///       - GCC: a leading "virtual " and a return type preceding the first
///         '(' are stripped
///       - MSVC: everything up to and including "__cdecl " is stripped
/// @note timers whose names normalize to the same string are merged
/// NOTE(review): the header names the visitor argument 'time_us' while the
///               value is divided by 1000 and still labelled "us" here,
///               confirm the actual unit reported by 'visit'
////////////////////////////////////////////////////////////////////////////////
void flush_stats(std::ostream &out) {
  // normalized name -> (number of calls, accumulated time), sorted by name
  std::map<std::string, std::pair<size_t, size_t>> ordered_stats;

  iresearch::timer_utils::visit([&ordered_stats](const std::string& key, size_t count, size_t time)->bool {
    std::string key_str = key;

#if defined(__GNUC__)
    if (key_str.compare(0, strlen("virtual "), "virtual ") == 0) {
      key_str = key_str.substr(strlen("virtual "));
    }

    size_t i;

    // drop the return type, i.e. the first word if it precedes the '('
    if (std::string::npos != (i = key_str.find(' ')) && key_str.find('(') > i) {
      key_str = key_str.substr(i + 1);
    }
#elif defined(_MSC_VER)
    size_t i;

    if (std::string::npos != (i = key_str.find("__cdecl "))) {
      key_str = key_str.substr(i + strlen("__cdecl "));
    }
#endif

    // different raw names may normalize to the same key, accumulate their
    // stats instead of silently dropping all but the first one
    auto& stats = ordered_stats[key_str];
    stats.first += count;
    stats.second += time;

    return true;
  });

  for (auto& entry: ordered_stats) {
    const auto& key = entry.first;
    const auto count = entry.second.first;
    const auto time = entry.second.second;

    // divide in floating point so that the average is not truncated to whole
    // units first, and guard against timers without any recorded calls
    const double avg_call = 0 == count
      ? 0.
      : static_cast<double>(time) / 1000. / static_cast<double>(count);

    out << key << "\tcalls:" << count << ",\ttime: " << time/1000 << " us,\tavg call: " << avg_call << " us" << '\n';
  }

  // flush once for the whole report rather than after every line
  out.flush();
}
NS_END // timer_utils
NS_END // NS_ROOT
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------

View File

@ -103,7 +103,12 @@ IRESEARCH_API bool visit(
const std::function<bool(const std::string& key, size_t count, size_t time_us)>& visitor
);
////////////////////////////////////////////////////////////////////////////////
/// @brief flush formatted timer stats to a specified stream
/// @param out the stream that receives the report, one line per timer
///            containing the timer name, the number of calls, the total time
///            and the average time per call
/// @note lines are ordered by timer name
////////////////////////////////////////////////////////////////////////////////
IRESEARCH_API void flush_stats(std::ostream &out);
NS_END // timer_utils
NS_END // NS_ROOT
#endif
#endif

View File

@ -27,6 +27,7 @@
#include "store/memory_directory.hpp"
#include "store/fs_directory.hpp"
#include "utils/bit_packing.hpp"
#include "utils/type_limits.hpp"
#include "formats/formats_10.hpp"
#include "formats/formats_10_attributes.hpp"
#include "formats_test_case_base.hpp"
@ -323,6 +324,176 @@ class format_10_test_case : public tests::format_test_case_base {
}
}
void postings_writer_reuse() {
auto codec = std::dynamic_pointer_cast<const irs::version10::format>(get_codec());
ASSERT_NE(nullptr, codec);
auto writer = codec->get_postings_writer(false);
ASSERT_NE(nullptr, writer);
std::vector<irs::doc_id_t> docs0;
irs::doc_id_t i = (irs::type_limits<irs::type_t::doc_id_t>::min)();
for (; i < 1000; ++i) {
docs0.push_back(i);
}
// gap
for (i += 1000; i < 10000; ++i) {
docs0.push_back(i);
}
// write docs 'segment0' with all possible streams
{
const irs::field_meta field(
"field", irs::flags{ irs::frequency::type(), irs::position::type(), irs::offset::type(), irs::payload::type() }
);
irs::flush_state state;
state.dir = &dir();
state.doc_count = 10000;
state.fields_count = 1;
state.name = "0";
state.features = &field.features; // all possible features in segment
state.ver = IRESEARCH_VERSION;
auto out = dir().create(std::string("postings") + state.name.c_str());
ASSERT_FALSE(!out);
postings docs(docs0.begin(), docs0.end());
writer->prepare(*out, state);
writer->begin_field(*state.features);
writer->write(docs);
writer->end();
}
// write docs 'segment1' with position & offset
{
const irs::field_meta field(
"field", irs::flags{ irs::frequency::type(), irs::position::type(), irs::offset::type() }
);
irs::flush_state state;
state.dir = &dir();
state.doc_count = 10000;
state.fields_count = 1;
state.name = "1";
state.features = &field.features; // all possible features in segment
state.ver = IRESEARCH_VERSION;
auto out = dir().create(std::string("postings") + state.name.c_str());
ASSERT_FALSE(!out);
postings docs(docs0.begin(), docs0.end());
writer->prepare(*out, state);
writer->begin_field(*state.features);
writer->write(docs);
writer->end();
}
// write docs 'segment2' with position & payload
{
const irs::field_meta field(
"field", irs::flags{ irs::frequency::type(), irs::position::type(), irs::payload::type() }
);
irs::flush_state state;
state.dir = &dir();
state.doc_count = 10000;
state.fields_count = 1;
state.name = "2";
state.features = &field.features; // all possible features in segment
state.ver = IRESEARCH_VERSION;
auto out = dir().create(std::string("postings") + state.name.c_str());
ASSERT_FALSE(!out);
postings docs(docs0.begin(), docs0.end());
writer->prepare(*out, state);
writer->begin_field(*state.features);
writer->write(docs);
writer->end();
}
// write docs 'segment3' with position
{
const irs::field_meta field(
"field", irs::flags{ irs::frequency::type(), irs::position::type() }
);
irs::flush_state state;
state.dir = &dir();
state.doc_count = 10000;
state.fields_count = 1;
state.name = "3";
state.features = &field.features; // all possible features in segment
state.ver = IRESEARCH_VERSION;
auto out = dir().create(std::string("postings") + state.name.c_str());
ASSERT_FALSE(!out);
postings docs(docs0.begin(), docs0.end());
writer->prepare(*out, state);
writer->begin_field(*state.features);
writer->write(docs);
writer->end();
}
// write docs 'segment3' with frequency
{
const irs::field_meta field(
"field", irs::flags{ irs::frequency::type() }
);
irs::flush_state state;
state.dir = &dir();
state.doc_count = 10000;
state.fields_count = 1;
state.name = "4";
state.features = &field.features; // all possible features in segment
state.ver = IRESEARCH_VERSION;
auto out = dir().create(std::string("postings") + state.name.c_str());
ASSERT_FALSE(!out);
postings docs(docs0.begin(), docs0.end());
writer->prepare(*out, state);
writer->begin_field(*state.features);
writer->write(docs);
writer->end();
}
// writer segment without any attributes
{
const irs::field_meta field_no_features(
"field", irs::flags{}
);
irs::flush_state state;
state.dir = &dir();
state.doc_count = 10000;
state.fields_count = 1;
state.name = "5";
state.features = &field_no_features.features; // all possible features in segment
state.ver = IRESEARCH_VERSION;
auto out = dir().create(std::string("postings") + state.name.c_str());
ASSERT_FALSE(!out);
postings docs(docs0.begin(), docs0.end());
writer->prepare(*out, state);
writer->begin_field(*state.features);
writer->write(docs);
writer->end();
}
}
void assert_positions(const irs::doc_iterator& expected, const irs::doc_iterator& actual) {
auto& expected_pos = expected.attributes().get<irs::position>();
auto& actual_pos = actual.attributes().get<irs::position>();
@ -809,6 +980,10 @@ TEST_F(memory_format_10_test_case, document_mask_rw) {
document_mask_read_write();
}
// runs the postings writer reuse scenario for the 'memory_format_10_test_case' fixture
TEST_F(memory_format_10_test_case, reuse_postings_writer) {
postings_writer_reuse();
}
// ----------------------------------------------------------------------------
// --SECTION-- fs_directory + iresearch_format_10
// ----------------------------------------------------------------------------

View File

@ -308,7 +308,7 @@ class index_test_case_base: public index_test_base {
std::ofstream out(path.native());
flush_timers(out);
irs::timer_utils::flush_stats(out);
out.close();
std::cout << "Path to timing log: " << path.utf8_absolute() << std::endl;
@ -696,4 +696,4 @@ TEST_F(mmap_index_profile_test, profile_bulk_index_multithread_update_batched_mt
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------

View File

@ -316,7 +316,7 @@ class transaction_store_tests: public test_base {
std::ofstream out(path.native());
flush_timers(out);
irs::timer_utils::flush_stats(out);
out.close();
std::cout << "Path to timing log: " << path.utf8_absolute() << std::endl;
@ -5713,4 +5713,4 @@ TEST_F(transaction_store_tests, read_reopen) {
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// -----------------------------------------------------------------------------

View File

@ -269,42 +269,6 @@ int test_base::initialize(int argc, char* argv[]) {
return RUN_ALL_TESTS();
}
void flush_timers(std::ostream &out) {
std::map<std::string, std::pair<size_t, size_t>> ordered_stats;
iresearch::timer_utils::visit([&ordered_stats](const std::string& key, size_t count, size_t time)->bool {
std::string key_str = key;
#if defined(__GNUC__)
if (key_str.compare(0, strlen("virtual "), "virtual ") == 0) {
key_str = key_str.substr(strlen("virtual "));
}
size_t i;
if (std::string::npos != (i = key_str.find(' ')) && key_str.find('(') > i) {
key_str = key_str.substr(i + 1);
}
#elif defined(_MSC_VER)
size_t i;
if (std::string::npos != (i = key_str.find("__cdecl "))) {
key_str = key_str.substr(i + strlen("__cdecl "));
}
#endif
ordered_stats.emplace(key_str, std::make_pair(count, time));
return true;
});
for (auto& entry: ordered_stats) {
auto& key = entry.first;
auto& count = entry.second.first;
auto& time = entry.second.second;
out << key << "\tcalls:" << count << ",\ttime: " << time/1000 << " us,\tavg call: " << time/1000/(double)count << " us"<< std::endl;
}
}
void stack_trace_handler(int sig) {
// reset to default handler
signal(sig, SIG_DFL);

View File

@ -88,7 +88,4 @@ class test_base : public ::testing::Test {
bool artifacts_;
}; // test_base
// writes formatted report to the specified output stream
void flush_timers(std::ostream& out);
#endif
#endif

View File

@ -393,7 +393,7 @@ TEST_F(memory_pool_allocator_test, profile_std_map) {
std::ofstream out(path.native());
flush_timers(out);
irs::timer_utils::flush_stats(out);
out.close();
std::cout << "Path to timing log: " << path.utf8_absolute() << std::endl;
}
@ -596,7 +596,7 @@ TEST_F(memory_pool_allocator_test, profile_std_multimap) {
std::ofstream out(path.native());
flush_timers(out);
irs::timer_utils::flush_stats(out);
out.close();
std::cout << "Path to timing log: " << path.utf8_absolute() << std::endl;
}
@ -743,7 +743,7 @@ TEST_F(memory_pool_allocator_test, profile_std_list) {
std::ofstream out(path.native());
flush_timers(out);
irs::timer_utils::flush_stats(out);
out.close();
std::cout << "Path to timing log: " << path.utf8_absolute() << std::endl;
}
@ -916,7 +916,7 @@ TEST_F(memory_pool_allocator_test, profile_std_set) {
std::ofstream out(path.native());
flush_timers(out);
irs::timer_utils::flush_stats(out);
out.close();
std::cout << "Path to timing log: " << path.utf8_absolute() << std::endl;
}