From 822500386196cc3bdfb948e6694e813d9a77775d Mon Sep 17 00:00:00 2001 From: Andrey Abramov Date: Wed, 31 Oct 2018 15:54:16 +0300 Subject: [PATCH] [3.4] arangosearch speedup removals (#7158) * Feature/arangosearch speedup removals (#7134) * speedup document removals and optimize data model * fix invalid constexpr * reduce number of heap allocations for removals (#7157) --- 3rdParty/iresearch/core/search/filter.hpp | 2 +- arangod/CMakeLists.txt | 1 + arangod/IResearch/IResearchDocument.cpp | 77 ++++----- arangod/IResearch/IResearchDocument.h | 40 +++-- arangod/IResearch/IResearchFilterFactory.cpp | 28 +--- arangod/IResearch/IResearchFilterFactory.h | 10 +- .../IResearch/IResearchPrimaryKeyFilter.cpp | 105 ++++++++++++ arangod/IResearch/IResearchPrimaryKeyFilter.h | 149 ++++++++++++++++++ arangod/IResearch/IResearchView.cpp | 120 ++++++++------ arangod/IResearch/IResearchView.h | 5 +- tests/IResearch/ExpressionFilter-test.cpp | 2 +- tests/IResearch/IResearchDocument-test.cpp | 48 ++---- 12 files changed, 415 insertions(+), 172 deletions(-) create mode 100644 arangod/IResearch/IResearchPrimaryKeyFilter.cpp create mode 100644 arangod/IResearch/IResearchPrimaryKeyFilter.h diff --git a/3rdParty/iresearch/core/search/filter.hpp b/3rdParty/iresearch/core/search/filter.hpp index c3dfc12564..f5b51d96d3 100644 --- a/3rdParty/iresearch/core/search/filter.hpp +++ b/3rdParty/iresearch/core/search/filter.hpp @@ -87,7 +87,7 @@ class IRESEARCH_API filter { ////////////////////////////////////////////////////////////////////////////// class IRESEARCH_API prepared: public util::attribute_store_provider { public: - DECLARE_SHARED_PTR(prepared); + DECLARE_SHARED_PTR(const prepared); DEFINE_FACTORY_INLINE(prepared); static prepared::ptr empty(); diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 564fc677c5..9943aabcb9 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -73,6 +73,7 @@ if (USE_IRESEARCH) IResearch/IResearchViewMeta.cpp IResearch/IResearchViewMeta.h IResearch/IResearchFeature.cpp IResearch/IResearchFeature.h IResearch/IResearchDocument.cpp IResearch/IResearchDocument.h + IResearch/IResearchPrimaryKeyFilter.cpp IResearch/IResearchPrimaryKeyFilter.h IResearch/IResearchFilterFactory.cpp IResearch/IResearchFilterFactory.h IResearch/IResearchOrderFactory.cpp IResearch/IResearchOrderFactory.h IResearch/VelocyPackHelper.cpp IResearch/VelocyPackHelper.h diff --git a/arangod/IResearch/IResearchDocument.cpp b/arangod/IResearch/IResearchDocument.cpp index 09bff473f6..e935af2d6b 100644 --- a/arangod/IResearch/IResearchDocument.cpp +++ b/arangod/IResearch/IResearchDocument.cpp @@ -65,7 +65,6 @@ static_assert( ); irs::string_ref const CID_FIELD("@_CID"); -irs::string_ref const RID_FIELD("@_REV"); irs::string_ref const PK_COLUMN("@_PK"); // wrapper for use objects with the IResearch unbounded_object_pool @@ -370,19 +369,6 @@ bool setStringValue( return true; } -void setIdValue( - uint64_t& value, - irs::token_stream& analyzer -) { -#ifdef ARANGODB_ENABLE_MAINTAINER_MODE - auto& sstream = dynamic_cast(analyzer); -#else - auto& sstream = static_cast(analyzer); -#endif - - sstream.reset(arangodb::iresearch::DocumentPrimaryKey::encode(value)); -} - NS_END NS_BEGIN(arangodb) @@ -392,36 +378,62 @@ NS_BEGIN(iresearch) // --SECTION-- Field implementation // ---------------------------------------------------------------------------- -/*static*/ void Field::setCidValue(Field& field, TRI_voc_cid_t& cid) { +/*static*/ void Field::setCidValue( + Field& field, + TRI_voc_cid_t const& cid +) { + TRI_ASSERT(field._analyzer); + + irs::bytes_ref const cidRef( + reinterpret_cast(&cid), + sizeof(TRI_voc_cid_t) + ); + field._name = CID_FIELD; - setIdValue(cid, *field._analyzer); field._features = &irs::flags::empty_instance(); +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + auto& sstream = dynamic_cast(*field._analyzer); +#else + auto& sstream = static_cast(*field._analyzer); +#endif + sstream.reset(cidRef); } /*static*/ void Field::setCidValue( Field& field, - TRI_voc_cid_t& cid, + TRI_voc_cid_t const& cid, Field::init_stream_t ) { field._analyzer = StringStreamPool.emplace().release(); // FIXME don't use shared_ptr setCidValue(field, cid); } -/*static*/ void Field::setRidValue(Field& field, TRI_voc_rid_t& rid) { - field._name = RID_FIELD; - setIdValue(rid, *field._analyzer); +/*static*/ void Field::setPkValue( + Field& field, + DocumentPrimaryKey const& pk +) { + field._name = PK_COLUMN; field._features = &irs::flags::empty_instance(); + field._storeValues = ValueStorage::FULL; + field._value = static_cast(pk); +#ifdef ARANGODB_ENABLE_MAINTAINER_MODE + auto& sstream = dynamic_cast(*field._analyzer); +#else + auto& sstream = static_cast(*field._analyzer); +#endif + sstream.reset(field._value); } -/*static*/ void Field::setRidValue( +/*static*/ void Field::setPkValue( Field& field, - TRI_voc_rid_t& rid, + DocumentPrimaryKey const& pk, Field::init_stream_t ) { field._analyzer = StringStreamPool.emplace().release(); // FIXME don't use shared_ptr - setRidValue(field, rid); + setPkValue(field, pk); } + Field::Field(Field&& rhs) : _features(rhs._features), _analyzer(std::move(rhs._analyzer)), @@ -632,10 +644,6 @@ void FieldIterator::next() { return CID_FIELD; } -/* static */ irs::string_ref const& DocumentPrimaryKey::RID() { - return RID_FIELD; -} - /* static */ bool DocumentPrimaryKey::decode( uint64_t& buf, const irs::bytes_ref& value ) { @@ -671,6 +679,12 @@ DocumentPrimaryKey::DocumentPrimaryKey( ) noexcept : _keys{ cid, rid } { static_assert(sizeof(_keys) == sizeof(cid) + sizeof(rid), "Invalid size"); + + // ensure little endian + if (irs::numeric_utils::is_big_endian()) { + _keys[0] = Swap8Bytes(_keys[0]); + _keys[1] = Swap8Bytes(_keys[1]); + } } bool DocumentPrimaryKey::read(irs::bytes_ref const& in) noexcept { @@ -683,15 +697,6 @@ bool DocumentPrimaryKey::read(irs::bytes_ref const& in) noexcept { return true; } -bool DocumentPrimaryKey::write(irs::data_output& out) const { - out.write_bytes( - reinterpret_cast(_keys), - sizeof(_keys) - ); - - return true; -} - bool appendKnownCollections( std::unordered_set& set, const irs::index_reader& reader diff --git a/arangod/IResearch/IResearchDocument.h b/arangod/IResearch/IResearchDocument.h index d7cd8e44b1..91c3aee0bc 100644 --- a/arangod/IResearch/IResearchDocument.h +++ b/arangod/IResearch/IResearchDocument.h @@ -30,6 +30,7 @@ #include "VelocyPackHelper.h" #include "search/filter.hpp" +#include "store/data_output.hpp" NS_BEGIN(iresearch) @@ -80,6 +81,7 @@ char const NESTING_LIST_OFFSET_PREFIX = '['; char const NESTING_LIST_OFFSET_SUFFIX = ']'; struct IResearchViewMeta; // forward declaration +class DocumentPrimaryKey; // forward declaration //////////////////////////////////////////////////////////////////////////////// /// @brief indexed/stored document field adapter for IResearch @@ -87,10 +89,10 @@ struct IResearchViewMeta; // forward declaration struct Field { struct init_stream_t{}; // initialize stream - static void setCidValue(Field& field, TRI_voc_cid_t& cid); - static void setCidValue(Field& field, TRI_voc_cid_t& cid, init_stream_t); - static void setRidValue(Field& field, TRI_voc_rid_t& rid); - static void setRidValue(Field& field, TRI_voc_rid_t& rid, init_stream_t); + static void setCidValue(Field& field, TRI_voc_cid_t const& cid); + static void setCidValue(Field& field, TRI_voc_cid_t const& cid, init_stream_t); + static void setPkValue(Field& field, DocumentPrimaryKey const& pk); + static void setPkValue(Field& field, DocumentPrimaryKey const& pk, init_stream_t); Field() = default; Field(Field&& rhs); @@ -110,13 +112,18 @@ struct Field { return *_analyzer; } - bool write(irs::data_output&) const noexcept { + bool write(irs::data_output& out) const noexcept { + if (!_value.null()) { + out.write_bytes(_value.c_str(), _value.size()); + } + return true; } irs::flags const* _features{ &irs::flags::empty_instance() }; std::shared_ptr _analyzer; irs::string_ref _name; + irs::bytes_ref _value; ValueStorage _storeValues; }; // Field @@ -239,7 +246,6 @@ class DocumentPrimaryKey { public: static irs::string_ref const& PK(); // stored primary key column static irs::string_ref const& CID(); // stored collection id column - static irs::string_ref const& RID(); // stored revision id column //////////////////////////////////////////////////////////////////////////////// /// @brief decodes the specified value in a proper way into 'buf' @@ -254,18 +260,22 @@ class DocumentPrimaryKey { //////////////////////////////////////////////////////////////////////////////// static irs::bytes_ref encode(uint64_t& value); - DocumentPrimaryKey() = default; + constexpr DocumentPrimaryKey() = default; DocumentPrimaryKey(TRI_voc_cid_t cid, TRI_voc_rid_t rid) noexcept; - irs::string_ref const& name() const noexcept { return PK(); } + // returning reference is important + // (because of casting to 'irs::bytes_ref') + constexpr TRI_voc_cid_t const& cid() const noexcept { return _keys[0]; } + constexpr TRI_voc_rid_t const& rid() const noexcept { return _keys[1]; } + + explicit operator irs::bytes_ref() const noexcept { + return { + reinterpret_cast(_keys), + sizeof _keys + }; + } + bool read(irs::bytes_ref const& in) noexcept; - bool write(irs::data_output& out) const; - - TRI_voc_cid_t cid() const noexcept { return _keys[0]; } - void cid(TRI_voc_cid_t cid) noexcept { _keys[0] = cid; } - - TRI_voc_rid_t rid() const noexcept { return _keys[1]; } - void rid(TRI_voc_rid_t rid) noexcept { _keys[1] = rid; } private: // FIXME: define storage format (LE or BE) diff --git a/arangod/IResearch/IResearchFilterFactory.cpp b/arangod/IResearch/IResearchFilterFactory.cpp index 961b17c6c3..8a1b3d09c7 100644 --- a/arangod/IResearch/IResearchFilterFactory.cpp +++ b/arangod/IResearch/IResearchFilterFactory.cpp @@ -47,6 +47,7 @@ #include "IResearchFilterFactory.h" #include "IResearchDocument.h" #include "IResearchKludge.h" +#include "IResearchPrimaryKeyFilter.h" #include "Aql/Function.h" #include "Aql/Ast.h" #include "Logger/LogMacros.h" @@ -2080,32 +2081,7 @@ namespace iresearch { TRI_voc_cid_t cid, TRI_voc_rid_t rid ) { - auto filter = irs::And::make(); - - FilterFactory::filter(static_cast(*filter), cid, rid); - - return filter; -} - -/*static*/ irs::filter& FilterFactory::filter( - irs::boolean_filter& buf, - TRI_voc_cid_t cid, - TRI_voc_rid_t rid -) { - // filter matching on cid and rid - auto& filter = buf.add(); - - // filter matching on cid - filter.add() - .field(DocumentPrimaryKey::CID()) // set field - .term(DocumentPrimaryKey::encode(cid)); // set value - - // filter matching on rid - filter.add() - .field(DocumentPrimaryKey::RID()) // set field - .term(DocumentPrimaryKey::encode(rid)); // set value - - return filter; + return std::make_unique(cid, rid); } /*static*/ bool FilterFactory::filter( diff --git a/arangod/IResearch/IResearchFilterFactory.h b/arangod/IResearch/IResearchFilterFactory.h index 67445afa6d..dc72b90bbb 100644 --- a/arangod/IResearch/IResearchFilterFactory.h +++ b/arangod/IResearch/IResearchFilterFactory.h @@ -47,17 +47,11 @@ struct QueryContext; struct FilterFactory { static irs::filter::ptr filter(TRI_voc_cid_t cid); - static irs::filter::ptr filter(TRI_voc_cid_t cid, TRI_voc_rid_t rid); //////////////////////////////////////////////////////////////////////////////// - /// @brief create a filter matching 'cid' + 'rid' pair and append to 'buf' - /// @return the appended filter portion + /// @brief create a filter matching 'cid' + 'rid' pair //////////////////////////////////////////////////////////////////////////////// - static irs::filter& filter( - irs::boolean_filter& buf, - TRI_voc_cid_t cid, - TRI_voc_rid_t rid - ); + static irs::filter::ptr filter(TRI_voc_cid_t cid, TRI_voc_rid_t rid); //////////////////////////////////////////////////////////////////////////////// /// @brief determine if the 'node' can be converted into an iresearch filter diff --git a/arangod/IResearch/IResearchPrimaryKeyFilter.cpp b/arangod/IResearch/IResearchPrimaryKeyFilter.cpp new file mode 100644 index 0000000000..00c8ef86b1 --- /dev/null +++ b/arangod/IResearch/IResearchPrimaryKeyFilter.cpp @@ -0,0 +1,105 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Andrey Abramov +/// @author Vasiliy Nabatchikov +//////////////////////////////////////////////////////////////////////////////// + +#include "IResearchPrimaryKeyFilter.h" + +#include "index/index_reader.hpp" +#include "utils/hash_utils.hpp" + +namespace arangodb { +namespace iresearch { + +// ---------------------------------------------------------------------------- +// --SECTION-- PrimaryKeyFilter implementation +// ---------------------------------------------------------------------------- + +irs::doc_iterator::ptr PrimaryKeyFilter::execute( + irs::sub_reader const& segment, + irs::order::prepared const& /*order*/, + irs::attribute_view const& /*ctx*/ +) const { + if (&segment != _pkIterator._pkSegment) { + return irs::doc_iterator::empty(); + } + + // aliasing constructor + return irs::doc_iterator::ptr( + irs::doc_iterator::ptr(), + const_cast(&_pkIterator) + ); +} + +size_t PrimaryKeyFilter::hash() const noexcept { + size_t seed = 0; + irs::hash_combine(seed, filter::hash()); + irs::hash_combine(seed, _pk.cid()); + irs::hash_combine(seed, _pk.rid()); + return seed; +} + +irs::filter::prepared::ptr PrimaryKeyFilter::prepare( + irs::index_reader const& index, + irs::order::prepared const& /*ord*/, + irs::boost::boost_t /*boost*/, + irs::attribute_view const& /*ctx*/ +) const { + auto const pkRef = static_cast(_pk); + + for (auto& segment : index) { + auto* pkField = segment.field(arangodb::iresearch::DocumentPrimaryKey::PK()); + + if (!pkField) { + continue; + } + + auto term = pkField->iterator(); + + if (!term->seek(pkRef)) { + continue; + } + + auto docs = term->postings(irs::flags::empty_instance()); + + if (docs->next()) { + _pkIterator.reset(segment, docs->value()); + } + + break; + } + + // aliasing constructor + return irs::filter::prepared::ptr(irs::filter::prepared::ptr(), this); +} + +bool PrimaryKeyFilter::equals(filter const& rhs) const noexcept { + auto const& trhs = static_cast(rhs); + + return filter::equals(rhs) + && _pk.cid() == trhs._pk.cid() + && _pk.rid() == trhs._pk.rid(); +} + +DEFINE_FILTER_TYPE(PrimaryKeyFilter); + +} // iresearch +} // arangodb diff --git a/arangod/IResearch/IResearchPrimaryKeyFilter.h b/arangod/IResearch/IResearchPrimaryKeyFilter.h new file mode 100644 index 0000000000..8305a7a85b --- /dev/null +++ b/arangod/IResearch/IResearchPrimaryKeyFilter.h @@ -0,0 +1,149 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2018 ArangoDB GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Andrey Abramov +/// @author Vasiliy Nabatchikov +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_IRESEARCH__IRESEARCH_PRIMARY_KEY_FILTER_H +#define ARANGOD_IRESEARCH__IRESEARCH_PRIMARY_KEY_FILTER_H 1 + +#include "VocBase/voc-types.h" +#include "IResearchDocument.h" + +#include "search/filter.hpp" +#include "utils/type_limits.hpp" + +namespace arangodb { +namespace iresearch { + +/////////////////////////////////////////////////////////////////////////////// +/// @class PrimaryKeyFilter +/// @brief iresearch filter optimized for filtering on primary keys +/////////////////////////////////////////////////////////////////////////////// +class PrimaryKeyFilter final + : public irs::filter, + public irs::filter::prepared { + public: + DECLARE_FILTER_TYPE(); + + PrimaryKeyFilter(TRI_voc_cid_t cid, TRI_voc_rid_t id) noexcept + : irs::filter(PrimaryKeyFilter::type()), _pk(cid, id) { + } + +// ---------------------------------------------------------------------------- +// --SECTION-- irs::filter::prepared +// ---------------------------------------------------------------------------- + + virtual irs::doc_iterator::ptr execute( + irs::sub_reader const& segment, + irs::order::prepared const& /*order*/, + irs::attribute_view const& /*ctx*/ + ) const override; + +// ---------------------------------------------------------------------------- +// --SECTION-- irs::filter +// ---------------------------------------------------------------------------- + + virtual size_t hash() const noexcept override; + + virtual filter::prepared::ptr prepare( + irs::index_reader const& index, + irs::order::prepared const& /*ord*/, + irs::boost::boost_t /*boost*/, + irs::attribute_view const& /*ctx*/ + ) const override; + + protected: + bool equals(filter const& rhs) const noexcept override; + + private: + struct PrimaryKeyIterator final : public irs::doc_iterator { + PrimaryKeyIterator() = default; + + virtual bool next() noexcept override { + _doc = _next; + _next = irs::type_limits::eof(); + return !irs::type_limits::eof(_doc); + } + + virtual irs::doc_id_t seek(irs::doc_id_t target) noexcept override { + _doc = target <= _next + ? _next + : irs::type_limits::eof(); + + return _doc; + } + + virtual irs::doc_id_t value() const noexcept override { + return _doc; + } + + virtual irs::attribute_view const& attributes() const noexcept override { + return irs::attribute_view::empty_instance(); + } + + void reset(irs::sub_reader const& segment, irs::doc_id_t doc) noexcept { + _pkSegment = &segment; + _doc = irs::type_limits::invalid(); + _next = doc; + } + + mutable irs::sub_reader const* _pkSegment{}; + mutable irs::doc_id_t _doc{ irs::type_limits::invalid() }; + mutable irs::doc_id_t _next{ irs::type_limits::eof() }; + }; // PrimaryKeyIterator + + DocumentPrimaryKey _pk; + mutable PrimaryKeyIterator _pkIterator; +}; // PrimaryKeyFilter + +/////////////////////////////////////////////////////////////////////////////// +/// @class PrimaryKeyFilterContainer +/// @brief container for storing 'PrimaryKeyFilter's, does nothing as a filter +/////////////////////////////////////////////////////////////////////////////// +class PrimaryKeyFilterContainer final : public irs::empty { + public: + DECLARE_FILTER_TYPE(); + + PrimaryKeyFilterContainer() = default; + PrimaryKeyFilterContainer(PrimaryKeyFilterContainer&&) = default; + PrimaryKeyFilterContainer& operator=(PrimaryKeyFilterContainer&&) = default; + + PrimaryKeyFilter& emplace(TRI_voc_cid_t cid, TRI_voc_rid_t rid) { + _filters.emplace_back(cid, rid); + return _filters.back(); + } + + bool empty() const noexcept { + return _filters.empty(); + } + + void clear() noexcept { + _filters.clear(); + } + + private: + std::deque _filters; // pointers remain valid +}; // PrimaryKeyFilterContainer + +} // iresearch +} // arangodb + +#endif // ARANGOD_IRESEARCH__IRESEARCH_PRIMARY_KEY_FILTER_H diff --git a/arangod/IResearch/IResearchView.cpp b/arangod/IResearch/IResearchView.cpp index e47e653f4b..52a4421ea2 100644 --- a/arangod/IResearch/IResearchView.cpp +++ b/arangod/IResearch/IResearchView.cpp @@ -37,6 +37,7 @@ #include "IResearchFilterFactory.h" #include "IResearchLink.h" #include "IResearchLinkHelper.h" +#include "IResearchPrimaryKeyFilter.h" #include "Aql/AstNode.h" #include "Aql/QueryCache.h" @@ -292,17 +293,15 @@ inline void insertDocument( } // System fields - // Indexed: CID - Field::setCidValue(field, cid, Field::init_stream_t()); - doc.insert(irs::action::index, field); - - // Indexed: RID - Field::setRidValue(field, rid); - doc.insert(irs::action::index, field); - - // Stored: CID + RID DocumentPrimaryKey const primaryKey(cid, rid); - doc.insert(irs::action::store, primaryKey); + + // Indexed and Stored: CID + RID + Field::setPkValue(field, primaryKey, Field::init_stream_t()); + doc.insert(irs::action::index_store, field); + + // Indexed: CID + Field::setCidValue(field, primaryKey.cid()); + doc.insert(irs::action::index, field); } //////////////////////////////////////////////////////////////////////////////// @@ -646,8 +645,10 @@ struct IResearchView::ViewFactory: public arangodb::ViewFactory { //////////////////////////////////////////////////////////////////////////////// /// @brief container storing the view 'read' state for a given TransactionState //////////////////////////////////////////////////////////////////////////////// -struct IResearchView::ViewStateRead: public arangodb::TransactionState::Cookie { +struct IResearchView::ViewStateRead final + : public arangodb::TransactionState::Cookie { CompoundReader _snapshot; + explicit ViewStateRead(ReadMutex& mutex) noexcept : _snapshot(mutex) { } @@ -656,17 +657,53 @@ struct IResearchView::ViewStateRead: public arangodb::TransactionState::Cookie { //////////////////////////////////////////////////////////////////////////////// /// @brief container storing the view 'write' state for a given TransactionState //////////////////////////////////////////////////////////////////////////////// -struct IResearchView::ViewStateWrite - : public arangodb::TransactionState::Cookie, - public irs::index_writer::documents_context { +class IResearchView::ViewStateWrite final + : public arangodb::TransactionState::Cookie { + public: std::lock_guard _viewLock; // prevent data-store deallocation (lock @ AsyncSelf) + irs::index_writer::documents_context _ctx; + PrimaryKeyFilterContainer _removals; // list of document removals explicit ViewStateWrite( ReadMutex& viewMutex, irs::index_writer& writer ) noexcept - : irs::index_writer::documents_context(writer.documents()), - _viewLock(viewMutex) { + : _viewLock(viewMutex), + _ctx(writer.documents()) { + } + + virtual ~ViewStateWrite() noexcept { + if (_removals.empty()) { + // nothing to do + return; + } + + try { + // hold references even after transaction + _ctx.remove( + irs::filter::make(std::move(_removals)) + ); + } catch (std::exception const& e) { + LOG_TOPIC(ERR, arangodb::iresearch::TOPIC) + << "Failed to apply accumulated removals, error '" << e.what() << "'"; + } catch (...) { + // NOOP + LOG_TOPIC(WARN, arangodb::iresearch::TOPIC) + << "Failed to apply accumulated removals"; + } + } + + operator irs::index_writer::documents_context&() noexcept { + return _ctx; + } + + void remove(TRI_voc_cid_t cid, TRI_voc_rid_t rid) { + _ctx.remove(_removals.emplace(cid, rid)); + } + + void reset() noexcept { + _removals.clear(); + _ctx.reset(); } }; @@ -753,10 +790,12 @@ class IResearchView::ViewStateHelper { if (rollback && prev) { // TODO FIXME find a better way to look up a ViewState #ifdef ARANGODB_ENABLE_MAINTAINER_MODE - dynamic_cast(*prev).reset(); + auto& ctx = dynamic_cast(*prev); #else - static_cast(*prev).reset(); + auto& ctx = static_cast(*prev); #endif + + ctx.reset(); } prev.reset(); @@ -1119,7 +1158,8 @@ arangodb::Result IResearchView::drop( TRI_voc_cid_t cid, bool unlink /*= true*/ ) { - std::shared_ptr shared_filter(iresearch::FilterFactory::filter(cid)); + auto filter = iresearch::FilterFactory::filter(cid); + WriteMutex rmutex(_mutex); // '_meta' and '_storeByTid' can be asynchronously updated WriteMutex wmutex(_mutex); // '_meta' and '_storeByTid' can be asynchronously updated DEFER_SCOPED_LOCK_NAMED(rmutex, rlock); @@ -1165,7 +1205,7 @@ arangodb::Result IResearchView::drop( try { if (_storePersisted) { - _storePersisted._writer->documents().remove(shared_filter); + _storePersisted._writer->documents().remove(std::move(filter)); } } catch (arangodb::basics::Exception& e) { IR_LOG_EXCEPTION(); @@ -1422,15 +1462,15 @@ arangodb::Result IResearchView::commit() { return {}; } catch (arangodb::basics::Exception const& e) { LOG_TOPIC(ERR, arangodb::iresearch::TOPIC) - << "caught exception while committing memory store for arangosearch view '" << name() << "': " << e.code() << " " << e.what(); + << "caught exception while committing store for arangosearch view '" << name() << "': " << e.code() << " " << e.what(); IR_LOG_EXCEPTION(); } catch (std::exception const& e) { LOG_TOPIC(ERR, arangodb::iresearch::TOPIC) - << "caught exception while committing memory store for arangosearch view '" << name() << "': " << e.what(); + << "caught exception while committing store for arangosearch view '" << name() << "': " << e.what(); IR_LOG_EXCEPTION(); } catch (...) { LOG_TOPIC(ERR, arangodb::iresearch::TOPIC) - << "caught exception while committing memory store for arangosearch view '" << name() << "'"; + << "caught exception while committing store for arangosearch view '" << name() << "'"; IR_LOG_EXCEPTION(); } @@ -1484,12 +1524,6 @@ int IResearchView::insert( return TRI_ERROR_INTERNAL; }; - if (_inRecovery) { - auto ctx = _storePersisted._writer->documents(); - ctx.remove(FilterFactory::filter(cid, documentId.id())); - return insertImpl(ctx); - } - if (!trx.state()) { return TRI_ERROR_BAD_PARAMETER; // 'trx' and transaction state required } @@ -1525,6 +1559,10 @@ int IResearchView::insert( TRI_ASSERT(ctx); + if (_inRecovery) { + ctx->remove(cid, documentId.id()); + } + return insertImpl(*ctx); } @@ -1577,14 +1615,6 @@ int IResearchView::insert( return TRI_ERROR_NO_ERROR; }; - if (_inRecovery) { - auto ctx = _storePersisted._writer->documents(); - for (auto& doc : batch) { - ctx.remove(FilterFactory::filter(cid, doc.first.id())); - } - return insertImpl(ctx); - } - if (!trx.state()) { return TRI_ERROR_BAD_PARAMETER; // 'trx' and transaction state required } @@ -1615,6 +1645,12 @@ int IResearchView::insert( TRI_ASSERT(ctx); + if (_inRecovery) { + for (auto const& doc : batch) { + ctx->remove(cid, doc.first.id()); + } + } + return insertImpl(*ctx); } @@ -1743,14 +1779,6 @@ int IResearchView::remove( ) { TRI_ASSERT(_storePersisted); - std::shared_ptr shared_filter(FilterFactory::filter(cid, documentId.id())); - - if (_inRecovery) { - _storePersisted._writer->documents().remove(shared_filter); - - return TRI_ERROR_NO_ERROR; - } - if (!trx.state()) { return TRI_ERROR_BAD_PARAMETER; // 'trx' and transaction state required } @@ -1786,7 +1814,7 @@ int IResearchView::remove( // all of its fid stores, no impact to iResearch View data integrity // ........................................................................... try { - ctx->remove(shared_filter); + ctx->remove(cid, documentId.id()); return TRI_ERROR_NO_ERROR; } catch (arangodb::basics::Exception const& e) { diff --git a/arangod/IResearch/IResearchView.h b/arangod/IResearch/IResearchView.h index dc325e5443..3431c836a2 100644 --- a/arangod/IResearch/IResearchView.h +++ b/arangod/IResearch/IResearchView.h @@ -311,7 +311,6 @@ class IResearchView final struct DataStore { irs::directory::ptr _directory; irs::directory_reader _reader; - irs::index_reader::ptr _readerImpl; // need this for 'std::atomic_exchange_strong' irs::index_writer::ptr _writer; DataStore() = default; @@ -331,7 +330,7 @@ class IResearchView final struct ViewFactory; // forward declaration class ViewStateHelper; // forward declaration struct ViewStateRead; // forward declaration - struct ViewStateWrite; // forward declaration + class ViewStateWrite; // forward declaration struct FlushCallbackUnregisterer { void operator()(IResearchView* view) const noexcept; @@ -377,4 +376,4 @@ class IResearchView final } // iresearch } // arangodb -#endif \ No newline at end of file +#endif diff --git a/tests/IResearch/ExpressionFilter-test.cpp b/tests/IResearch/ExpressionFilter-test.cpp index c615f4f111..8c36ffcc3d 100644 --- a/tests/IResearch/ExpressionFilter-test.cpp +++ b/tests/IResearch/ExpressionFilter-test.cpp @@ -1240,7 +1240,7 @@ TEST_CASE("IResearchExpressionFilterTest", "[iresearch][iresearch-expression-fil auto prepared = filter.prepare(*reader, preparedOrder, queryCtx); auto const& boost = prepared->attributes().get(); CHECK(boost); - CHECK(1.5f == boost->get()->value); + CHECK(1.5f == boost->value); auto column = segment.column_reader("name"); REQUIRE(column); diff --git a/tests/IResearch/IResearchDocument-test.cpp b/tests/IResearch/IResearchDocument-test.cpp index c69b5bfd39..ee6c2dcc7b 100644 --- a/tests/IResearch/IResearchDocument-test.cpp +++ b/tests/IResearch/IResearchDocument-test.cpp @@ -243,30 +243,6 @@ SECTION("Field_setCid") { CHECK(stream->next()); CHECK(!stream->next()); } - - // reset field - field._features = &features; - field._analyzer = nullptr; - - // check RID value - { - TRI_voc_rid_t rid = 10; - arangodb::iresearch::Field::setRidValue(field, rid, arangodb::iresearch::Field::init_stream_t()); - CHECK(arangodb::iresearch::DocumentPrimaryKey::RID() == field._name); - CHECK(&irs::flags::empty_instance() == field._features); - - auto* stream = dynamic_cast(field._analyzer.get()); - REQUIRE(nullptr != stream); - CHECK(stream->next()); - CHECK(!stream->next()); - - arangodb::iresearch::Field::setRidValue(field, rid); - CHECK(arangodb::iresearch::DocumentPrimaryKey::RID() == field._name); - CHECK(&irs::flags::empty_instance() == field._features); - CHECK(stream == field._analyzer.get()); - CHECK(stream->next()); - CHECK(!stream->next()); - } } SECTION("FieldIterator_static_checks") { @@ -1533,17 +1509,17 @@ SECTION("test_cid_rid_encoding") { cid = cidSlice.getNumber(); rid = ridSlice.getNumber(); + arangodb::iresearch::DocumentPrimaryKey const pk(cid, rid); + auto& writer = store0.writer; // insert document { auto doc = writer->documents().insert(); - arangodb::iresearch::Field::setCidValue(field, cid, arangodb::iresearch::Field::init_stream_t()); + arangodb::iresearch::Field::setCidValue(field, pk.cid(), arangodb::iresearch::Field::init_stream_t()); CHECK((doc.insert(irs::action::index, field))); - arangodb::iresearch::Field::setRidValue(field, rid); - CHECK((doc.insert(irs::action::index, field))); - arangodb::iresearch::DocumentPrimaryKey const primaryKey(cid, rid); - CHECK(doc.insert(irs::action::store, primaryKey)); + arangodb::iresearch::Field::setPkValue(field, pk); + CHECK(doc.insert(irs::action::index_store, field)); CHECK(doc); } writer->commit(); @@ -1578,9 +1554,9 @@ SECTION("test_cid_rid_encoding") { CHECK(cidField); CHECK(size == cidField->docs_count()); - auto* ridField = segment.field(arangodb::iresearch::DocumentPrimaryKey::RID()); - CHECK(ridField); - CHECK(size == ridField->docs_count()); + auto* pkField = segment.field(arangodb::iresearch::DocumentPrimaryKey::PK()); + CHECK(pkField); + CHECK(size == pkField->docs_count()); auto filter = arangodb::iresearch::FilterFactory::filter(cid, rid); REQUIRE(filter); @@ -1644,10 +1620,10 @@ SECTION("test_appendKnownCollections") { ); REQUIRE(writer); - TRI_voc_rid_t rid = 42; + arangodb::iresearch::DocumentPrimaryKey pk(42, 42); arangodb::iresearch::Field field; - arangodb::iresearch::Field::setRidValue(field, rid, arangodb::iresearch::Field::init_stream_t()); + arangodb::iresearch::Field::setPkValue(field, pk, arangodb::iresearch::Field::init_stream_t()); { auto doc = writer->documents().insert(); @@ -1731,9 +1707,9 @@ SECTION("test_visitReaderCollections") { ); REQUIRE(writer); - TRI_voc_rid_t rid = 42; + arangodb::iresearch::DocumentPrimaryKey pk(42, 42); arangodb::iresearch::Field field; - arangodb::iresearch::Field::setRidValue(field, rid, arangodb::iresearch::Field::init_stream_t()); + arangodb::iresearch::Field::setPkValue(field, pk, arangodb::iresearch::Field::init_stream_t()); { auto doc = writer->documents().insert(); CHECK(doc.insert(irs::action::index, field));