////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2017 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////

#ifndef ARANGOD_IRESEARCH__IRESEARCH_ANALYZER_FEATURE_H
#define ARANGOD_IRESEARCH__IRESEARCH_ANALYZER_FEATURE_H 1

// NOTE(review): angle-bracketed text appears to have been stripped from this
// copy of the header: the #include directives directly below have no target,
// and several declarations further down name a template (std::shared_ptr,
// std::pair, std::function, std::unordered_map, irs::unbounded_object_pool)
// without a template argument list. Restore both from the upstream file
// before attempting to compile.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "ApplicationFeatures/ApplicationFeature.h"
#include "Auth/Common.h"
#include "Basics/Result.h"

// declarations backing the 'vpack' text format referenced by the
// REGISTER_ANALYZER_VPACK macro below
namespace iresearch {
namespace text_format {
class type_id;
const type_id& vpack_t();
static const auto& vpack = vpack_t();
}
}

// forwards to REGISTER_ANALYZER with ::iresearch::text_format::vpack supplied
// as the format argument
#define REGISTER_ANALYZER_VPACK(analyzer_name, factory, normalizer) \
  REGISTER_ANALYZER(analyzer_name, ::iresearch::text_format::vpack, factory, normalizer)

struct TRI_vocbase_t; // forward declaration

namespace arangodb {
namespace application_features {
class ApplicationServer;
}
} // namespace arangodb

namespace arangodb {
namespace iresearch {

////////////////////////////////////////////////////////////////////////////////
/// @brief a cache of IResearch analyzer instances
///        and a provider of the AQL TOKENS(...) function
///        NOTE: deallocation of an IResearchAnalyzerFeature instance
///              invalidates all AnalyzerPool instances previously provided
///              by the deallocated feature instance
////////////////////////////////////////////////////////////////////////////////
class IResearchAnalyzerFeature final : public arangodb::application_features::ApplicationFeature {
 public:
  // thread-safe analyzer pool
  class AnalyzerPool : private irs::util::noncopyable {
   public:
    typedef std::shared_ptr ptr;
    explicit AnalyzerPool(irs::string_ref const& name);
    irs::flags const& features() const noexcept { return _features; }
    irs::analysis::analyzer::ptr get() const noexcept; // nullptr == error creating analyzer
    std::string const& name() const noexcept { return _name; }
    VPackSlice properties() const noexcept { return _properties; }
    irs::string_ref const& type() const noexcept { return _type; }
    void toVelocyPack(arangodb::velocypack::Builder& builder, bool forPersistence = false);

   private:
    friend class IResearchAnalyzerFeature; // required for calling AnalyzerPool::init(...) and AnalyzerPool::setKey(...)

    // 'make(...)' method wrapper for irs::analysis::analyzer types
    struct Builder {
      typedef irs::analysis::analyzer::ptr ptr;
      DECLARE_FACTORY(irs::string_ref const& type, VPackSlice properties);
    };

    mutable irs::unbounded_object_pool _cache; // cache of irs::analysis::analyzer (constructed via
                                               // Builder::make(...))
    std::string _config; // non-null type + non-null properties + key
    irs::flags _features; // cached analyzer features
    irs::string_ref _key; // the key of the persisted configuration for this pool, null == static analyzer
    std::string _name; // ArangoDB alias for an IResearch analyzer configuration
    VPackSlice _properties; // IResearch analyzer configuration
    irs::string_ref _type; // IResearch analyzer name

    bool init(irs::string_ref const& type, VPackSlice const properties, irs::flags const& features = irs::flags::empty_instance());
    // NOTE(review): the parameter is named 'type' although the method is
    // called setKey; presumably it carries the key - confirm against the
    // definition
    void setKey(irs::string_ref const& type);
  };

  explicit IResearchAnalyzerFeature(arangodb::application_features::ApplicationServer& server);

  //////////////////////////////////////////////////////////////////////////////
  /// @return analyzers in the specified vocbase are granted 'level' access
  //////////////////////////////////////////////////////////////////////////////
  static bool canUse( // check permissions
    TRI_vocbase_t const& vocbase, // analyzer vocbase
    arangodb::auth::Level const& level // access level
  );

  //////////////////////////////////////////////////////////////////////////////
  /// @return analyzer with the given prefixed name (or unprefixed and resides
  ///         in defaultVocbase) is granted 'level' access
  //////////////////////////////////////////////////////////////////////////////
  static bool canUse( // check permissions
    irs::string_ref const& name, // analyzer name (already normalized)
    arangodb::auth::Level const& level // access level
  );

  //////////////////////////////////////////////////////////////////////////////
  /// @brief emplace an analyzer as per the specified parameters
  ///        (forwards to ensure(...) with isEmplace == true)
  /// @param result the result of the successful emplacement (out-param)
  ///               first - the emplaced pool
  ///               second - if an insertion of a new analyzer occurred
  /// @param name analyzer name (already normalized)
  /// @param type the underlying IResearch analyzer type
  /// @param properties the configuration for the underlying IResearch type
  /// @param features the expected features the analyzer should produce
  /// @return success
  /// @note emplacement while inRecovery() will not allow adding new analyzers
  ///       valid because for existing links the analyzer definition should
  ///       already have been persisted and feature administration is not
  ///       allowed during recovery
  /// @note NOTE(review): an 'implicitCreation' parameter
  ///       ("false == treat as error if creation is required") used to be
  ///       documented here but is not part of this signature
  //////////////////////////////////////////////////////////////////////////////
  typedef std::pair EmplaceResult;
  arangodb::Result emplace(
    EmplaceResult& result,
    irs::string_ref const& name,
    irs::string_ref const& type,
    VPackSlice properties,
    irs::flags const& features = irs::flags::empty_instance()) {
    return ensure(result, name, type, properties, features, true);
  }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief find analyzer
  /// @param name analyzer name (already normalized)
  /// @return analyzer with the specified name or nullptr
  //////////////////////////////////////////////////////////////////////////////
  AnalyzerPool::ptr get( // find analyzer
    irs::string_ref const& name, // analyzer name
    bool onlyCached = false // check only locally cached analyzers
  ) const noexcept;

  //////////////////////////////////////////////////////////////////////////////
  /// @brief find analyzer
  /// @return analyzer with the specified name or nullptr
  //////////////////////////////////////////////////////////////////////////////
  AnalyzerPool::ptr get( // find analyzer
    irs::string_ref const& name, // analyzer name
    TRI_vocbase_t const& activeVocbase, // fallback vocbase if not part of name
    TRI_vocbase_t const& systemVocbase, // the system vocbase for use with empty prefix
    bool onlyCached = false // check only locally cached analyzers
  ) const noexcept;

  //////////////////////////////////////////////////////////////////////////////
  /// @brief find analyzer
  /// @param name analyzer name (already normalized)
  /// @param type the underlying IResearch analyzer type
  /// @param properties the configuration for the underlying IResearch type
  /// @param features the expected features the analyzer should produce
  /// @return analyzer matching the specified parameters or nullptr
  /// @note will construct and cache the analyzer if missing only on db-server
  ///       persistence in the cases of inRecovery or !storage engine will fail
  //////////////////////////////////////////////////////////////////////////////
  AnalyzerPool::ptr get( // find analyzer
    irs::string_ref const& name, // analyzer name
    irs::string_ref const& type, // analyzer type
    VPackSlice const properties, // analyzer properties
    irs::flags const& features // analyzer features
  );

  static AnalyzerPool::ptr identity() noexcept; // the identity analyzer
  static std::string const& name() noexcept;

  //////////////////////////////////////////////////////////////////////////////
  /// @return normalized analyzer name, i.e. with vocbase prefix
  //////////////////////////////////////////////////////////////////////////////
  static std::string normalize( // normalize name
    irs::string_ref const& name, // analyzer name
    TRI_vocbase_t const& activeVocbase, // fallback vocbase if not part of name
    TRI_vocbase_t const& systemVocbase, // the system vocbase for use with empty prefix
    bool expandVocbasePrefix = true // use full vocbase name as prefix for active/system vs. EMPTY/'::'
  );

  //////////////////////////////////////////////////////////////////////////////
  /// @param name analyzer name (normalized)
  /// @return vocbase prefix extracted from normalized analyzer name
  ///         EMPTY == system vocbase
  ///         NIL == analyzer name had no db name prefix
  /// @see analyzerReachableFromDb
  //////////////////////////////////////////////////////////////////////////////
  static irs::string_ref extractVocbaseName(irs::string_ref const& name) noexcept;

  //////////////////////////////////////////////////////////////////////////////
  /// Checks if analyzer db (identified by db name prefix extracted from analyzer
  /// name) could be reached from specified db.
  /// Properly handles special cases (e.g. NIL and EMPTY)
  /// @param dbNameFromAnalyzer database name extracted from analyzer name
  /// @param currentDbName database name to check against (should not be empty!)
  /// @param forGetters check special case for getting analyzer (not creating/removing)
  /// @return true if analyzer is reachable
  //////////////////////////////////////////////////////////////////////////////
  static bool analyzerReachableFromDb(irs::string_ref const& dbNameFromAnalyzer, irs::string_ref const& currentDbName, bool forGetters = false) noexcept;

  //////////////////////////////////////////////////////////////////////////////
  /// @brief split the analyzer name into the vocbase part and analyzer part
  /// @param analyzer analyzer name
  /// @return pair of first == vocbase name, second == analyzer name
  ///         EMPTY == system vocbase
  ///         NIL == unprefixed analyzer name, i.e. active vocbase
  //////////////////////////////////////////////////////////////////////////////
  static std::pair splitAnalyzerName(irs::string_ref const& analyzer) noexcept;

  void prepare() override;

  //////////////////////////////////////////////////////////////////////////////
  /// @brief remove the specified analyzer
  /// @param name analyzer name (already normalized)
  /// @param force remove even if the analyzer is actively referenced
  //////////////////////////////////////////////////////////////////////////////
  arangodb::Result remove(irs::string_ref const& name, bool force = false);

  void start() override;
  void stop() override;

  //////////////////////////////////////////////////////////////////////////////
  /// @brief visit all analyzers for the specified vocbase
  /// @param vocbase only visit analyzers for this vocbase (nullptr == static)
  /// @return visitation completed fully
  //////////////////////////////////////////////////////////////////////////////
  bool visit(std::function const& visitor) const;
  bool visit(std::function const& visitor, TRI_vocbase_t const* vocbase) const;

  //////////////////////////////////////////////////////////////////////////////
  /// @brief removes analyzers for specified database from cache
  /// @param vocbase database to invalidate analyzers
  //////////////////////////////////////////////////////////////////////////////
  void invalidate(const TRI_vocbase_t& vocbase);

 private:
  // map of caches of irs::analysis::analyzer pools indexed by analyzer name and
  // their associated metas
  typedef std::unordered_map Analyzers;

  Analyzers _analyzers; // all analyzers known to this feature (including static) (names are stored with expanded vocbase prefixes)
  std::unordered_map _lastLoad; // last time a database was loaded
  mutable irs::async_utils::read_write_mutex _mutex; // for use with member '_analyzers', '_lastLoad'

  static Analyzers const& getStaticAnalyzers();

  //////////////////////////////////////////////////////////////////////////////
  /// @brief validate analyzer parameters and emplace into map
  //////////////////////////////////////////////////////////////////////////////
  typedef std::pair EmplaceAnalyzerResult;
  arangodb::Result emplaceAnalyzer( // emplace
    EmplaceAnalyzerResult& result, // emplacement result on success (out-param)
    arangodb::iresearch::IResearchAnalyzerFeature::Analyzers& analyzers, // analyzers
    irs::string_ref const& name, // analyzer name
    irs::string_ref const& type, // analyzer type
    VPackSlice const properties, // analyzer properties
    irs::flags const& features // analyzer features
  );

  //////////////////////////////////////////////////////////////////////////////
  /// @brief ensure an analyzer as per the specified parameters exists if
  ///        possible
  /// @param result the result of the successful emplacement (out-param)
  ///               first - the emplaced pool
  ///               second - if an insertion of a new analyzer occurred
  /// @param name analyzer name (already normalized)
  /// @param type the underlying IResearch analyzer type
  /// @param properties the configuration for the underlying IResearch type
  /// @param features the expected features the analyzer should produce
  /// @param isEmplace request coming from emplace(...)
  /// @return success
  /// @note ensure while inRecovery() will not allow new analyzer persistence
  ///       valid because for existing links the analyzer definition should
  ///       already have been persisted and feature administration is not
  ///       allowed during recovery
  /// @note for db-server analyzers are not persisted
  ///       valid because the authoritative analyzer source is from coordinators
  //////////////////////////////////////////////////////////////////////////////
  arangodb::Result ensure( // ensure analyzer existence if possible
    EmplaceResult& result, // emplacement result on success (out-param)
    irs::string_ref const& name, // analyzer name
    irs::string_ref const& type, // analyzer type
    VPackSlice const properties, // analyzer properties
    irs::flags const& features, // analyzer features
    bool isEmplace
  );

  //////////////////////////////////////////////////////////////////////////////
  /// @brief load the analyzers for the specific database, analyzers read from
  ///        the corresponding collection if they have not been loaded yet
  /// @param database the database to load analyzers for (nullptr == all)
  /// @note on coordinator and db-server reload is also done if the database has
  ///       not been reloaded in 'timeout' seconds
  //////////////////////////////////////////////////////////////////////////////
  arangodb::Result loadAnalyzers( // load analyzers
    irs::string_ref const& database = irs::string_ref::NIL // database to load
  );

  //////////////////////////////////////////////////////////////////////////////
  /// removes analyzers for database from feature cache
  /// Write lock must be acquired by caller
  /// @param database the database to cleanup analyzers for
  //////////////////////////////////////////////////////////////////////////////
  void cleanupAnalyzers(irs::string_ref const& database);

  //////////////////////////////////////////////////////////////////////////////
  /// @brief store the definition for the specified pool in the corresponding
  ///        vocbase
  /// @note on success will modify the '_key' of the pool
  //////////////////////////////////////////////////////////////////////////////
  arangodb::Result storeAnalyzer(AnalyzerPool& pool);
};

} // namespace iresearch
} // namespace arangodb

#endif