//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2017 ArangoDB GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Andrey Abramov /// @author Vasiliy Nabatchikov //////////////////////////////////////////////////////////////////////////////// #include "common.h" #include "gtest/gtest.h" #include "../Mocks/StorageEngineMock.h" #if USE_ENTERPRISE #include "Enterprise/Ldap/LdapFeature.h" #endif #include "3rdParty/iresearch/tests/tests_config.hpp" #include "Aql/AqlFunctionFeature.h" #include "Aql/Ast.h" #include "Aql/OptimizerRulesFeature.h" #include "Aql/Query.h" #include "Basics/VelocyPackHelper.h" #include "Cluster/ClusterFeature.h" #include "GeneralServer/AuthenticationFeature.h" #include "IResearch/IResearchAnalyzerFeature.h" #include "IResearch/IResearchCommon.h" #include "IResearch/IResearchFeature.h" #include "IResearch/IResearchFilterFactory.h" #include "IResearch/IResearchView.h" #include "Logger/LogTopic.h" #include "Logger/Logger.h" #include "RestServer/AqlFeature.h" #include "RestServer/DatabaseFeature.h" #include "RestServer/DatabasePathFeature.h" #include "RestServer/FlushFeature.h" #include "RestServer/QueryRegistryFeature.h" #include "RestServer/SystemDatabaseFeature.h" #include "RestServer/TraverserEngineRegistryFeature.h" #include "RestServer/ViewTypesFeature.h" #include "Sharding/ShardingFeature.h" #include "StorageEngine/EngineSelectorFeature.h" #include "Transaction/StandaloneContext.h" #include "Utils/OperationOptions.h" #include "Utils/SingleCollectionTransaction.h" #include "V8/v8-globals.h" #include "V8Server/V8DealerFeature.h" #include "VocBase/LogicalCollection.h" #include "VocBase/LogicalView.h" #include "VocBase/ManagedDocumentResult.h" #include "VocBase/Methods/Collections.h" #include "IResearch/VelocyPackHelper.h" #include "analysis/analyzers.hpp" #include "analysis/token_attributes.hpp" #include "utils/utf8_path.hpp" #include namespace { struct TestTermAttribute : public irs::term_attribute { public: void value(irs::bytes_ref const& value) { value_ = value; } }; class TestDelimAnalyzer : public irs::analysis::analyzer { public: DECLARE_ANALYZER_TYPE(); static ptr make(irs::string_ref const& args) { auto slice = arangodb::iresearch::slice(args); if (slice.isNull()) throw std::exception(); if (slice.isNone()) return nullptr; if (slice.isString()) { PTR_NAMED(TestDelimAnalyzer, ptr, arangodb::iresearch::getStringRef(slice)); return ptr; } else if (slice.isObject() && slice.hasKey("args") && slice.get("args").isString()) { PTR_NAMED(TestDelimAnalyzer, ptr, arangodb::iresearch::getStringRef(slice.get("args"))); return ptr; } else { return nullptr; } } static bool normalize(irs::string_ref const& args, std::string& out) { auto slice = arangodb::iresearch::slice(args); if (slice.isNull()) throw std::exception(); if (slice.isNone()) return false; arangodb::velocypack::Builder builder; if (slice.isString()) { VPackObjectBuilder scope(&builder); arangodb::iresearch::addStringRef(builder, "args", arangodb::iresearch::getStringRef(slice)); } else if (slice.isObject() && slice.hasKey("args") && slice.get("args").isString()) { VPackObjectBuilder scope(&builder); arangodb::iresearch::addStringRef(builder, "args", arangodb::iresearch::getStringRef(slice.get("args"))); } else { return false; } out = builder.buffer()->toString(); return true; } TestDelimAnalyzer(irs::string_ref const& delim) : irs::analysis::analyzer(TestDelimAnalyzer::type()), _delim(irs::ref_cast(delim)) { _attrs.emplace(_term); } virtual irs::attribute_view const& attributes() const NOEXCEPT override { return _attrs; } virtual bool next() override { if (_data.empty()) { return false; } size_t i = 0; for (size_t count = _data.size(); i < count; ++i) { auto data = irs::ref_cast(_data); auto delim = irs::ref_cast(_delim); if (0 == strncmp(&(data.c_str()[i]), delim.c_str(), delim.size())) { _term.value(irs::bytes_ref(_data.c_str(), i)); _data = irs::bytes_ref(_data.c_str() + i + (std::max)(size_t(1), _delim.size()), _data.size() - i - (std::max)(size_t(1), _delim.size())); return true; } } _term.value(_data); _data = irs::bytes_ref::NIL; return true; } virtual bool reset(irs::string_ref const& data) override { _data = irs::ref_cast(data); return true; } private: irs::attribute_view _attrs; std::basic_string _delim; irs::bytes_ref _data; TestTermAttribute _term; }; DEFINE_ANALYZER_TYPE_NAMED(TestDelimAnalyzer, "TestDelimAnalyzer"); REGISTER_ANALYZER_VPACK(TestDelimAnalyzer, TestDelimAnalyzer::make, TestDelimAnalyzer::normalize); // ----------------------------------------------------------------------------- // --SECTION-- setup / tear-down // ----------------------------------------------------------------------------- class IResearchQueryTokensTest : public ::testing::Test { protected: StorageEngineMock engine; arangodb::application_features::ApplicationServer server; std::vector> features; IResearchQueryTokensTest() : engine(server), server(nullptr, nullptr) { arangodb::EngineSelectorFeature::ENGINE = &engine; arangodb::tests::init(true); // suppress INFO {authentication} Authentication is turned on (system only), authentication for unix sockets is turned on // suppress WARNING {authentication} --server.jwt-secret is insecure. Use --server.jwt-secret-keyfile instead arangodb::LogTopic::setLogLevel(arangodb::Logger::AUTHENTICATION.name(), arangodb::LogLevel::ERR); // suppress log messages since tests check error conditions arangodb::LogTopic::setLogLevel(arangodb::Logger::FIXME.name(), arangodb::LogLevel::ERR); // suppress WARNING DefaultCustomTypeHandler called arangodb::LogTopic::setLogLevel(arangodb::iresearch::TOPIC.name(), arangodb::LogLevel::FATAL); irs::logger::output_le(iresearch::logger::IRL_FATAL, stderr); // setup required application features features.emplace_back(new arangodb::FlushFeature(server), false); features.emplace_back(new arangodb::ViewTypesFeature(server), true); features.emplace_back(new arangodb::AuthenticationFeature(server), true); features.emplace_back(new arangodb::DatabasePathFeature(server), false); features.emplace_back(new arangodb::DatabaseFeature(server), false); features.emplace_back(new arangodb::ShardingFeature(server), false); features.emplace_back(new arangodb::QueryRegistryFeature(server), false); // must be first arangodb::application_features::ApplicationServer::server->addFeature( features.back().first); // need QueryRegistryFeature feature to be added now in order to create the system database features.emplace_back(new arangodb::SystemDatabaseFeature(server), true); // required for IResearchAnalyzerFeature features.emplace_back(new arangodb::TraverserEngineRegistryFeature(server), false); // must be before AqlFeature features.emplace_back(new arangodb::V8DealerFeature(server), false); // required for DatabaseFeature::createDatabase(...) features.emplace_back(new arangodb::AqlFeature(server), true); features.emplace_back(new arangodb::aql::OptimizerRulesFeature(server), true); features.emplace_back(new arangodb::aql::AqlFunctionFeature(server), true); // required for IResearchAnalyzerFeature features.emplace_back(new arangodb::iresearch::IResearchAnalyzerFeature(server), true); features.emplace_back(new arangodb::iresearch::IResearchFeature(server), true); #if USE_ENTERPRISE features.emplace_back(new arangodb::LdapFeature(server), false); // required for AuthenticationFeature with USE_ENTERPRISE #endif // required for V8DealerFeature::prepare(), ClusterFeature::prepare() not required arangodb::application_features::ApplicationServer::server->addFeature( new arangodb::ClusterFeature(server)); for (auto& f : features) { arangodb::application_features::ApplicationServer::server->addFeature(f.first); } for (auto& f : features) { f.first->prepare(); } auto const databases = arangodb::velocypack::Parser::fromJson( std::string("[ { \"name\": \"") + arangodb::StaticStrings::SystemDatabase + "\" } ]"); auto* dbFeature = arangodb::application_features::ApplicationServer::lookupFeature( "Database"); dbFeature->loadDatabases(databases->slice()); for (auto& f : features) { if (f.second) { f.first->start(); } } auto* analyzers = arangodb::application_features::ApplicationServer::lookupFeature(); arangodb::iresearch::IResearchAnalyzerFeature::EmplaceResult result; TRI_vocbase_t* vocbase; dbFeature->createDatabase(1, "testVocbase", vocbase); // required for IResearchAnalyzerFeature::emplace(...) arangodb::methods::Collections::createSystem( *vocbase, arangodb::tests::AnalyzerCollectionName, false); analyzers->emplace(result, "testVocbase::test_analyzer", "TestAnalyzer", VPackParser::fromJson("\"abc\"")->slice()); // cache analyzer analyzers->emplace(result, "testVocbase::test_csv_analyzer", "TestDelimAnalyzer", VPackParser::fromJson("\",\"")->slice()); // cache analyzer auto* dbPathFeature = arangodb::application_features::ApplicationServer::getFeature( "DatabasePath"); arangodb::tests::setDatabasePath(*dbPathFeature); // ensure test data is stored in a unique directory } ~IResearchQueryTokensTest() { arangodb::AqlFeature(server).stop(); // unset singleton instance arangodb::LogTopic::setLogLevel(arangodb::iresearch::TOPIC.name(), arangodb::LogLevel::DEFAULT); arangodb::LogTopic::setLogLevel(arangodb::Logger::FIXME.name(), arangodb::LogLevel::DEFAULT); arangodb::application_features::ApplicationServer::server = nullptr; // destroy application features for (auto& f : features) { if (f.second) { f.first->stop(); } } for (auto& f : features) { f.first->unprepare(); } arangodb::LogTopic::setLogLevel(arangodb::Logger::AUTHENTICATION.name(), arangodb::LogLevel::DEFAULT); arangodb::EngineSelectorFeature::ENGINE = nullptr; } }; // IResearchQuerySetup } // namespace // ----------------------------------------------------------------------------- // --SECTION-- test suite // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief setup //////////////////////////////////////////////////////////////////////////////// TEST_F(IResearchQueryTokensTest, test) { TRI_vocbase_t vocbase(TRI_vocbase_type_e::TRI_VOCBASE_TYPE_NORMAL, 1, "testVocbase"); std::vector insertedDocs; arangodb::LogicalView* view; // create collection0 { auto createJson = arangodb::velocypack::Parser::fromJson( "{ \"name\": \"testCollection0\" }"); auto collection = vocbase.createCollection(createJson->slice()); ASSERT_TRUE((nullptr != collection)); std::vector> docs{ VPackParser::fromJson("{ \"seq\": -6, \"value\": null }"), VPackParser::fromJson("{ \"seq\": -5, \"value\": true }"), VPackParser::fromJson("{ \"seq\": -4, \"value\": \"abc\" }"), VPackParser::fromJson("{ \"seq\": -3, \"value\": 3.14 }"), VPackParser::fromJson("{ \"seq\": -2, \"value\": [ 1, \"abc\" ] }"), VPackParser::fromJson("{ \"seq\": -1, \"value\": { \"a\": 7, \"b\": \"c\" } }"), }; arangodb::OperationOptions options; options.returnNew = true; arangodb::SingleCollectionTransaction trx(arangodb::transaction::StandaloneContext::Create(vocbase), *collection, arangodb::AccessMode::Type::WRITE); EXPECT_TRUE((trx.begin().ok())); for (auto& entry : docs) { auto res = trx.insert(collection->name(), entry->slice(), options); EXPECT_TRUE((res.ok())); insertedDocs.emplace_back(res.slice().get("new")); } EXPECT_TRUE((trx.commit().ok())); } // create collection1 { auto createJson = arangodb::velocypack::Parser::fromJson( "{ \"name\": \"testCollection1\" }"); auto collection = vocbase.createCollection(createJson->slice()); ASSERT_TRUE((nullptr != collection)); irs::utf8_path resource; resource /= irs::string_ref(arangodb::tests::testResourceDir); resource /= irs::string_ref("simple_sequential.json"); auto builder = arangodb::basics::VelocyPackHelper::velocyPackFromFile(resource.utf8()); auto slice = builder.slice(); ASSERT_TRUE(slice.isArray()); arangodb::OperationOptions options; options.returnNew = true; arangodb::SingleCollectionTransaction trx(arangodb::transaction::StandaloneContext::Create(vocbase), *collection, arangodb::AccessMode::Type::WRITE); EXPECT_TRUE((trx.begin().ok())); for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) { auto res = trx.insert(collection->name(), itr.value(), options); EXPECT_TRUE((res.ok())); insertedDocs.emplace_back(res.slice().get("new")); } EXPECT_TRUE((trx.commit().ok())); } // create view { auto createJson = arangodb::velocypack::Parser::fromJson( "{ \"name\": \"testView\", \"type\": \"arangosearch\" }"); auto logicalView = vocbase.createView(createJson->slice()); ASSERT_TRUE((false == !logicalView)); view = logicalView.get(); auto* impl = dynamic_cast(view); ASSERT_TRUE((false == !impl)); auto updateJson = arangodb::velocypack::Parser::fromJson( "{ \"links\": {" "\"testCollection0\": { \"includeAllFields\": true, " "\"trackListPositions\": true }," "\"testCollection1\": { \"includeAllFields\": true }" "}}"); EXPECT_TRUE((impl->properties(updateJson->slice(), true).ok())); std::set cids; impl->visitCollections([&cids](TRI_voc_cid_t cid) -> bool { cids.emplace(cid); return true; }); EXPECT_TRUE((2 == cids.size())); EXPECT_TRUE( (arangodb::tests::executeQuery(vocbase, "FOR d IN testView SEARCH 1 ==1 OPTIONS " "{ waitForSync: true } RETURN d") .result.ok())); // commit } // test no-match { std::vector expected = {}; auto result = arangodb::tests::executeQuery( vocbase, "FOR d IN testView SEARCH d.prefix IN TOKENS('def', " "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d"); ASSERT_TRUE(result.result.ok()); auto slice = result.data->slice(); EXPECT_TRUE(slice.isArray()); size_t i = 0; for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) { auto const resolved = itr.value().resolveExternals(); EXPECT_TRUE((i < expected.size())); EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++], resolved, true))); } EXPECT_TRUE((i == expected.size())); } // test no-match via [] { std::vector expected = {}; auto result = arangodb::tests::executeQuery( vocbase, "FOR d IN testView SEARCH d['prefix'] IN TOKENS('def', " "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d"); ASSERT_TRUE(result.result.ok()); auto slice = result.data->slice(); EXPECT_TRUE(slice.isArray()); size_t i = 0; for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) { auto const resolved = itr.value().resolveExternals(); EXPECT_TRUE((i < expected.size())); EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++], resolved, true))); } EXPECT_TRUE((i == expected.size())); } // test single match { std::vector expected = { insertedDocs[9].slice(), }; auto result = arangodb::tests::executeQuery( vocbase, "FOR d IN testView SEARCH d.prefix IN TOKENS('ab,abcde,de', " "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d"); ASSERT_TRUE(result.result.ok()); auto slice = result.data->slice(); EXPECT_TRUE(slice.isArray()); size_t i = 0; for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) { auto const resolved = itr.value().resolveExternals(); EXPECT_TRUE((i < expected.size())); EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++], resolved, true))); } EXPECT_TRUE((i == expected.size())); } // test single match via [] { std::vector expected = { insertedDocs[9].slice(), }; auto result = arangodb::tests::executeQuery( vocbase, "FOR d IN testView SEARCH d['prefix'] IN TOKENS('ab,abcde,de', " "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d"); ASSERT_TRUE(result.result.ok()); auto slice = result.data->slice(); EXPECT_TRUE(slice.isArray()); size_t i = 0; for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) { auto const resolved = itr.value().resolveExternals(); EXPECT_TRUE((i < expected.size())); EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++], resolved, true))); } EXPECT_TRUE((i == expected.size())); } // test mulptiple match { std::vector expected = { insertedDocs[36].slice(), // (duplicate term) insertedDocs[37].slice(), // (duplicate term) insertedDocs[6].slice(), // (unique term) insertedDocs[26].slice(), // (unique term) }; auto result = arangodb::tests::executeQuery( vocbase, "FOR d IN testView SEARCH d.prefix IN TOKENS('z,xy,abcy,abcd,abc', " "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d"); ASSERT_TRUE(result.result.ok()); auto slice = result.data->slice(); EXPECT_TRUE(slice.isArray()); size_t i = 0; for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) { auto const resolved = itr.value().resolveExternals(); EXPECT_TRUE((i < expected.size())); EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++], resolved, true))); } EXPECT_TRUE((i == expected.size())); } // test mulptiple match via [] { std::vector expected = { insertedDocs[36].slice(), // (duplicate term) insertedDocs[37].slice(), // (duplicate term) insertedDocs[6].slice(), // (unique term) insertedDocs[26].slice(), // (unique term) }; auto result = arangodb::tests::executeQuery( vocbase, "FOR d IN testView SEARCH d['prefix'] IN TOKENS('z,xy,abcy,abcd,abc', " "'test_csv_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d"); ASSERT_TRUE(result.result.ok()); auto slice = result.data->slice(); EXPECT_TRUE(slice.isArray()); size_t i = 0; for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) { auto const resolved = itr.value().resolveExternals(); EXPECT_TRUE((i < expected.size())); EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++], resolved, true))); } EXPECT_TRUE((i == expected.size())); } }