1
0
Fork 0

Bug fix/internal issue #577 (#9035)

* properly handle shorthand of system analyzer

* don't allow accessing local analyzer from within another database

* ensure local analyzer are not acessible from outside

* fix duplicate ids
This commit is contained in:
Andrey Abramov 2019-05-19 19:00:13 +03:00 committed by Frank Celler
parent 148ba5f288
commit 56696e3ea6
6 changed files with 1271 additions and 77 deletions

View File

@ -115,17 +115,17 @@ bool optimizeSearchCondition(IResearchViewNode& viewNode, Query& query, Executio
}
}
// check filter condition
auto const conditionValid = !searchCondition.root();
if(!conditionValid) {
// check filter condition if present
if (searchCondition.root()) {
auto filterCreated = FilterFactory::filter(
nullptr,
{ query.trx(), nullptr, nullptr, nullptr, &viewNode.outVariable() },
*searchCondition.root()
);
if(filterCreated.fail()){
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE, "unsupported SEARCH condition: " + filterCreated.errorMessage());
if (filterCreated.fail()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE,
"unsupported SEARCH condition: " + filterCreated.errorMessage());
}
}

View File

@ -93,8 +93,8 @@ class IdentityAnalyzer : public irs::analysis::analyzer {
private:
irs::attribute_view _attrs;
IdentityValue _term;
irs::increment _inc;
irs::string_ref _value;
irs::increment _inc;
bool _empty;
};
@ -134,24 +134,12 @@ bool IdentityAnalyzer::reset(irs::string_ref const& data) {
return !_empty;
}
static std::string const while_tokens =
" while computing result for function 'TOKENS'";
namespace detail {
std::string operator+(std::string const& s, irs::string_ref const& r){
return s + std::string(r.c_str(), r.size());
}
std::string operator+(irs::string_ref const& r, std::string const& s){
return std::string(r.c_str(), r.size()) + s;
}
}
arangodb::aql::AqlValue aqlFnTokens(arangodb::aql::ExpressionContext* expressionContext,
arangodb::transaction::Methods* trx,
arangodb::aql::VPackFunctionParameters const& args) {
if (2 != args.size() || !args[0].isString() || !args[1].isString()) {
auto message = "invalid arguments" + while_tokens;
irs::string_ref const message = "invalid arguments while computing result for function 'TOKENS'";
LOG_TOPIC("740fd", WARN, arangodb::iresearch::TOPIC) << message;
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, message);
}
@ -162,7 +150,9 @@ arangodb::aql::AqlValue aqlFnTokens(arangodb::aql::ExpressionContext* expression
arangodb::application_features::ApplicationServer::lookupFeature<arangodb::iresearch::IResearchAnalyzerFeature>();
if (!analyzers) {
auto const message = "failure to find feature 'arangosearch'"s + while_tokens;
irs::string_ref const message = "failure to find feature 'arangosearch' while "
"computing result for function 'TOKENS'";
LOG_TOPIC("fbd91", WARN, arangodb::iresearch::TOPIC) << message;
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, message);
}
@ -173,23 +163,21 @@ arangodb::aql::AqlValue aqlFnTokens(arangodb::aql::ExpressionContext* expression
auto* sysDatabase = arangodb::application_features::ApplicationServer::lookupFeature< // find feature
arangodb::SystemDatabaseFeature // featue type
>();
auto sysVocbase = sysDatabase ? sysDatabase->use() : nullptr;
if (sysVocbase) {
pool = analyzers->get( // get analyzer
arangodb::iresearch::IResearchAnalyzerFeature::normalize( // normalize
name, trx->vocbase(), *sysVocbase // args
)
);
pool = analyzers->get(name, trx->vocbase(), *sysVocbase);
}
} else {
pool = analyzers->get(name); // verbatim
}
if (!pool) {
using detail::operator+;
auto const message = "failure to find arangosearch analyzer with name '"s +
name + "'" + while_tokens;
auto const message = "failure to find arangosearch analyzer with name '"s
+ static_cast<std::string>(name)
+ "' while computing result for function 'TOKENS'";
LOG_TOPIC("0d256", WARN, arangodb::iresearch::TOPIC) << message;
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, message);
}
@ -197,17 +185,18 @@ arangodb::aql::AqlValue aqlFnTokens(arangodb::aql::ExpressionContext* expression
auto analyzer = pool->get();
if (!analyzer) {
using detail::operator+;
auto const message = "failure to find arangosearch analyzer with name '"s +
name + "'" + while_tokens;
auto const message = "failure to find arangosearch analyzer with name '"s
+ static_cast<std::string>(name)
+ "' while computing result for function 'TOKENS'";
LOG_TOPIC("d7477", WARN, arangodb::iresearch::TOPIC) << message;
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, message);
}
if (!analyzer->reset(data)) {
using detail::operator+;
auto const message = "failure to reset arangosearch analyzer: ' "s +
name + "'" + while_tokens;
auto const message = "failure to reset arangosearch analyzer: ' "s
+ static_cast<std::string>(name)
+ "' while computing result for function 'TOKENS'";
LOG_TOPIC("45a2d", WARN, arangodb::iresearch::TOPIC) << message;
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, message);
}
@ -215,10 +204,11 @@ arangodb::aql::AqlValue aqlFnTokens(arangodb::aql::ExpressionContext* expression
auto& values = analyzer->attributes().get<irs::term_attribute>();
if (!values) {
using detail::operator+;
auto const message =
"failure to retrieve values from arangosearch analyzer name '"s +
name + "'" + while_tokens;
"failure to retrieve values from arangosearch analyzer name '"s
+ static_cast<std::string>(name)
+ "' while computing result for function 'TOKENS'";
LOG_TOPIC("f46f2", WARN, arangodb::iresearch::TOPIC) << message;
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, message);
}
@ -228,7 +218,9 @@ arangodb::aql::AqlValue aqlFnTokens(arangodb::aql::ExpressionContext* expression
auto buffer = irs::memory::make_unique<arangodb::velocypack::Buffer<uint8_t>>();
if (!buffer) {
auto const message = "failure to allocate result buffer"s + while_tokens;
irs::string_ref const message = "failure to allocate result buffer while "
"computing result for function 'TOKENS'";
LOG_TOPIC("97cd0", WARN, arangodb::iresearch::TOPIC) << message;
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_OUT_OF_MEMORY, message);
}
@ -1077,31 +1069,46 @@ arangodb::Result IResearchAnalyzerFeature::ensure( // ensure analyzer existence
IResearchAnalyzerFeature::AnalyzerPool::ptr IResearchAnalyzerFeature::get( // find analyzer
irs::string_ref const& name, // analyzer name
TRI_vocbase_t const& activeVocbase, // fallback vocbase if not part of name
TRI_vocbase_t const& systemVocbase, // the system vocbase for use with empty prefix
bool onlyCached /*= false*/ // check only locally cached analyzers
) const noexcept {
try {
auto split = splitAnalyzerName(name);
auto const normalizedName = normalize(name, activeVocbase, systemVocbase, true);
if (!split.first.null() && !onlyCached) { // do not trigger load for static-analyzer requests
auto res = // load analyzers for database
const_cast<IResearchAnalyzerFeature*>(this)->loadAnalyzers(split.first);
auto const split = splitAnalyzerName(normalizedName);
if (!res.ok()) {
LOG_TOPIC("36062", WARN, arangodb::iresearch::TOPIC)
<< "failure to load analyzers for database '" << split.first << "' while getting analyzer '" << name << "': " << res.errorNumber() << " " << res.errorMessage();
TRI_set_errno(res.errorNumber());
// FIXME deduplicate code below, see get(irs::string, bool)
if (!split.first.null()) { // check if analyzer is static
if (split.first != activeVocbase.name() && split.first != systemVocbase.name()) {
// accessing local analyzer from within another database
return nullptr;
}
if (!onlyCached) {
// load analyzers for database
auto res = const_cast<IResearchAnalyzerFeature*>(this)->loadAnalyzers(split.first);
if (!res.ok()) {
LOG_TOPIC("36062", WARN, arangodb::iresearch::TOPIC)
<< "failure to load analyzers for database '" << split.first << "' while getting analyzer '" << name << "': " << res.errorNumber() << " " << res.errorMessage();
TRI_set_errno(res.errorNumber());
return nullptr;
}
}
}
ReadMutex mutex(_mutex);
SCOPED_LOCK(mutex);
auto itr =
_analyzers.find(irs::make_hashed_ref(name, std::hash<irs::string_ref>()));
auto itr = _analyzers.find(
irs::make_hashed_ref(static_cast<irs::string_ref>(normalizedName),
std::hash<irs::string_ref>())
);
if (itr == _analyzers.end()) {
LOG_TOPIC("4049d", WARN, arangodb::iresearch::TOPIC)
LOG_TOPIC("4049c", WARN, arangodb::iresearch::TOPIC)
<< "failure to find arangosearch analyzer name '" << name << "'";
return nullptr;
@ -1136,6 +1143,67 @@ IResearchAnalyzerFeature::AnalyzerPool::ptr IResearchAnalyzerFeature::get( // fi
return nullptr;
}
IResearchAnalyzerFeature::AnalyzerPool::ptr IResearchAnalyzerFeature::get( // find analyzer
irs::string_ref const& name, // analyzer name
bool onlyCached /*= false*/ // check only locally cached analyzers
) const noexcept {
try {
auto const split = splitAnalyzerName(name);
if (!split.first.null() && !onlyCached) { // do not trigger load for static-analyzer requests
auto res = // load analyzers for database
const_cast<IResearchAnalyzerFeature*>(this)->loadAnalyzers(split.first);
if (!res.ok()) {
LOG_TOPIC("36068", WARN, arangodb::iresearch::TOPIC)
<< "failure to load analyzers for database '" << split.first << "' while getting analyzer '" << name << "': " << res.errorNumber() << " " << res.errorMessage();
TRI_set_errno(res.errorNumber());
return nullptr;
}
}
ReadMutex mutex(_mutex);
SCOPED_LOCK(mutex);
auto itr =
_analyzers.find(irs::make_hashed_ref(name, std::hash<irs::string_ref>()));
if (itr == _analyzers.end()) {
LOG_TOPIC("4049d", WARN, arangodb::iresearch::TOPIC)
<< "failure to find arangosearch analyzer name '" << name << "'";
return nullptr;
}
auto pool = itr->second;
if (pool) {
return pool;
}
LOG_TOPIC("1a29z", WARN, arangodb::iresearch::TOPIC)
<< "failure to get arangosearch analyzer name '" << name << "'";
TRI_set_errno(TRI_ERROR_INTERNAL);
} catch (arangodb::basics::Exception& e) {
LOG_TOPIC("89eff", WARN, arangodb::iresearch::TOPIC)
<< "caught exception while retrieving an arangosearch analizer name '"
<< name << "': " << e.code() << " " << e.what();
IR_LOG_EXCEPTION();
} catch (std::exception& e) {
LOG_TOPIC("ce8d9", WARN, arangodb::iresearch::TOPIC)
<< "caught exception while retrieving an arangosearch analizer name '"
<< name << "': " << e.what();
IR_LOG_EXCEPTION();
} catch (...) {
LOG_TOPIC("55050", WARN, arangodb::iresearch::TOPIC)
<< "caught exception while retrieving an arangosearch analizer name '"
<< name << "'";
IR_LOG_EXCEPTION();
}
return nullptr;
}
IResearchAnalyzerFeature::AnalyzerPool::ptr IResearchAnalyzerFeature::get( // find analyzer
irs::string_ref const& name, // analyzer name
irs::string_ref const& type, // analyzer type
@ -1650,7 +1718,7 @@ arangodb::Result IResearchAnalyzerFeature::loadAnalyzers( // load
// normalize vocbase such that active vocbase takes precedence over system
// vocbase i.e. prefer NIL over EMPTY
// .........................................................................
if (split.first.null() || split.first == activeVocbase.name()) { // active vocbase
if (&systemVocbase == &activeVocbase || split.first.null() || (split.first == activeVocbase.name())) { // active vocbase
return split.second;
}

View File

@ -143,6 +143,17 @@ class IResearchAnalyzerFeature final : public arangodb::application_features::Ap
bool onlyCached = false // check only locally cached analyzers
) const noexcept;
//////////////////////////////////////////////////////////////////////////////
/// @brief find analyzer
/// @return analyzer with the specified name or nullptr
//////////////////////////////////////////////////////////////////////////////
AnalyzerPool::ptr get( // find analyzer
irs::string_ref const& name, // analyzer name
TRI_vocbase_t const& activeVocbase, // fallback vocbase if not part of name
TRI_vocbase_t const& systemVocbase, // the system vocbase for use with empty prefix
bool onlyCached = false // check only locally cached analyzers
) const noexcept;
//////////////////////////////////////////////////////////////////////////////
/// @brief find analyzer
/// @param name analyzer name (already normalized)
@ -272,4 +283,4 @@ class IResearchAnalyzerFeature final : public arangodb::application_features::Ap
} // namespace iresearch
} // namespace arangodb
#endif
#endif

View File

@ -76,26 +76,33 @@ struct FilterContext {
irs::boost::boost_t boost;
}; // FilterContext
typedef std::function<arangodb::Result(irs::boolean_filter*, QueryContext const&, FilterContext const&, arangodb::aql::AstNode const&)> ConvertionHandler;
typedef std::function<
arangodb::Result(irs::boolean_filter*,
QueryContext const&,
FilterContext const&,
arangodb::aql::AstNode const&)
> ConvertionHandler;
// forward declaration
arangodb::Result filter(irs::boolean_filter* filter, QueryContext const& queryCtx,
FilterContext const& filterCtx, arangodb::aql::AstNode const& node);
arangodb::Result filter(irs::boolean_filter* filter,
QueryContext const& queryctx,
FilterContext const& filterCtx,
arangodb::aql::AstNode const& node);
////////////////////////////////////////////////////////////////////////////////
/// @brief logs message about malformed AstNode with the specified type
////////////////////////////////////////////////////////////////////////////////
static const auto prefix = "Can't process malformed AstNode of type '";
arangodb::Result logMalformedNode(arangodb::aql::AstNodeType type) {
auto const* typeName = arangodb::iresearch::getNodeTypeName(type);
std::string message;
std::string message("Can't process malformed AstNode of type '");
if (typeName) {
message = prefix + *typeName + "'";
message += *typeName;
} else {
std::stringstream ss; ss << type;
message = prefix + ss.str() + "'";
message += std::to_string(type);
}
message += "'";
LOG_TOPIC("5070f", WARN, arangodb::iresearch::TOPIC) << message;
return {TRI_ERROR_BAD_PARAMETER, message};
}
@ -200,11 +207,8 @@ arangodb::iresearch::IResearchLinkMeta::Analyzer extractAnalyzerFromArg(
auto sysVocbase = sysDatabase ? sysDatabase->use() : nullptr;
if (sysVocbase) {
analyzer = analyzerFeature->get( // get analyzer
arangodb::iresearch::IResearchAnalyzerFeature::normalize( // normalize
analyzerId, ctx.trx->vocbase(), *sysVocbase // args
)
);
analyzer = analyzerFeature->get(analyzerId, ctx.trx->vocbase(), *sysVocbase);
shortName = arangodb::iresearch::IResearchAnalyzerFeature::normalize( // normalize
analyzerId, ctx.trx->vocbase(), *sysVocbase, false // args
);
@ -1177,11 +1181,8 @@ arangodb::Result fromFuncAnalyzer(irs::boolean_filter* filter, QueryContext cons
auto sysVocbase = sysDatabase ? sysDatabase->use() : nullptr;
if (sysVocbase) {
analyzer = analyzerFeature->get( // get analyzer
arangodb::iresearch::IResearchAnalyzerFeature::normalize( // normalize
analyzerIdValue, ctx.trx->vocbase(), *sysVocbase // args
)
);
analyzer = analyzerFeature->get(analyzerIdValue, ctx.trx->vocbase(), *sysVocbase);
shortName = arangodb::iresearch::IResearchAnalyzerFeature::normalize( // normalize
analyzerIdValue, ctx.trx->vocbase(), *sysVocbase, false // args
);

View File

@ -229,6 +229,7 @@ struct IResearchAnalyzerFeatureSetup {
StorageEngineWrapper engine; // can only nullify 'ENGINE' after all TRI_vocbase_t and ApplicationServer have been destroyed
arangodb::application_features::ApplicationServer server;
std::vector<std::pair<arangodb::application_features::ApplicationFeature*, bool>> features;
arangodb::SystemDatabaseFeature* sysDatabaseFeature{};
IResearchAnalyzerFeatureSetup(): engine(server), server(nullptr, nullptr) {
auto* agencyCommManager = new AgencyCommManagerMock("arango");
@ -248,7 +249,7 @@ struct IResearchAnalyzerFeatureSetup {
features.emplace_back(new arangodb::ShardingFeature(server), false);
features.emplace_back(new arangodb::QueryRegistryFeature(server), false); // required for constructing TRI_vocbase_t
arangodb::application_features::ApplicationServer::server->addFeature(features.back().first); // need QueryRegistryFeature feature to be added now in order to create the system database
features.emplace_back(new arangodb::SystemDatabaseFeature(server), true); // required for IResearchAnalyzerFeature
features.emplace_back(sysDatabaseFeature = new arangodb::SystemDatabaseFeature(server), true); // required for IResearchAnalyzerFeature
features.emplace_back(new arangodb::V8DealerFeature(server), false); // required for DatabaseFeature::createDatabase(...)
features.emplace_back(new arangodb::aql::AqlFunctionFeature(server), true); // required for IResearchAnalyzerFeature
@ -269,7 +270,9 @@ struct IResearchAnalyzerFeatureSetup {
f.first->prepare();
}
auto const databases = arangodb::velocypack::Parser::fromJson(std::string("[ { \"name\": \"") + arangodb::StaticStrings::SystemDatabase + "\" } ]");
auto const databases = arangodb::velocypack::Parser::fromJson(
std::string("[ { \"name\": \"" + arangodb::StaticStrings::SystemDatabase + "\" } ]")
);
auto* dbFeature = arangodb::application_features::ApplicationServer::lookupFeature<
arangodb::DatabaseFeature
>("Database");
@ -615,17 +618,78 @@ SECTION("test_get") {
aqlFeature.start(); // required for Query::Query(...), must not call ~AqlFeature() for the duration of the test
{
REQUIRE(s.sysDatabaseFeature);
auto sysVocbase = s.sysDatabaseFeature->use();
REQUIRE(sysVocbase);
TRI_vocbase_t* vocbase;
REQUIRE((TRI_ERROR_NO_ERROR == dbFeature->createDatabase(1, "testVocbase", vocbase)));
auto dropDB = irs::make_finally([dbFeature]()->void { dbFeature->dropDatabase("testVocbase", true, true); });
REQUIRE(vocbase);
arangodb::iresearch::IResearchAnalyzerFeature::EmplaceResult result;
arangodb::iresearch::IResearchAnalyzerFeature feature(s.server);
feature.prepare(); // add static analyzers
REQUIRE((feature.emplace(result, arangodb::StaticStrings::SystemDatabase + "::test_analyzer", "TestAnalyzer", "abc").ok()));
REQUIRE((feature.emplace(result, vocbase->name() + "::test_analyzer", "TestAnalyzer", "def").ok()));
// get valid
{
auto pool = feature.get(arangodb::StaticStrings::SystemDatabase + "::test_analyzer");
REQUIRE((false == !pool));
CHECK((irs::flags() == pool->features()));
CHECK("abc" == pool->properties());
auto analyzer = pool.get();
CHECK((false == !analyzer));
}
// get global
{
auto pool = feature.get(arangodb::StaticStrings::SystemDatabase + "::test_analyzer", *sysVocbase, *sysVocbase);
REQUIRE((false == !pool));
CHECK((irs::flags() == pool->features()));
CHECK("abc" == pool->properties());
auto analyzer = pool.get();
CHECK((false == !analyzer));
}
// get global
{
auto pool = feature.get(arangodb::StaticStrings::SystemDatabase + "::test_analyzer", *vocbase, *sysVocbase);
REQUIRE((false == !pool));
CHECK((irs::flags() == pool->features()));
CHECK("abc" == pool->properties());
auto analyzer = pool.get();
CHECK((false == !analyzer));
}
// get global
{
auto pool = feature.get("::test_analyzer", *vocbase, *sysVocbase);
REQUIRE((false == !pool));
CHECK((irs::flags() == pool->features()));
CHECK("abc" == pool->properties());
auto analyzer = pool.get();
CHECK((false == !analyzer));
}
// get local
{
auto pool = feature.get("test_analyzer", *vocbase, *sysVocbase);
REQUIRE((false == !pool));
CHECK((irs::flags() == pool->features()));
CHECK("def" == pool->properties());
auto analyzer = pool.get();
CHECK((false == !analyzer));
}
// get local
{
auto pool = feature.get(vocbase->name() + "::test_analyzer", *vocbase, *sysVocbase);
REQUIRE((false == !pool));
CHECK((irs::flags() == pool->features()));
CHECK("def" == pool->properties());
auto analyzer = pool.get();
CHECK((false == !analyzer));
}
@ -635,6 +699,14 @@ SECTION("test_get") {
CHECK((true == !feature.get(arangodb::StaticStrings::SystemDatabase + "::invalid")));
}
// get invalid
{
CHECK(nullptr == feature.get(arangodb::StaticStrings::SystemDatabase + "::invalid", *sysVocbase, *sysVocbase));
CHECK(nullptr == feature.get("::invalid", *sysVocbase, *sysVocbase));
CHECK(nullptr == feature.get("invalid", *sysVocbase, *sysVocbase));
CHECK(nullptr == feature.get("testAnalyzer", *vocbase, *sysVocbase));
}
// get static analyzer
{
auto pool = feature.get("identity");
@ -643,6 +715,15 @@ SECTION("test_get") {
auto analyzer = pool->get();
CHECK((false == !analyzer));
}
// get static analyzer
{
auto pool = feature.get("identity", *sysVocbase, *sysVocbase);
REQUIRE((false == !pool));
CHECK((irs::flags({irs::norm::type(), irs::frequency::type()}) == pool->features()));
auto analyzer = pool->get();
CHECK((false == !analyzer));
}
}
// get existing with parameter match
@ -1038,6 +1119,13 @@ SECTION("test_normalize") {
CHECK((std::string("::name") == normalized));
}
// normalize system + delimiter + name (without prefix) in system
{
irs::string_ref analyzer = "system::name";
auto normalized = arangodb::iresearch::IResearchAnalyzerFeature::normalize(analyzer, system, system, false);
CHECK((std::string("name") == normalized));
}
// normalize vocbase + delimiter + name (with prefix)
{
irs::string_ref analyzer = "active::name";

File diff suppressed because it is too large Load Diff