1
0
Fork 0

Bug fix 3.5/internal issue #633 (#9902)

* Fix for internal issue #633 (#9884)

* Made ANALYZER BOOST and MIN_MATCH non deterministic to prevent premature calls

* Returned Deterministic flag. Added context function implementation for cons arguments calls.

* Fixed filter volatility detection

* Added assertion

* Fixed formatting.  Removed redundant checks.

* Applied review suggestion

Co-Authored-By: Jan <jsteemann@users.noreply.github.com>

* Added CHANGELOG record

* Update CHANGELOG
This commit is contained in:
Dronplane 2019-09-05 12:21:29 +03:00 committed by KVS85
parent 5771502a83
commit 85814c05f1
9 changed files with 237 additions and 30 deletions

View File

@ -1,5 +1,8 @@
v3.5.1 (XXXX-XX-XX)
-------------------
* Fixed internal issue #633: made ArangoSearch functions BOOST, ANALYZER, MIN_MATCH
callable with constant arguments. This will allow running queries where all arguments
for these functions are deterministic and do not depend on loop variables.
* Drop collection action to timeout more quickly to stay on fast lane.

View File

@ -362,10 +362,6 @@ bool parseOptions(aql::Query& query, LogicalView const& view, aql::AstNode const
bool hasDependencies(aql::ExecutionPlan const& plan, aql::AstNode const& node,
aql::Variable const& ref,
arangodb::HashSet<aql::Variable const*>& vars) {
if (!node.isDeterministic()) {
return false;
}
vars.clear();
aql::Ast::getReferencedVariables(&node, vars);
vars.erase(&ref); // remove "our" variable

View File

@ -133,7 +133,7 @@ class IResearchLogTopic final : public arangodb::LogTopic {
}
}; // IResearchLogTopic
// template <char const *name>
arangodb::aql::AqlValue dummyFilterFunc(arangodb::aql::ExpressionContext*,
arangodb::transaction::Methods*,
arangodb::SmallVector<arangodb::aql::AqlValue> const&) {
@ -146,6 +146,44 @@ arangodb::aql::AqlValue dummyFilterFunc(arangodb::aql::ExpressionContext*,
" Please ensure function signature is correct.");
}
/// function body for ArangoSearchContext functions ANALYZER/BOOST.
/// Just returns its first argument as outside ArangoSearch context
/// there is nothing to do with search stuff, but optimization could roll.
arangodb::aql::AqlValue dummyContextFunc(arangodb::aql::ExpressionContext*,
arangodb::transaction::Methods*,
arangodb::SmallVector<arangodb::aql::AqlValue> const& args) {
TRI_ASSERT(!args.empty()); //ensured by function signature
return args[0];
}
/// Executes MIN_MATCH function with const parameters locally the same way it will be done in ArangoSearch on runtime
/// This will allow optimize out MIN_MATCH call if all arguments are const
arangodb::aql::AqlValue dummyMinMatchContextFunc(arangodb::aql::ExpressionContext*,
arangodb::transaction::Methods*,
arangodb::SmallVector<arangodb::aql::AqlValue> const& args) {
TRI_ASSERT(args.size() > 1); // ensured by function signature
auto& minMatchValue = args.back();
if (ADB_LIKELY(minMatchValue.isNumber())) {
auto matchesLeft = minMatchValue.toInt64();
const auto argsCount = args.size() - 1;
for (size_t i = 0; i < argsCount && matchesLeft > 0; ++i) {
auto& currValue = args[i];
if (currValue.toBoolean()) {
matchesLeft--;
}
}
return arangodb::aql::AqlValue(arangodb::aql::AqlValueHintBool(matchesLeft == 0));
} else {
auto message = std::string("'MIN_MATCH' AQL function: ")
.append(" last argument has invalid type '")
.append(minMatchValue.getTypeString())
.append("' (numeric expected)");
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_BAD_PARAMETER,
message);
}
}
arangodb::aql::AqlValue dummyScorerFunc(arangodb::aql::ExpressionContext*,
arangodb::transaction::Methods*,
arangodb::SmallVector<arangodb::aql::AqlValue> const&) {
@ -357,6 +395,7 @@ bool upgradeSingleServerArangoSearchView0_1(
void registerFilters(arangodb::aql::AqlFunctionFeature& functions) {
using arangodb::iresearch::addFunction;
auto flags =
arangodb::aql::Function::makeFlags(arangodb::aql::Function::Flags::Deterministic,
arangodb::aql::Function::Flags::Cacheable,
@ -365,9 +404,9 @@ void registerFilters(arangodb::aql::AqlFunctionFeature& functions) {
addFunction(functions, { "STARTS_WITH", ".,.|.", flags, &dummyFilterFunc }); // (attribute, prefix, scoring-limit)
addFunction(functions, { "PHRASE", ".,.|.+", flags, &dummyFilterFunc }); // (attribute, input [, offset, input... ] [, analyzer])
addFunction(functions, { "IN_RANGE", ".,.,.,.,.", flags, &dummyFilterFunc }); // (attribute, lower, upper, include lower, include upper)
addFunction(functions, {"MIN_MATCH", ".,.|.+", flags, &dummyFilterFunc}); // (filter expression [, filter expression, ... ], min match count)
addFunction(functions, {"BOOST", ".,.", flags, &dummyFilterFunc}); // (filter expression, boost)
addFunction(functions, {"ANALYZER", ".,.", flags, &dummyFilterFunc}); // (filter expression, analyzer)
addFunction(functions, { "MIN_MATCH", ".,.|.+", flags, &dummyMinMatchContextFunc }); // (filter expression [, filter expression, ... ], min match count)
addFunction(functions, { "BOOST", ".,.", flags, &dummyContextFunc }); // (filter expression, boost)
addFunction(functions, { "ANALYZER", ".,.", flags, &dummyContextFunc }); // (filter expression, analyzer)
}
void registerIndexFactory() {
@ -544,7 +583,9 @@ namespace arangodb {
namespace iresearch {
bool isFilter(arangodb::aql::Function const& func) noexcept {
return func.implementation == &dummyFilterFunc;
return func.implementation == &dummyFilterFunc ||
func.implementation == &dummyContextFunc ||
func.implementation == &dummyMinMatchContextFunc;
}
bool isScorer(arangodb::aql::Function const& func) noexcept {

View File

@ -493,11 +493,25 @@ TEST_F(IResearchFilterBooleanTest, UnaryNot) {
"LET c=41 FOR d IN collection FILTER not ANALYZER(TO_STRING(c+1) == "
"d['a']['b'][23]['c'], 'test_analyzer') RETURN d",
expected, &ctx);
}
// filter with constexpr analyzer
{
arangodb::aql::Variable var("c", 0);
arangodb::aql::AqlValue value(arangodb::aql::AqlValueHintInt{41});
arangodb::aql::AqlValueGuard guard(value, true);
assertFilterExecutionFail(
ExpressionContextMock ctx;
ctx.vars.emplace(var.name, value);
irs::Or expected;
expected.add<irs::Not>()
.filter<irs::And>()
.add<irs::by_term>()
.field(mangleStringIdentity("a.b[23].c"))
.term("42");
assertFilterSuccess(
"LET c=41 FOR d IN collection FILTER not (ANALYZER(TO_STRING(c+1), "
"'test_analyzer') == d['a']['b'][23]['c']) RETURN d",
&ctx);
expected, &ctx);
}
// dynamic complex attribute name

View File

@ -489,16 +489,30 @@ TEST_F(IResearchQueryMinMatchTest, test) {
ASSERT_TRUE(result.result.is(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH));
}
// FIXME currently optimizer tries to evaluate MIN_MATCH function
// constexpr min match (true)
{
std::string const query =
"FOR d IN testView SEARCH MIN_MATCH(1==1, 2==2, 3==3, 2) "
"SORT d.seq "
"RETURN d";
auto queryResult = arangodb::tests::executeQuery(vocbase, query);
ASSERT_FALSE(queryResult.result.ok());
ASSERT_TRUE(queryResult.result.is(TRI_ERROR_NOT_IMPLEMENTED));
ASSERT_TRUE(queryResult.result.ok());
auto slice = queryResult.data->slice();
ASSERT_TRUE(slice.isArray());
ASSERT_EQ(insertedDocs.size(), slice.length());
}
// constexpr min match (false)
{
std::string const query =
"FOR d IN testView SEARCH MIN_MATCH(1==5, 2==6, 3==3, 2) "
"SORT d.seq "
"RETURN d";
auto queryResult = arangodb::tests::executeQuery(vocbase, query);
ASSERT_TRUE(queryResult.result.ok());
auto slice = queryResult.data->slice();
ASSERT_TRUE(slice.isArray());
ASSERT_EQ(0, slice.length());
}
// same as disjunction

View File

@ -674,14 +674,42 @@ TEST_F(IResearchQueryPhraseTest, SysVocbase) {
ASSERT_TRUE(result.result.is(TRI_ERROR_BAD_PARAMETER));
}
// FIXME currently optimizer tries to evaluate ANALYZER function
// constexpr ANALYZER function (true)
{
std::vector<arangodb::velocypack::Slice> expected = {
insertedDocs[7].slice(), insertedDocs[8].slice(),
insertedDocs[13].slice(), insertedDocs[19].slice(),
insertedDocs[22].slice(), insertedDocs[24].slice(),
insertedDocs[29].slice(),
};
auto result = arangodb::tests::executeQuery(
vocbase,
"FOR d IN testView SEARCH ANALYZER(1==1, 'test_analyzer') && ANALYZER(PHRASE(d.duplicated, 'z'), "
"'test_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d");
ASSERT_FALSE(result.result.ok());
ASSERT_TRUE(result.result.is(TRI_ERROR_NOT_IMPLEMENTED));
ASSERT_TRUE(result.result.ok());
auto slice = result.data->slice();
EXPECT_TRUE(slice.isArray());
size_t i = 0;
for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) {
auto const resolved = itr.value().resolveExternals();
EXPECT_TRUE((i < expected.size()));
EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++],
resolved, true)));
}
EXPECT_TRUE((i == expected.size()));
}
// constexpr ANALYZER function (false)
{
auto result = arangodb::tests::executeQuery(
vocbase,
"FOR d IN testView SEARCH ANALYZER(1==2, 'test_analyzer') && ANALYZER(PHRASE(d.duplicated, 'z'), "
"'test_analyzer') SORT BM25(d) ASC, TFIDF(d) DESC, d.seq RETURN d");
ASSERT_TRUE(result.result.ok());
auto slice = result.data->slice();
ASSERT_TRUE(slice.isArray());
ASSERT_EQ(0, slice.length());
}
// test custom analyzer

View File

@ -461,17 +461,27 @@ TEST_F(IResearchQueryScorerTest, test) {
ASSERT_FALSE(queryResult.result.ok());
ASSERT_TRUE(queryResult.result.is(TRI_ERROR_BAD_PARAMETER));
}
// FIXME currently optimizer tries to evaluate BOOST function
// constexpr BOOST (true)
{
std::string const query =
"FOR d IN testView SEARCH BOOST(1==1, 42) "
"LIMIT 1 "
"RETURN { d, score: BOOSTSCORER(d) }";
auto queryResult = arangodb::tests::executeQuery(vocbase, query);
ASSERT_FALSE(queryResult.result.ok());
ASSERT_TRUE(queryResult.result.is(TRI_ERROR_NOT_IMPLEMENTED));
ASSERT_TRUE(queryResult.result.ok());
ASSERT_TRUE(queryResult.data->slice().isArray());
ASSERT_EQ(1, queryResult.data->slice().length());
}
// constexpr BOOST (false)
{
std::string const query =
"FOR d IN testView SEARCH BOOST(1==2, 42) "
"LIMIT 1 "
"RETURN { d, score: BOOSTSCORER(d) }";
auto queryResult = arangodb::tests::executeQuery(vocbase, query);
ASSERT_TRUE(queryResult.result.ok());
ASSERT_TRUE(queryResult.data->slice().isArray());
ASSERT_EQ(0, queryResult.data->slice().length());
}
{

View File

@ -601,6 +601,30 @@
});
},
testViewInInnerLoopSortByAttributeWithNonDeterministic : function() {
var expected = [];
expected.push({ a: "bar", b: "foo", c: 1 });
expected.push({ a: "baz", b: "foo", c: 1 });
expected.push({ a: "foo", b: "bar", c: 0 });
expected.push({ a: "foo", b: "baz", c: 0 });
var result = db._query(
"FOR adoc IN AnotherUnitTestsCollection " +
"FOR doc IN UnitTestsView SEARCH RAND() != -10 && STARTS_WITH(doc['a'], adoc.a) && adoc.id == doc.c OPTIONS { waitForSync : true } " +
"SORT doc.c DESC, doc.a, doc.b " +
"RETURN doc"
).toArray();
assertEqual(result.length, expected.length);
var i = 0;
result.forEach(function(res) {
var doc = expected[i++];
assertEqual(doc.a, res.a);
assertEqual(doc.b, res.b);
assertEqual(doc.c, res.c);
});
},
testJoinTwoViewsSortByAttribute : function() {
var expected = [];
expected.push({ a: "bar", b: "foo", c: 1 });
@ -935,6 +959,33 @@
var result = db._query("FOR c IN [[[1, 3]]] FOR doc IN UnitTestsView SEARCH 1 NOT IN NOOPT(FLATTEN(c)) OPTIONS { waitForSync : true } RETURN doc").toArray();
assertEqual(result.length, 0);
},
testAnalyzerFunctionPrematureCall : function () {
assertEqual(
db._query("FOR d in UnitTestsView SEARCH ANALYZER(d.a IN TOKENS('#', 'text_en'), 'text_en') OPTIONS { waitForSync : true } RETURN d").toArray().length,
0);
assertEqual(
db._query("FOR d in UnitTestsView SEARCH ANALYZER(d.a NOT IN TOKENS('#', 'text_en'), 'text_en') OPTIONS { waitForSync : true } RETURN d").toArray().length,
28);
},
testBoostFunctionPrematureCall : function () {
assertEqual(
db._query("FOR d in UnitTestsView SEARCH BOOST(d.a IN TOKENS('#', 'text_en'), 2) OPTIONS { waitForSync : true } SORT BM25(d) RETURN d").toArray().length,
0);
assertEqual(
db._query("FOR d in UnitTestsView SEARCH BOOST(d.a NOT IN TOKENS('#', 'text_en'), 2) OPTIONS { waitForSync : true } SORT BM25(d) RETURN d").toArray().length,
28);
},
testMinMatchFunctionPrematureCall : function () {
assertEqual(
db._query("FOR d in UnitTestsView SEARCH MIN_MATCH(d.a IN TOKENS('#', 'text_en'), d.a IN TOKENS('#', 'text_de'), 1) OPTIONS { waitForSync : true } RETURN d").toArray().length,
0);
assertEqual(
db._query("FOR d in UnitTestsView SEARCH MIN_MATCH(false, true, true, 2) OPTIONS { waitForSync : true } RETURN d").toArray().length,
28);
assertEqual(
db._query("FOR d in UnitTestsView SEARCH MIN_MATCH(false, false, false, 0) OPTIONS { waitForSync : true } RETURN d").toArray().length,
28);
}
};
}

View File

@ -641,6 +641,30 @@ function iResearchAqlTestSuite () {
});
},
testViewInInnerLoopSortByAttributeWithNonDeterministic : function() {
var expected = [];
expected.push({ a: "bar", b: "foo", c: 1 });
expected.push({ a: "baz", b: "foo", c: 1 });
expected.push({ a: "foo", b: "bar", c: 0 });
expected.push({ a: "foo", b: "baz", c: 0 });
var result = db._query(
"FOR adoc IN AnotherUnitTestsCollection " +
"FOR doc IN UnitTestsView SEARCH RAND() != -10 && STARTS_WITH(doc['a'], adoc.a) && adoc.id == doc.c OPTIONS { waitForSync : true } " +
"SORT doc.c DESC, doc.a, doc.b " +
"RETURN doc"
).toArray();
assertEqual(result.length, expected.length);
var i = 0;
result.forEach(function(res) {
var doc = expected[i++];
assertEqual(doc.a, res.a);
assertEqual(doc.b, res.b);
assertEqual(doc.c, res.c);
});
},
testViewInInnerLoopSortByTFIDF_BM25_Attribute : function() {
var expected = [];
expected.push({ a: "baz", b: "foo", c: 1 });
@ -959,8 +983,34 @@ function iResearchAqlTestSuite () {
var result = db._query("FOR c IN [[[1, 3]]] FOR doc IN UnitTestsView SEARCH 1 NOT IN NOOPT(FLATTEN(c)) OPTIONS { waitForSync : true } RETURN doc").toArray();
assertEqual(result.length, 0);
},
testAnalyzerFunctionPrematureCall : function () {
assertEqual(
db._query("FOR d in UnitTestsView SEARCH ANALYZER(d.a IN TOKENS('#', 'text_en'), 'text_en') OPTIONS { waitForSync : true } RETURN d").toArray().length,
0);
assertEqual(
db._query("FOR d in UnitTestsView SEARCH ANALYZER(d.a NOT IN TOKENS('#', 'text_en'), 'text_en') OPTIONS { waitForSync : true } RETURN d").toArray().length,
28);
},
testBoostFunctionPrematureCall : function () {
assertEqual(
db._query("FOR d in UnitTestsView SEARCH BOOST(d.a IN TOKENS('#', 'text_en'), 2) OPTIONS { waitForSync : true } SORT BM25(d) RETURN d").toArray().length,
0);
assertEqual(
db._query("FOR d in UnitTestsView SEARCH BOOST(d.a NOT IN TOKENS('#', 'text_en'), 2) OPTIONS { waitForSync : true } SORT BM25(d) RETURN d").toArray().length,
28);
},
testMinMatchFunctionPrematureCall : function () {
assertEqual(
db._query("FOR d in UnitTestsView SEARCH MIN_MATCH(d.a IN TOKENS('#', 'text_en'), d.a IN TOKENS('#', 'text_de'), 1) OPTIONS { waitForSync : true } RETURN d").toArray().length,
0);
assertEqual(
db._query("FOR d in UnitTestsView SEARCH MIN_MATCH(false, true, true, 2) OPTIONS { waitForSync : true } RETURN d").toArray().length,
28);
assertEqual(
db._query("FOR d in UnitTestsView SEARCH MIN_MATCH(false, false, false, 0) OPTIONS { waitForSync : true } RETURN d").toArray().length,
28);
}
};
}