1
0
Fork 0
arangodb/arangod/IResearch/IResearchOrderFactory.cpp

383 lines
11 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2017 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
// on MSVC include "date/date.h" up front and undefine NOEXCEPT, otherwise the defines in 3rdParty\date\include\date\date.h and 3rdParty\iresearch\core\shared.hpp conflict
#if defined(_MSC_VER)
#include "date/date.h"
#undef NOEXCEPT
#endif
#include "Aql/Ast.h"
#include "Aql/AstNode.h"
#include "Aql/ExecutionNode.h"
#include "Aql/Expression.h"
#include "Aql/Function.h"
#include "Aql/SortCondition.h"
#include "Basics/fasthash.h"
#include "IResearch/IResearchFeature.h"
#include "IResearch/IResearchOrderFactory.h"
#include "IResearch/VelocyPackHelper.h"
#include <search/scorers.hpp>
// ----------------------------------------------------------------------------
// --SECTION-- OrderFactory dependencies
// ----------------------------------------------------------------------------
namespace {
// AST node of array type constructed without any members.
// NOTE(review): not referenced anywhere else in the visible part of this
// file - confirm it is still needed.
arangodb::aql::AstNode const EMPTY_ARGS(arangodb::aql::NODE_TYPE_ARRAY);
// Validates the argument list of a scorer call and returns the loop variable
// referenced by its first argument.
// Returns nullptr when `args` is not a non-empty array node, when its first
// member is not a reference node, or when any further member is missing or
// non-deterministic (non-deterministic scorer arguments are not supported).
arangodb::aql::Variable const* getScorerRef(arangodb::aql::AstNode const* args) noexcept {
  if (args == nullptr || args->type != arangodb::aql::NODE_TYPE_ARRAY) {
    return nullptr;
  }

  size_t const count = args->numMembers();

  if (count == 0) {
    return nullptr;  // invalid args
  }

  // the leading argument has to be a reference to the loop variable
  auto const* refNode = args->getMemberUnchecked(0);

  if (refNode == nullptr || refNode->type != arangodb::aql::NODE_TYPE_REFERENCE) {
    return nullptr;
  }

  // every remaining argument has to be present and deterministic
  for (size_t idx = 1; idx < count; ++idx) {
    auto const* member = args->getMemberUnchecked(idx);

    if (member == nullptr || !member->isDeterministic()) {
      return nullptr;
    }
  }

  return reinterpret_cast<arangodb::aql::Variable const*>(refNode->getData());
}
// Instantiates the scorer registered under `name` and stores it in `scorer`.
// ArangoDB, for API consistency, only supports scorers configurable via JSON.
//  - no members:       `scorer` is left as passed in
//  - a single member:  the scorer is requested with NIL arguments first and,
//                      if that yields nothing, with an empty JSON array "[]"
//  - more members:     members 1..n are executed against `ctx`, collected
//                      into an array and handed over as JSON
// Returns false if an argument is missing or fails to execute, otherwise
// whether `scorer` holds a scorer afterwards.
bool makeScorer(irs::sort::ptr& scorer, irs::string_ref const& name,
                arangodb::aql::AstNode const& args,
                arangodb::iresearch::QueryContext const& ctx) {
  TRI_ASSERT(!args.numMembers() ||
             arangodb::iresearch::findReference(*args.getMember(0), *ctx.ref));

  size_t const total = args.numMembers();

  if (total == 1) {
    // only the loop variable reference was supplied: use default arguments
    scorer = irs::scorers::get(name, irs::text_format::json,
                               irs::string_ref::NIL, false);

    if (!scorer) {
      // retry with the arguments expressed as an empty JSON array
      scorer = irs::scorers::get(name, irs::text_format::json, "[]", false);
    }
  } else if (total > 1) {
    arangodb::velocypack::Builder jsonArgs;
    arangodb::iresearch::ScopedAqlValue value;

    jsonArgs.openArray();

    // member 0 is the loop variable reference, real arguments start at 1
    for (size_t idx = 1; idx < total; ++idx) {
      auto const* member = args.getMemberUnchecked(idx);

      if (member == nullptr) {
        return false;  // invalid arg
      }

      value.reset(*member);

      if (!value.execute(ctx)) {
        return false;  // failed to execute value
      }

      value.toVelocyPack(jsonArgs);
    }

    jsonArgs.close();

    // pass the collected arguments as JSON
    scorer = irs::scorers::get(name, irs::text_format::json,
                               jsonArgs.toJson(), false);
  }

  return bool(scorer);
}
// Checks (and, when `scorer` is non-null, builds) the scorer `scorerName`
// called with the argument list `args`.
// The loop variable extracted from `args` has to be identical to `ctx.ref`.
// With a null `scorer` only the existence of a JSON-configurable scorer with
// that name is checked; otherwise the scorer is created via makeScorer().
// NOTE(review): getScorerRef() yields nullptr for invalid arguments, so the
// comparison below only rejects them while `ctx.ref` is non-null - confirm
// that callers never pass a null `ctx.ref`.
bool fromFCall(irs::sort::ptr* scorer, irs::string_ref const& scorerName,
               arangodb::aql::AstNode const* args,
               arangodb::iresearch::QueryContext const& ctx) {
  if (getScorerRef(args) != ctx.ref) {
    return false;  // invalid arguments
  }

  if (scorer != nullptr) {
    // we don't support non-constant arguments for scorers now, if it
    // will change ensure that proper `ExpressionContext` set in `ctx`
    return makeScorer(*scorer, scorerName, *args, ctx);
  }

  // cheap shallow check
  // ArangoDB, for API consistency, only supports scorers configurable via JSON
  return irs::scorers::exists(scorerName, irs::text_format::json, false);
}
// Extracts the lower-cased scorer name from a built-in function call node.
// Returns false (leaving `scorerName` untouched) when the node carries no
// function, does not have exactly one member, or the function is not a scorer.
bool nameFromFCall(std::string& scorerName, arangodb::aql::AstNode const& node) {
  TRI_ASSERT(arangodb::aql::NODE_TYPE_FCALL == node.type);

  auto* fn = static_cast<arangodb::aql::Function*>(node.getData());

  if (!fn || 1 != node.numMembers() || !arangodb::iresearch::isScorer(*fn)) {
    return false;  // no function
  }

  scorerName = fn->name;

  // convert name to lower case; the character is routed through
  // `unsigned char` because handing a negative `char` value to tolower()
  // is undefined behaviour
  std::transform(scorerName.begin(), scorerName.end(), scorerName.begin(),
                 [](unsigned char ch) -> char {
                   return static_cast<char>(::tolower(ch));
                 });

  return true;
}
// Resolves the scorer name of a built-in function call node and forwards the
// call, together with the node's first member as argument list, to the
// name-based fromFCall() overload. Returns false if no scorer name can be
// extracted from `node`.
bool fromFCall(irs::sort::ptr* scorer, arangodb::aql::AstNode const& node,
               arangodb::iresearch::QueryContext const& ctx) {
  std::string name;

  return nameFromFCall(name, node) &&
         fromFCall(scorer, name, node.getMemberUnchecked(0), ctx);
}
// Extracts the scorer name from a user function call node.
// The node has to carry a string value and exactly one member, otherwise
// false is returned; the name itself is obtained via parseValue().
bool nameFromFCallUser(irs::string_ref& scorerName, arangodb::aql::AstNode const& node) {
  TRI_ASSERT(arangodb::aql::NODE_TYPE_FCALL_USER == node.type);

  if (node.value.type == arangodb::aql::VALUE_TYPE_STRING && node.numMembers() == 1) {
    return arangodb::iresearch::parseValue(scorerName, node);
  }

  return false;  // no function name
}
// Resolves the scorer name of a user function call node and forwards the
// call, together with the node's first member as argument list, to the
// name-based fromFCall() overload. Returns false if no scorer name can be
// extracted from `node`.
bool fromFCallUser(irs::sort::ptr* scorer, arangodb::aql::AstNode const& node,
                   arangodb::iresearch::QueryContext const& ctx) {
  irs::string_ref name;

  return nameFromFCallUser(name, node) &&
         fromFCall(scorer, name, node.getMemberUnchecked(0), ctx);
}
// Returns the loop variable a scorer call refers to, or nullptr if `node` is
// not a (user) function call, its arguments are invalid, or the called
// function is not accepted by OrderFactory::scorer().
arangodb::aql::Variable const* refFromScorer(arangodb::aql::AstNode const& node) {
  bool const isCall = arangodb::aql::NODE_TYPE_FCALL == node.type ||
                      arangodb::aql::NODE_TYPE_FCALL_USER == node.type;

  if (!isCall) {
    return nullptr;
  }

  auto* loopVar = getScorerRef(node.getMember(0));

  if (loopVar == nullptr) {
    return nullptr;  // invalid arguments or reference
  }

  // context carrying nothing but the extracted loop variable; the scorer
  // itself is not created (null output pointer), only checked
  arangodb::iresearch::QueryContext const ctx{nullptr, nullptr, nullptr, nullptr, loopVar};
  bool const isScorerCall = arangodb::iresearch::OrderFactory::scorer(nullptr, node, ctx);

  return isScorerCall ? loopVar : nullptr;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief checks a subtree for scorer calls
/// @return true if a specified node contains at least one scorer
////////////////////////////////////////////////////////////////////////////////
bool hasScorer(arangodb::aql::AstNode const& root) {
  // the visitor reports `true` for every node that is NOT a scorer call
  auto isNotScorer = [](arangodb::aql::AstNode const& candidate) -> bool {
    return refFromScorer(candidate) == nullptr;
  };

  bool const visitResult = arangodb::iresearch::visit<true>(root, isNotScorer);

  return !visitResult;
}
} // namespace
namespace arangodb {
namespace iresearch {
// ----------------------------------------------------------------------------
// --SECTION-- ScorerReplacer implementation
// ----------------------------------------------------------------------------
// Replaces every scorer call inside the expression of `node` with a reference
// to a temporary variable. Identical scorers (same loop variable and same
// call node, as keyed by `HashedScorer`) share one variable through `_dedup`.
// Does nothing if the node has no expression, the expression has no AST, or
// it has no modifiable root node.
void ScorerReplacer::replace(aql::CalculationNode& node) {
  if (!node.expression()) {
    return;
  }

  auto& expression = *node.expression();
  auto* ast = expression.ast();

  if (ast == nullptr) {
    return;  // ast is not set
  }

  auto* root = expression.nodeForModification();

  if (root == nullptr) {
    return;  // node is not set
  }

  // maps a scorer call onto a reference to its (deduplicated) variable,
  // every other node is handed back unchanged
  auto substitute = [this, ast](aql::AstNode* candidate) -> aql::AstNode* {
    TRI_ASSERT(candidate);  // ensured by 'Ast::traverseAndModify(...)'

    auto* loopVar = refFromScorer(*candidate);

    if (loopVar == nullptr) {
      return candidate;  // not a scorer
    }

    HashedScorer const key(loopVar, candidate);
    auto entry = _dedup.find(key);

    if (entry == _dedup.end()) {
      // first occurrence of this scorer: create its variable
      auto* tmpVar = ast->variables()->createTemporaryVariable();
      entry = _dedup.try_emplace(key, tmpVar).first;
    }

    return ast->createNodeReference(entry->second);
  };

  // try to replace the root node of the expression first
  auto* replacement = substitute(root);

  if (replacement != root) {
    // simple expression, e.g LET x = BM25(d)
    expression.replaceNode(replacement);
    return;
  }

  if (hasScorer(*root)) {
    // scorers are nested deeper: rewrite a clone of the expression tree
    auto* copy = root->clone(ast);
    aql::Ast::traverseAndModify(copy, substitute);
    expression.replaceNode(copy);
  }
}
// Moves all deduplicated scorers that belong to the loop variable `var` out
// of `_dedup`: each one is appended to `scorers` (replacement variable plus
// scorer node) and its entry is erased.
void ScorerReplacer::extract(aql::Variable const& var, std::vector<Scorer>& scorers) {
  auto cursor = _dedup.begin();

  while (cursor != _dedup.end()) {
    if (cursor->first.var != &var) {
      ++cursor;  // belongs to another variable, keep it
      continue;
    }

    scorers.emplace_back(cursor->second, cursor->first.node);
    cursor = _dedup.erase(cursor);  // erase() hands back the next entry
  }
}
// ----------------------------------------------------------------------------
// --SECTION-- OrderFactory implementation
// ----------------------------------------------------------------------------
// Dispatches on the node type: built-in and user function calls are handed
// to the matching helper, everything else is rejected because IResearch does
// not support any expressions except function calls.
/*static*/ bool OrderFactory::scorer(irs::sort::ptr* scorer,
                                     arangodb::aql::AstNode const& node,
                                     arangodb::iresearch::QueryContext const& ctx) {
  if (arangodb::aql::NODE_TYPE_FCALL == node.type) {
    return fromFCall(scorer, node, ctx);  // function call
  }

  if (arangodb::aql::NODE_TYPE_FCALL_USER == node.type) {
    return fromFCallUser(scorer, node, ctx);  // user function call
  }

  return false;
}
// Resolves the scorer name behind a (user) function call node.
// With a null `comparer` only the existence of a JSON-configurable scorer
// with that name is checked; otherwise a scorer with default arguments is
// created, stored in `*comparer`, and the result tells whether that worked.
// Any node that is not a function call is rejected.
/*static*/ bool OrderFactory::comparer(irs::sort::ptr* comparer,
                                       arangodb::aql::AstNode const& node) {
  std::string lowered;  // owns the name for built-in calls
  irs::string_ref name;

  if (arangodb::aql::NODE_TYPE_FCALL == node.type) {
    // function call
    if (!nameFromFCall(lowered, node)) {
      return false;
    }

    name = lowered;
  } else if (arangodb::aql::NODE_TYPE_FCALL_USER == node.type) {
    // user function call
    if (!nameFromFCallUser(name, node)) {
      return false;
    }
  } else {
    // IResearch does not support any
    // expressions except function calls
    return false;
  }

  if (comparer == nullptr) {
    // cheap shallow check
    // ArangoDB, for API consistency, only supports scorers configurable via JSON
    return irs::scorers::exists(name, irs::text_format::json, false);
  }

  // create scorer with default arguments
  *comparer = irs::scorers::get(name, irs::text_format::json,
                                irs::string_ref::NIL, false);

  return bool(*comparer);
}
} // namespace iresearch
} // namespace arangodb
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------