1
0
Fork 0
arangodb/arangod/Aql/OptimizerRulesReplaceFuncti...

469 lines
16 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Christoph Uhde
////////////////////////////////////////////////////////////////////////////////
#include "OptimizerRules.h"
#include "Aql/Condition.h"
#include "Aql/ExecutionNode.h"
#include "Aql/ExecutionPlan.h"
#include "Aql/Function.h"
#include "Aql/IndexNode.h"
#include "Aql/Optimizer.h"
#include "Aql/Query.h"
#include "Aql/SortNode.h"
#include "Aql/Variable.h"
#include "Aql/types.h"
#include "Basics/AttributeNameParser.h"
#include "Basics/SmallVector.h"
#include "Basics/StaticStrings.h"
#include "Basics/StringBuffer.h"
#include "Indexes/Index.h"
#include "VocBase/Methods/Collections.h"
using namespace arangodb;
using namespace arangodb::aql;
using EN = arangodb::aql::ExecutionNode;
namespace {
/// @brief arguments extracted from a NEAR or WITHIN function call
///
/// Argument layout as read by the constructor:
///   NEAR(coll, lat, lon[, limit[, distanceName]])
///   WITHIN(coll, lat, lon, radius[, distanceName])
struct NearOrWithinParams {
  std::string collection;           // stays empty if argument 0 is not a string
  AstNode* latitude = nullptr;      // argument 1
  AstNode* longitude = nullptr;     // argument 2
  AstNode* limit = nullptr;         // argument 3 for NEAR, nullptr when omitted
  AstNode* radius = nullptr;        // argument 3 for WITHIN
  AstNode* distanceName = nullptr;  // argument 4, nullptr when omitted

  /// @param node   function call node (NODE_TYPE_FCALL) whose member 0 is
  ///               the argument array
  /// @param isNear true for NEAR (argument 3 is the limit), false for WITHIN
  ///               (argument 3 is the radius)
  NearOrWithinParams(AstNode const* node, bool isNear) {
    TRI_ASSERT(node->type == AstNodeType::NODE_TYPE_FCALL);
    AstNode* arr = node->getMember(0);
    TRI_ASSERT(arr->type == AstNodeType::NODE_TYPE_ARRAY);
    auto const numArgs = arr->numMembers();

    if (arr->getMember(0)->isStringValue()) {
      collection = arr->getMember(0)->getString();
      // otherwise the "" collection will not be found
    }

    latitude = arr->getMember(1);
    longitude = arr->getMember(2);

    if (numArgs > 4) {
      distanceName = arr->getMember(4);
    }

    if (isNear) {
      // the limit is optional for NEAR: only read argument 3 if it was
      // actually passed instead of accessing a non-existing member
      if (numArgs > 3) {
        limit = arr->getMember(3);
      }
    } else {
      // the radius is not optional for WITHIN, so it is fetched
      // unconditionally, exactly as before
      radius = arr->getMember(3);
    }
  }
};
/// @brief arguments extracted from a FULLTEXT function call
///
/// FULLTEXT(collection, "attribute", "search"[, 100 /*limit*/])
/// Only the collection, the attribute and the optional limit are picked up
/// here; the search string (argument 2) is not extracted by this struct.
struct FulltextParams {
  std::string collection;    // stays empty if argument 0 is not a string
  std::string attribute;     // stays empty if argument 1 is not a string
  AstNode* limit = nullptr;  // argument 3, nullptr when omitted

  /// @param node function call node (NODE_TYPE_FCALL) whose member 0 is the
  ///             argument array
  FulltextParams(AstNode const* node) {
    TRI_ASSERT(node->type == AstNodeType::NODE_TYPE_FCALL);
    AstNode* args = node->getMember(0);
    TRI_ASSERT(args->type == AstNodeType::NODE_TYPE_ARRAY);

    AstNode* collectionArg = args->getMember(0);
    if (collectionArg->isStringValue()) {
      collection = collectionArg->getString();
    }

    AstNode* attributeArg = args->getMember(1);
    if (attributeArg->isStringValue()) {
      attribute = attributeArg->getString();
    }

    bool const hasLimit = args->numMembers() > 3;
    if (hasLimit) {
      limit = args->getMember(3);
    }
  }
};
/// @brief return the root AST node of the calculation node's expression,
/// obtained through Expression::nodeForModification()
AstNode* getAstNode(CalculationNode* c) {
  auto* expression = c->expression();
  return expression->nodeForModification();
}
/// @brief return the Function stored in a function call node
/// @return the node's data cast to Function*, or nullptr if the node is not
///         of type NODE_TYPE_FCALL
Function* getFunction(AstNode const* ast) {
  bool const isFunctionCall = (ast->type == AstNodeType::NODE_TYPE_FCALL);
  if (!isFunctionCall) {
    return nullptr;
  }
  return static_cast<Function*>(ast->getData());
}
/// @brief wrap the node chain `first` ... `last` into a subquery
///
/// The resulting subquery looks like this:
///
///    singleton -> first -> ... -> last -> [limit] -> return
///
/// and is inserted into the plan in front of `node`.
///
/// @param plan            plan that owns all involved nodes
/// @param node            node in front of which the subquery is inserted
/// @param first           first node of the chain, gets the singleton as
///                        dependency
/// @param last            last node of the chain, feeds the return node
/// @param lastOutVariable variable that the subquery returns
/// @param limit           optional limit; nullptr or a null value means that
///                        no limit node is created
/// @return a reference node to the subquery's out variable, usable as a
///         replacement for (a part of) a calculation node's expression
/// @throws TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH if `limit` is
///         given but is neither an integer nor null
AstNode* createSubqueryWithLimit(
    ExecutionPlan* plan,
    ExecutionNode* node,
    ExecutionNode* first,
    ExecutionNode* last,
    Variable* lastOutVariable,
    AstNode* limit
) {
  // a limit counts as present only if it was passed and is not null
  bool const hasLimit = (limit != nullptr) && !limit->isNullValue();
  if (hasLimit && !limit->isIntValue()) {
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH,"limit parameter is for wrong type");
  }

  auto* ast = plan->getAst();

  // start and end of the subquery; the return node hands out the
  // variable produced by the wrapped chain
  ExecutionNode* singletonNode =
      plan->registerNode(new SingletonNode(plan, plan->nextId()));
  ExecutionNode* returnNode =
      plan->registerNode(new ReturnNode(plan, plan->nextId(), lastOutVariable));

  // singleton -> first ... last -> return
  first->addDependency(singletonNode);
  returnNode->addDependency(last);

  // squeeze the limit in directly behind `last`
  if (hasLimit) {
    ExecutionNode* limitNode = plan->registerNode(
        new LimitNode(plan, plan->nextId(), 0 /*offset*/, limit->getIntValue()));
    plan->insertAfter(last, limitNode);
  }

  // the subquery itself, placed in front of `node`
  Variable* subqueryOutVariable = ast->variables()->createTemporaryVariable();
  ExecutionNode* subqueryNode = plan->registerSubquery(
      new SubqueryNode(plan, plan->nextId(), returnNode, subqueryOutVariable));
  plan->insertBefore(node, subqueryNode);

  // callers replace the function call with this reference
  return ast->createNodeReference(subqueryOutVariable);
}
/// @brief build the latitude / longitude access expressions for a document,
/// based on the first geo index found on the collection
///
/// @param ast    AST used to create the new nodes
/// @param docRef reference to the document the accesses are built upon
/// @param params parsed NEAR / WITHIN arguments (only `collection` is read)
/// @return pair of (latitude access, longitude access)
/// @throws TRI_ERROR_QUERY_GEO_INDEX_MISSING if the collection has no geo
///         index, or if the first geo index has an unexpected type / number
///         of fields
std::pair<AstNode*, AstNode*> getAttributeAccessFromIndex(Ast* ast, AstNode* docRef, NearOrWithinParams& params) {
  auto* trx = ast->query()->trx();
  AstNode* accessNodeLat = docRef;
  AstNode* accessNodeLon = docRef;
  bool indexFound = false;

  // figure out index to use
  auto indexes = trx->indexesForCollection(params.collection);
  for (auto const& idx : indexes) {
    if (!Index::isGeoIndex(idx->type())) {
      continue;
    }

    // we take the first geo index that is found and do not look any further
    bool const isGeo1 = idx->type() == Index::IndexType::TRI_IDX_TYPE_GEO1_INDEX;
    bool const isGeo2 = idx->type() == Index::IndexType::TRI_IDX_TYPE_GEO2_INDEX;
    bool const isGeo = idx->type() == Index::IndexType::TRI_IDX_TYPE_GEO_INDEX;
    auto const& fields = idx->fields();  // bound by reference, no copy

    if ((isGeo2 || isGeo) && fields.size() == 2) {
      // latitude and longitude live in two individual attributes
      accessNodeLat = ast->createNodeAttributeAccess(accessNodeLat, fields[0]);
      accessNodeLon = ast->createNodeAttributeAccess(accessNodeLon, fields[1]);
      indexFound = true;
    } else if ((isGeo1 || isGeo) && fields.size() == 1) {
      // both coordinates live in one array attribute; the index's
      // "geoJson" flag decides which array position holds the latitude
      AstNode* base = ast->createNodeAttributeAccess(accessNodeLon, fields[0]);
      VPackBuilder builder;
      idx->toVelocyPack(builder, true, false);
      bool const geoJson = basics::VelocyPackHelper::getBooleanValue(builder.slice(), "geoJson", false);
      accessNodeLat = ast->createNodeIndexedAccess(base, ast->createNodeValueInt(geoJson ? 1 : 0));
      accessNodeLon = ast->createNodeIndexedAccess(base, ast->createNodeValueInt(geoJson ? 0 : 1));
      indexFound = true;
    }
    break;
  }  // for index in collection

  if (!indexFound) {
    THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_GEO_INDEX_MISSING);
  }

  return std::make_pair(accessNodeLat, accessNodeLon);
}
/// @brief replace a NEAR or WITHIN function call by an equivalent subquery
///
/// Builds the nodes EnumerateCollection -> Calculation(DISTANCE) ->
/// Sort (NEAR) or Filter (WITHIN) [-> Calculation(MERGE)] and wraps them
/// into a subquery that is inserted before `calcNode`.
///
/// @param funAstNode the NEAR / WITHIN function call node
/// @param calcNode   the calculation node that contains the function call
/// @param plan       the plan to be modified
/// @param isNear     true for NEAR, false for WITHIN
/// @return reference node to the subquery's out variable
/// @throws TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH if the collection
///         cannot be added to the query, the radius is not numeric or the
///         distance name is not a string (and, via createSubqueryWithLimit,
///         if the limit has the wrong type)
/// @throws TRI_ERROR_QUERY_GEO_INDEX_MISSING (via
///         getAttributeAccessFromIndex) if no usable geo index is found
AstNode* replaceNearOrWithin(AstNode* funAstNode, ExecutionNode* calcNode, ExecutionPlan* plan, bool isNear){
auto* ast = plan->getAst();
auto* query = ast->query();
NearOrWithinParams params(funAstNode,isNear);
// The function call is replaced by a subquery of roughly this shape:
// RETURN (
// FOR d IN col
// SORT DISTANCE(d.lat, d.long, param.lat, param.lon) // NEAR
// // FILTER DISTANCE(d.lat, d.long, param.lat, param.lon) <= param.radius // WITHIN
// MERGE(d, { param.distname : DISTANCE(d.lat, d.long, param.lat, param.lon)})
// LIMIT param.limit // NEAR
// RETURN d MERGE {param.distname : calculated_distance}
// )
//// enumerate collection
auto* aqlCollection = aql::addCollectionToQuery(query, params.collection, false);
if(!aqlCollection) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH,"collection used in NEAR or WITHIN not found");
}
Variable* enumerateOutVariable = ast->variables()->createTemporaryVariable();
ExecutionNode* eEnumerate = plan->registerNode(
// the enumerated documents are written to enumerateOutVariable
new EnumerateCollectionNode(
plan, plan->nextId(), aqlCollection,
enumerateOutVariable, false
)
);
//// build sort condition - DISTANCE(d.lat, d.long, param.lat, param.lon)
auto* docRef = ast->createNodeReference(enumerateOutVariable);
// the attribute accesses for latitude / longitude are derived from the
// collection's geo index
AstNode *accessNodeLat, *accessNodeLon;
std::tie(accessNodeLat, accessNodeLon) = getAttributeAccessFromIndex(ast, docRef, params);
auto* argsArray = ast->createNodeArray();
argsArray->addMember(accessNodeLat);
argsArray->addMember(accessNodeLon);
argsArray->addMember(params.latitude);
argsArray->addMember(params.longitude);
auto* funDist = ast->createNodeFunctionCall(TRI_CHAR_LENGTH_PAIR("DISTANCE"), argsArray);
// NEAR uses the plain distance, WITHIN wraps it into a comparison below
AstNode* expressionAst = funDist;
//// build filter condition for WITHIN: DISTANCE(...) <= radius
if(!isNear){
if(!params.radius->isNumericValue()){
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH,"radius argument is not a numeric value");
}
expressionAst = ast->createNodeBinaryOperator(
AstNodeType::NODE_TYPE_OPERATOR_BINARY_LE, funDist ,params.radius
);
}
//// create calculation node used in SORT or FILTER
//Calculation Node will acquire ownership
Expression* calcExpr = new Expression(plan, ast, expressionAst);
// put condition into calculation node
Variable* calcOutVariable = ast->variables()->createTemporaryVariable();
ExecutionNode* eCalc = plan->registerNode(
new CalculationNode(plan, plan->nextId(), calcExpr, nullptr, calcOutVariable)
);
eCalc->addDependency(eEnumerate);
//// create SORT or FILTER
ExecutionNode* eSortOrFilter = nullptr;
if(isNear){
// use calculation node in sort node (ascending by distance)
SortElementVector sortElements { SortElement{ calcOutVariable, /*asc*/ true} };
eSortOrFilter = plan->registerNode(
new SortNode(plan, plan->nextId(), sortElements, false)
);
} else {
// calcOutVariable holds the result of the comparison built above
eSortOrFilter = plan->registerNode(
new FilterNode(plan, plan->nextId(), calcOutVariable)
);
}
eSortOrFilter->addDependency(eCalc);
//// create MERGE(d, { param.distname : DISTANCE(d.lat, d.long, param.lat, param.lon)})
if(params.distanceName) { // a distance name was passed: merge the distance into each document
if(!params.distanceName->isStringValue()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH,"distance argument is not a string");
}
AstNode* elem = nullptr;
AstNode* funDistMerge = nullptr;
if(isNear){
// for NEAR the distance is already available in calcOutVariable
funDistMerge = ast->createNodeReference(calcOutVariable);
} else {
// for WITHIN calcOutVariable holds the comparison result, not the
// distance, so the DISTANCE call itself is used a second time
//NOTE - recycling the Ast seems to work - tested with ASAN
funDistMerge = funDist;
}
// constant names become a regular object element, everything else a
// calculated object element
if(params.distanceName->isConstant()){
elem = ast->createNodeObjectElement(params.distanceName->getStringValue()
,params.distanceName->getStringLength()
,funDistMerge);
} else {
elem = ast->createNodeCalculatedObjectElement(params.distanceName, funDistMerge );
}
auto* obj = ast->createNodeObject();
obj->addMember(elem);
auto* argsArrayMerge = ast->createNodeArray();
argsArrayMerge->addMember(docRef);
argsArrayMerge->addMember(obj);
auto* funMerge = ast->createNodeFunctionCall(TRI_CHAR_LENGTH_PAIR("MERGE"), argsArrayMerge);
Variable* calcMergeOutVariable = ast->variables()->createTemporaryVariable();
Expression* calcMergeExpr = new Expression(plan, ast, funMerge);
ExecutionNode* eCalcMerge = plan->registerNode(
new CalculationNode(plan, plan->nextId(), calcMergeExpr, nullptr, calcMergeOutVariable)
);
plan->insertAfter(eSortOrFilter, eCalcMerge);
//// wrap plan part into subquery, returning the merged documents
return createSubqueryWithLimit(plan, calcNode, eEnumerate, eCalcMerge, calcMergeOutVariable, params.limit);
}
//// no distance name given: wrap plan part into subquery and return the
//// enumerated documents unchanged
return createSubqueryWithLimit(plan, calcNode,
eEnumerate /* first */, eSortOrFilter /* last */,
enumerateOutVariable, params.limit);
}
/// @brief replace a FULLTEXT function call by a subquery around an IndexNode
///
/// @param funAstNode the FULLTEXT function call node (NODE_TYPE_FCALL)
/// @param calcNode   the calculation node that contains the function call
/// @param plan       the plan to be modified
/// @return reference node to the subquery's out variable
/// @throws TRI_ERROR_QUERY_FULLTEXT_INDEX_MISSING if the collection has no
///         fulltext index on the requested attribute
/// @throws TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH if the collection
///         cannot be added to the query
AstNode* replaceFullText(AstNode* funAstNode, ExecutionNode* calcNode, ExecutionPlan* plan) {
  auto* ast = plan->getAst();
  auto* query = ast->query();
  auto* trx = query->trx();
  FulltextParams params(funAstNode);  // must be NODE_TYPE_FCALL

  // The index lookup comes first, as it is the step that is most likely
  // to fail.
  //
  // step 1: find a fulltext index whose first field is the requested
  // attribute
  std::shared_ptr<arangodb::Index> index;
  std::vector<basics::AttributeName> field;
  TRI_ParseAttributeString(params.attribute, field, false);
  auto indexes = trx->indexesForCollection(params.collection);
  for (auto& candidate : indexes) {
    if (candidate->type() != arangodb::Index::IndexType::TRI_IDX_TYPE_FULLTEXT_INDEX) {
      continue;
    }
    if (basics::AttributeName::isIdentical(candidate->fields()[0], field, false /*ignore expansion in last?!*/)) {
      index = candidate;
      break;
    }
  }
  if (index == nullptr) {  // not found or error
    THROW_ARANGO_EXCEPTION(TRI_ERROR_QUERY_FULLTEXT_INDEX_MISSING);
  }

  // step 2: collect everything else the IndexNode needs
  auto* aqlCollection = aql::addCollectionToQuery(query, params.collection, false);
  if (aqlCollection == nullptr) {
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH,"collection used in FULLTEXT not found");
  }

  // the function call itself serves as the index condition
  auto condition = std::make_unique<Condition>(ast);
  condition->andCombine(funAstNode);
  condition->normalize(plan);

  // the matching documents end up in a fresh variable
  Variable* indexOutVariable = ast->variables()->createTemporaryVariable();
  ExecutionNode* indexNode = plan->registerNode(new IndexNode(
      plan, plan->nextId(), aqlCollection, indexOutVariable,
      std::vector<transaction::Methods::IndexHandle>{
          transaction::Methods::IndexHandle{index}},
      std::move(condition), IndexIteratorOptions()));

  // the index node is both the first and the last node of the subquery
  return createSubqueryWithLimit(plan, calcNode, indexNode, indexNode, indexOutVariable, params.limit);
}
} // namespace
//! @brief replace legacy JS Functions with pure AQL
//!
//! Visits the expression of every calculation node in the plan and replaces
//! each NEAR, WITHIN and FULLTEXT function call by a reference to a newly
//! created subquery. The plan is handed back to the optimizer afterwards,
//! together with a flag telling whether anything was replaced.
void arangodb::aql::replaceNearWithinFulltext(Optimizer* opt,
                                              std::unique_ptr<ExecutionPlan> plan,
                                              OptimizerRule const* rule) {
  bool modified = false;

  SmallVector<ExecutionNode*>::allocator_type::arena_type a;
  SmallVector<ExecutionNode*> nodes{a};
  plan->findNodesOfType(nodes, ExecutionNode::CALCULATION, true);

  for (auto const& node : nodes) {
    // hands back either the untouched node or its replacement
    auto visitor = [&modified, &node, &plan](AstNode* astnode) -> AstNode* {
      // a non-null function implies astnode->type == NODE_TYPE_FCALL
      auto* fun = getFunction(astnode);
      if (fun == nullptr) {
        return astnode;
      }

      AstNode* replacement = nullptr;
      if (fun->name == "NEAR") {
        replacement = replaceNearOrWithin(astnode, node, plan.get(), true /*isNear*/);
      } else if (fun->name == "WITHIN") {
        replacement = replaceNearOrWithin(astnode, node, plan.get(), false /*isNear*/);
      } else if (fun->name == "FULLTEXT") {
        replacement = replaceFullText(astnode, node, plan.get());
      } else {
        // any other function is left alone
        return astnode;
      }
      TRI_ASSERT(replacement);

      if (replacement == nullptr) {
        return astnode;
      }
      modified = true;
      return replacement;
    };

    auto* calc = static_cast<CalculationNode*>(node);
    AstNode* original = getAstNode(calc);
    AstNode* replacement = Ast::traverseAndModify(original, visitor);

    // traverseAndModify has no access to the root's parent, so a replaced
    // root node has to be swapped in by hand
    if (replacement != original) {
      calc->expression()->replaceNode(replacement);
    }
  }

  opt->addPlan(std::move(plan), rule, modified);
}  // replaceNearWithinFulltext