1
0
Fork 0
arangodb/arangod/IResearch/IResearchViewOptimizerRules...

604 lines
18 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2017 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////
#include "IResearchViewOptimizerRules.h"
#include "IResearchViewNode.h"
#include "IResearchFilterFactory.h"
#include "IResearchOrderFactory.h"
#include "AqlHelper.h"
#include "Aql/ExecutionNode.h"
#include "Aql/ExecutionPlan.h"
#include "Aql/ClusterNodes.h"
#include "Aql/Condition.h"
#include "Aql/SortNode.h"
#include "Aql/Optimizer.h"
#include "Aql/WalkerWorker.h"
#include "Cluster/ServerState.h"
using namespace arangodb::iresearch;
using namespace arangodb::aql;
using EN = arangodb::aql::ExecutionNode;
NS_LOCAL
std::vector<arangodb::iresearch::IResearchSort> buildSort(
ExecutionPlan const& plan,
arangodb::aql::Variable const& ref,
std::vector<std::pair<Variable const*, bool>> const& sorts,
std::unordered_map<VariableId, AstNode const*> const& vars,
bool scorersOnly
) {
std::vector<IResearchSort> entries;
QueryContext const ctx { nullptr, nullptr, nullptr, nullptr, &ref };
for (auto& sort : sorts) {
auto const* var = sort.first;
auto varId = var->id;
AstNode const* rootNode = nullptr;
auto it = vars.find(varId);
if (it != vars.end()) {
auto const* node = rootNode = it->second;
while (node && NODE_TYPE_ATTRIBUTE_ACCESS == node->type) {
node = node->getMember(0);
}
if (node && NODE_TYPE_REFERENCE == node->type) {
var = reinterpret_cast<Variable const*>(node->getData());
}
} else {
auto const* setter = plan.getVarSetBy(varId);
if (setter && EN::CALCULATION == setter->getType()) {
auto const* expr = static_cast<CalculationNode const*>(setter)->expression();
if (expr) {
rootNode = expr->node();
}
}
}
if (var && rootNode && (!scorersOnly || OrderFactory::scorer(nullptr, *rootNode, ctx))) {
entries.emplace_back(var, rootNode, sort.second);
}
}
return entries;
}
///////////////////////////////////////////////////////////////////////////////
/// @class IResearchViewConditionFinder
///////////////////////////////////////////////////////////////////////////////
class IResearchViewConditionFinder final
: public arangodb::aql::WalkerWorker<ExecutionNode> {
public:
IResearchViewConditionFinder(
ExecutionPlan* plan,
std::unordered_map<size_t, ExecutionNode*>* changes,
bool* hasEmptyResult) noexcept
: _plan(plan),
_changes(changes),
_hasEmptyResult(hasEmptyResult) {
}
virtual bool before(ExecutionNode*) override;
virtual bool enterSubquery(ExecutionNode*, ExecutionNode*) override {
return false;
}
private:
bool handleFilterCondition(
ExecutionNode* en,
Condition& condition
);
ExecutionPlan* _plan;
std::unordered_map<VariableId, AstNode const*> _variableDefinitions;
std::unordered_set<VariableId> _filters;
std::vector<std::pair<Variable const*, bool>> _sorts;
// note: this class will never free the contents of this map
std::unordered_map<size_t, ExecutionNode*>* _changes;
bool* _hasEmptyResult;
}; // IResearchViewConditionFinder
bool IResearchViewConditionFinder::before(ExecutionNode* en) {
switch (en->getType()) {
case EN::LIMIT:
// LIMIT invalidates the sort expression we already found
_sorts.clear();
_filters.clear();
break;
case EN::SINGLETON:
case EN::NORESULTS:
// in all these cases we better abort
return true;
case EN::FILTER: {
std::vector<Variable const*> invars(en->getVariablesUsedHere());
TRI_ASSERT(invars.size() == 1);
// register which variable is used in a FILTER
_filters.emplace(invars[0]->id);
break;
}
case EN::SORT: {
// register which variables are used in a SORT
if (_sorts.empty()) {
for (auto& it : static_cast<SortNode const*>(en)->elements()) {
_sorts.emplace_back(it.var, it.ascending);
TRI_IF_FAILURE("IResearchViewConditionFinder::sortNode") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
}
}
}
break;
}
case EN::CALCULATION: {
auto outvars = en->getVariablesSetHere();
TRI_ASSERT(outvars.size() == 1);
_variableDefinitions.emplace(
outvars[0]->id,
static_cast<CalculationNode const*>(en)->expression()->node());
TRI_IF_FAILURE("IResearchViewConditionFinder::variableDefinition") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
}
break;
}
case EN::ENUMERATE_IRESEARCH_VIEW: {
auto node = static_cast<IResearchViewNode const*>(en);
if (_changes->find(node->id()) != _changes->end()) {
// already optimized this node
break;
}
Condition filterCondition(_plan->getAst());
if (!handleFilterCondition(en, filterCondition)) {
break;
}
auto sortCondition = buildSort(
*_plan,
node->outVariable(),
_sorts,
_variableDefinitions,
true // node->isInInnerLoop() // build scorers only in case if we're inside a loop
);
if (filterCondition.isEmpty() && sortCondition.empty()) {
// no conditions left
break;
}
auto const canUseView = !filterCondition.root() || FilterFactory::filter(
nullptr,
{ nullptr, nullptr, nullptr, nullptr, &node->outVariable() },
*filterCondition.root()
);
if (canUseView) {
auto newNode = std::make_unique<arangodb::iresearch::IResearchViewNode>(
*_plan,
_plan->nextId(),
node->vocbase(),
node->view(),
node->outVariable(),
filterCondition.root(),
std::move(sortCondition)
);
TRI_IF_FAILURE("IResearchViewConditionFinder::insertViewNode") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
}
// We keep this node's change
_changes->emplace(node->id(), newNode.get());
newNode.release();
} else {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUERY_PARSE, "filter clause "
"not yet supported with view");
}
break;
}
default:
// in these cases we simply ignore the intermediate nodes, note
// that we have taken care of nodes that could throw exceptions
// above.
break;
}
return false;
}
bool IResearchViewConditionFinder::handleFilterCondition(
ExecutionNode* en,
Condition& condition) {
bool foundCondition = false;
for (auto& it : _variableDefinitions) {
if (_filters.find(it.first) != _filters.end()) {
// a variable used in a FILTER
AstNode* var = const_cast<AstNode*>(it.second);
if (!var->canThrow() && var->isDeterministic() && var->isSimple()) {
// replace all variables inside the FILTER condition with the
// expressions represented by the variables
var = it.second->clone(_plan->getAst());
auto func = [this](AstNode* node) -> AstNode* {
if (node->type == NODE_TYPE_REFERENCE) {
auto variable = static_cast<Variable*>(node->getData());
if (variable != nullptr) {
auto setter = _plan->getVarSetBy(variable->id);
if (setter != nullptr && setter->getType() == EN::CALCULATION) {
auto s = static_cast<CalculationNode*>(setter);
auto filterExpression = s->expression();
AstNode* inNode = filterExpression->nodeForModification();
if (!inNode->canThrow() && inNode->isDeterministic() &&
inNode->isSimple()) {
return inNode;
}
}
}
}
return node;
};
var = Ast::traverseAndModify(var, func);
}
condition.andCombine(var);
foundCondition = true;
}
}
// normalize the condition
condition.normalize(_plan);
TRI_IF_FAILURE("ConditionFinder::normalizePlan") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
}
bool const conditionIsImpossible = (foundCondition && condition.isEmpty());
if (conditionIsImpossible) {
// condition is always false
for (auto const& x : en->getParents()) {
auto noRes = new NoResultsNode(_plan, _plan->nextId());
_plan->registerNode(noRes);
_plan->insertDependency(x, noRes);
*_hasEmptyResult = true;
}
return false;
}
auto const& varsValid = en->getVarsValid();
// remove all invalid variables from the condition
if (condition.removeInvalidVariables(varsValid)) {
// removing left a previously non-empty OR block empty...
// this means we can't use the index to restrict the results
return false;
}
return true;
}
NS_END // NS_LOCAL
NS_BEGIN(arangodb)
NS_BEGIN(iresearch)
/// @brief move filters and sort conditions into views
void handleViewsRule(
arangodb::aql::Optimizer* opt,
std::unique_ptr<arangodb::aql::ExecutionPlan> plan,
arangodb::aql::OptimizerRule const* rule
) {
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
std::unordered_map<size_t, ExecutionNode*> changes;
auto cleanupChanges = [&changes](){
for (auto& v : changes) {
delete v.second;
}
};
TRI_DEFER(cleanupChanges());
// newly created view nodes (replacement)
std::unordered_set<ExecutionNode const*> createdViewNodes;
// try to find `EnumerateViewNode`s and push corresponding filters and sorts inside
plan->findEndNodes(nodes, true);
bool hasEmptyResult = false;
for (auto const& n : nodes) {
IResearchViewConditionFinder finder(plan.get(), &changes, &hasEmptyResult);
n->walk(finder);
}
createdViewNodes.reserve(changes.size());
for (auto& it : changes) {
auto*& node = it.second;
if (!node || ExecutionNode::ENUMERATE_IRESEARCH_VIEW != node->getType()) {
// filter out invalid nodes
continue;
}
plan->registerNode(node);
plan->replaceNode(plan->getNodeById(it.first), node);
// necessary here, because replaceNode will set "varUsageComputed" to false
// however, we want to keep the *original* variable definitions here (e.g.
// CalculationNodes create the sort and filter statements and not the
// EnumerateViewNode). If we recalculated the variable usage here, from now
// on the EnumerateViewNode would produce the sort and filter variables, and
// the below logic (that filters on the variable setters being CalculationNodes)
// would fail
plan->setVarUsageComputed();
createdViewNodes.insert(node);
// prevent double deletion by cleanupChanges()
node = nullptr;
}
if (!changes.empty()) {
std::unordered_set<ExecutionNode*> toUnlink;
// remove filters covered by a view
nodes.clear(); // ensure array is empty
plan->findNodesOfType(nodes, ExecutionNode::FILTER, true);
// `createdViewNodes` will not change
auto const noMatch = createdViewNodes.end();
for (auto* node : nodes) {
// find the node with the filter expression
auto inVar = static_cast<FilterNode const*>(node)->getVariablesUsedHere();
TRI_ASSERT(inVar.size() == 1);
auto setter = plan->getVarSetBy(inVar[0]->id);
if (!setter || setter->getType() != EN::CALCULATION) {
continue;
}
auto const it = createdViewNodes.find(setter->getLoop());
if (it != noMatch) {
toUnlink.emplace(node);
toUnlink.emplace(setter);
static_cast<CalculationNode*>(setter)->canRemoveIfThrows(true);
}
}
// FIXME remove all sorts in case if view doesn't located inside a loop,
// otherwise remove setters for covered sorts
// remove setters covered by a view internally
for (auto* node : createdViewNodes) {
auto& viewNode = static_cast<IResearchViewNode const&>(*node);
for (auto const& sort : viewNode.sortCondition()) {
auto const* var = sort.var;
if (!var) {
continue;
}
auto* setter = plan->getVarSetBy(var->id);
if (!setter || EN::CALCULATION != setter->getType()) {
continue;
}
toUnlink.emplace(setter);
static_cast<CalculationNode*>(setter)->canRemoveIfThrows(true);
}
}
// nodes.clear(); // ensure array is empty
// plan->findNodesOfType(nodes, ExecutionNode::SORT, true);
//
// for (auto* node : nodes) {
// // find the node with the sort expression
// auto inVar = static_cast<aql::SortNode const*>(node)->getVariablesUsedHere();
// TRI_ASSERT(!inVar.empty());
//
// for (auto& var : inVar) {
// auto setter = plan->getVarSetBy(var->id);
//
// if (!setter || setter->getType() != ExecutionNode::CALCULATION) {
// continue;
// }
//
// auto const it = createdViewNodes.find(setter->getLoop());
//
// if (it != noMatch) {
// if (!(*it)->isInInnerLoop()) {
// toUnlink.emplace(node);
// toUnlink.emplace(setter);
// }
////FIXME uncomment when EnumerateViewNode can create variables
//// toUnlink.emplace(setter);
//// if (!(*it)->isInInnerLoop()) {
//// toUnlink.emplace(node);
//// }
// static_cast<CalculationNode*>(setter)->canRemoveIfThrows(true);
// }
// }
// }
//
plan->unlinkNodes(toUnlink);
}
opt->addPlan(std::move(plan), rule, !changes.empty());
}
void scatterViewInClusterRule(
arangodb::aql::Optimizer* opt,
std::unique_ptr<arangodb::aql::ExecutionPlan> plan,
arangodb::aql::OptimizerRule const* rule
) {
TRI_ASSERT(arangodb::ServerState::instance()->isCoordinator());
bool wasModified = false;
SmallVector<ExecutionNode*>::allocator_type::arena_type a;
SmallVector<ExecutionNode*> nodes{a};
// find subqueries
std::unordered_map<ExecutionNode*, ExecutionNode*> subqueries;
plan->findNodesOfType(nodes, ExecutionNode::SUBQUERY, true);
for (auto& it : nodes) {
subqueries.emplace(
static_cast<SubqueryNode const*>(it)->getSubquery(), it
);
}
// we are a coordinator. now look in the plan for nodes of type
// EnumerateIResearchViewNode
nodes.clear();
plan->findNodesOfType(nodes, ExecutionNode::ENUMERATE_IRESEARCH_VIEW, true);
for (auto* node : nodes) {
TRI_ASSERT(node);
auto& viewNode = static_cast<IResearchViewNode&>(*node);
if (viewNode.collections().empty()) {
// FIXME we have to invalidate plan cache (if exists)
// in case if corresponding view has been modified
// view has no associated collection, nothing to scatter
continue;
}
auto const& parents = node->getParents();
auto const& deps = node->getDependencies();
TRI_ASSERT(deps.size() == 1);
// don't do this if we are already distributing!
if (deps[0]->getType() == ExecutionNode::REMOTE) {
auto const* firstDep = deps[0]->getFirstDependency();
if (!firstDep || firstDep->getType() == ExecutionNode::DISTRIBUTE) {
continue;
}
}
if (plan->shouldExcludeFromScatterGather(node)) {
continue;
}
auto& vocbase = viewNode.vocbase();
auto& view = viewNode.view();
bool const isRootNode = plan->isRoot(node);
plan->unlinkNode(node, true);
// insert a scatter node
auto scatterNode = plan->registerNode(
std::make_unique<IResearchViewScatterNode>(
*plan, plan->nextId(), vocbase, view
));
TRI_ASSERT(!deps.empty());
scatterNode->addDependency(deps[0]);
// insert a remote node
auto* remoteNode = plan->registerNode(
std::make_unique<RemoteNode>(
plan.get(),
plan->nextId(),
&vocbase,
"", "", ""
));
TRI_ASSERT(scatterNode);
remoteNode->addDependency(scatterNode);
node->addDependency(remoteNode); // re-link with the remote node
// insert another remote node
remoteNode = plan->registerNode(
std::make_unique<RemoteNode>(
plan.get(),
plan->nextId(),
&vocbase,
"", "", ""
));
TRI_ASSERT(node);
remoteNode->addDependency(node);
// insert a gather node
auto* gatherNode = plan->registerNode(
std::make_unique<GatherNode>(
plan.get(),
plan->nextId(),
&vocbase,
nullptr //FIXME collection
));
TRI_ASSERT(remoteNode);
gatherNode->addDependency(remoteNode);
// FIXME
// if (!elements.empty() && gatherNode->collection()->numberOfShards() > 1) {
// gatherNode->setElements(elements);
// }
// and now link the gather node with the rest of the plan
if (parents.size() == 1) {
parents[0]->replaceDependency(deps[0], gatherNode);
}
// check if the node that we modified was at the end of a subquery
auto it = subqueries.find(node);
if (it != subqueries.end()) {
auto* subQueryNode = static_cast<SubqueryNode*>((*it).second);
subQueryNode->setSubquery(gatherNode, true);
}
if (isRootNode) {
// if we replaced the root node, set a new root node
plan->root(gatherNode);
}
wasModified = true;
}
opt->addPlan(std::move(plan), rule, wasModified);
}
NS_END // iresearch
NS_END // arangodb
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------