//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2017 ArangoDB GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Andrey Abramov /// @author Vasiliy Nabatchikov //////////////////////////////////////////////////////////////////////////////// #include "IResearchViewOptimizerRules.h" #include "Aql/ClusterNodes.h" #include "Aql/Condition.h" #include "Aql/ExecutionNode.h" #include "Aql/ExecutionPlan.h" #include "Aql/Function.h" #include "Aql/IResearchViewNode.h" #include "Aql/Optimizer.h" #include "Aql/OptimizerRule.h" #include "Aql/Query.h" #include "Aql/SortCondition.h" #include "Aql/SortNode.h" #include "Aql/WalkerWorker.h" #include "Cluster/ServerState.h" #include "IResearch/AqlHelper.h" #include "IResearch/IResearchView.h" #include "IResearch/IResearchViewCoordinator.h" #include "IResearch/IResearchFilterFactory.h" #include "IResearch/IResearchOrderFactory.h" #include "Utils/CollectionNameResolver.h" #include "VocBase/LogicalCollection.h" #include "utils/misc.hpp" using namespace arangodb::iresearch; using namespace arangodb::aql; using EN = arangodb::aql::ExecutionNode; namespace { inline IResearchViewSort const& primarySort(arangodb::LogicalView const& view) { if (arangodb::ServerState::instance()->isCoordinator()) { auto& viewImpl = arangodb::LogicalView::cast(view); return viewImpl.primarySort(); } auto& viewImpl = arangodb::LogicalView::cast(view); return viewImpl.primarySort(); } bool addView(arangodb::LogicalView const& view, arangodb::aql::Query& query) { auto* collections = query.collections(); if (!collections) { return false; } // linked collections auto visitor = [&query](TRI_voc_cid_t cid) { query.addCollection(arangodb::basics::StringUtils::itoa(cid), arangodb::AccessMode::Type::READ); return true; }; return view.visitCollections(visitor); } bool optimizeSearchCondition(IResearchViewNode& viewNode, Query& query, ExecutionPlan& plan) { auto view = viewNode.view(); // add view and linked collections to the query if (!addView(*view, query)) { THROW_ARANGO_EXCEPTION_MESSAGE( TRI_ERROR_QUERY_PARSE, "failed to process all collections linked with the view '" + view->name() + "'"); } // build search condition Condition searchCondition(plan.getAst()); if (!viewNode.filterConditionIsEmpty()) { searchCondition.andCombine(&viewNode.filterCondition()); searchCondition.normalize(&plan, true); // normalize the condition if (searchCondition.isEmpty()) { // condition is always false for (auto const& x : viewNode.getParents()) { plan.insertDependency(x, plan.registerNode( std::make_unique(&plan, plan.nextId()))); } return false; } auto const& varsValid = viewNode.getVarsValid(); // remove all invalid variables from the condition if (searchCondition.removeInvalidVariables(varsValid)) { // removing left a previously non-empty OR block empty... // this means we can't use the index to restrict the results return false; } } // check filter condition if present if (searchCondition.root()) { auto filterCreated = FilterFactory::filter( nullptr, { query.trx(), nullptr, nullptr, nullptr, &viewNode.outVariable() }, *searchCondition.root() ); if (filterCreated.fail()) { THROW_ARANGO_EXCEPTION_MESSAGE(filterCreated.errorNumber(), "unsupported SEARCH condition: " + filterCreated.errorMessage()); } } if (!searchCondition.isEmpty()) { viewNode.filterCondition(searchCondition.root()); } return true; } bool optimizeSort(IResearchViewNode& viewNode, ExecutionPlan* plan) { TRI_ASSERT(viewNode.view()); auto& primarySort = ::primarySort(*viewNode.view()); if (primarySort.empty()) { // use system sort return false; } std::unordered_map variableDefinitions; ExecutionNode* current = static_cast(&viewNode); while (true) { current = current->getFirstParent(); if (current == nullptr) { // we are at the bottom end of the plan return false; } if (current->getType() == EN::ENUMERATE_IRESEARCH_VIEW || current->getType() == EN::ENUMERATE_COLLECTION || current->getType() == EN::TRAVERSAL || current->getType() == EN::SHORTEST_PATH || current->getType() == EN::K_SHORTEST_PATHS || current->getType() == EN::INDEX || current->getType() == EN::COLLECT) { // any of these node types will lead to more/less results in the output, // and may as well change the sort order, so let's better abort here return false; } if (current->getType() == EN::CALCULATION) { // pick up the meanings of variables as we walk the plan variableDefinitions.emplace( ExecutionNode::castTo(current)->outVariable()->id, ExecutionNode::castTo(current)->expression()->node()); } if (current->getType() != EN::SORT) { // from here on, we are only interested in sorts continue; } std::vector> sorts; auto* sortNode = ExecutionNode::castTo(current); auto const& sortElements = sortNode->elements(); sorts.reserve(sortElements.size()); for (auto& it : sortElements) { // note: in contrast to regular indexes, views support sorting in different // directions for multiple fields (e.g. SORT doc.a ASC, doc.b DESC). // this is not supported by indexes sorts.emplace_back(it.var, it.ascending); } SortCondition sortCondition(plan, sorts, std::vector>(), arangodb::HashSet>(), variableDefinitions); if (sortCondition.isEmpty() || !sortCondition.isOnlyAttributeAccess()) { // unusable sort condition return false; } // sort condition found, and sorting only by attributes! if (sortCondition.numAttributes() > primarySort.size()) { // the SORT condition in the query has more attributes than the view // is sorted by. we cannot optimize in this case return false; } // check if all sort conditions match for (size_t i = 0; i < sortElements.size(); ++i) { if (sortElements[i].ascending != primarySort.direction(i)) { // view is sorted in different order than requested in SORT condition return false; } } // all sort orders equal! // now finally check how many of the SORT conditions attributes we cover size_t numCovered = sortCondition.coveredAttributes(&viewNode.outVariable(), primarySort.fields()); if (numCovered < sortNode->elements().size()) { // the sort is not covered by the view return false; } // we are almost done... but we need to do a final check and verify that our // sort node itself is not followed by another node that injects more data into // the result or that re-sorts it while (current->hasParent()) { current = current->getFirstParent(); if (current->getType() == EN::ENUMERATE_IRESEARCH_VIEW || current->getType() == EN::ENUMERATE_COLLECTION || current->getType() == EN::TRAVERSAL || current->getType() == EN::SHORTEST_PATH || current->getType() == EN::K_SHORTEST_PATHS || current->getType() == EN::INDEX || current->getType() == EN::COLLECT || current->getType() == EN::SORT) { // any of these node types will lead to more/less results in the output, // and may as well change the sort order, so let's better abort here return false; } } assert(!primarySort.empty()); viewNode.sort(&primarySort, sortElements.size()); sortNode->_reinsertInCluster = false; if (!arangodb::ServerState::instance()->isCoordinator()) { // in cluster node will be unlinked later by 'distributeSortToClusterRule' plan->unlinkNode(sortNode); } return true; } } } // namespace namespace arangodb { namespace iresearch { /// @brief move filters and sort conditions into views void handleViewsRule(arangodb::aql::Optimizer* opt, std::unique_ptr plan, arangodb::aql::OptimizerRule const& rule) { TRI_ASSERT(plan && plan->getAst() && plan->getAst()->query()); // ensure 'Optimizer::addPlan' will be called bool modified = false; auto addPlan = irs::make_finally([opt, &plan, rule, &modified]() { opt->addPlan(std::move(plan), rule, modified); }); if (!plan->contains(EN::ENUMERATE_IRESEARCH_VIEW)) { // no view present in the query, so no need to do any expensive // transformations return; } SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; // replace scorers in all calculation nodes with references plan->findNodesOfType(nodes, EN::CALCULATION, true); ScorerReplacer scorerReplacer; for (auto* node : nodes) { TRI_ASSERT(node && EN::CALCULATION == node->getType()); scorerReplacer.replace(*EN::castTo(node)); } // register replaced scorers to be evaluated by corresponding view nodes nodes.clear(); plan->findNodesOfType(nodes, EN::ENUMERATE_IRESEARCH_VIEW, true); auto& query = *plan->getAst()->query(); std::vector scorers; for (auto* node : nodes) { TRI_ASSERT(node && EN::ENUMERATE_IRESEARCH_VIEW == node->getType()); auto& viewNode = *EN::castTo(node); if (!viewNode.isInInnerLoop()) { // check if we can optimize away a sort that follows the EnumerateView node // this is only possible if the view node itself is not contained in another loop modified = optimizeSort(viewNode, plan.get()); } if (!optimizeSearchCondition(viewNode, query, *plan)) { continue; } // find scorers that have to be evaluated by a view scorerReplacer.extract(viewNode.outVariable(), scorers); viewNode.scorers(std::move(scorers)); modified = true; } // ensure all replaced scorers are covered by corresponding view nodes scorerReplacer.visit([](Scorer const& scorer) -> bool { TRI_ASSERT(scorer.node); auto const funcName = iresearch::getFuncName(*scorer.node); THROW_ARANGO_EXCEPTION_FORMAT( TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, "Non ArangoSearch view variable '%s' is used in scorer function '%s'", scorer.var->name.c_str(), funcName.c_str()); }); } void scatterViewInClusterRule(arangodb::aql::Optimizer* opt, std::unique_ptr plan, arangodb::aql::OptimizerRule const& rule) { TRI_ASSERT(arangodb::ServerState::instance()->isCoordinator()); bool wasModified = false; SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; // find subqueries std::unordered_map subqueries; plan->findNodesOfType(nodes, ExecutionNode::SUBQUERY, true); for (auto& it : nodes) { subqueries.emplace(EN::castTo(it)->getSubquery(), it); } // we are a coordinator. now look in the plan for nodes of type // EnumerateIResearchViewNode nodes.clear(); plan->findNodesOfType(nodes, ExecutionNode::ENUMERATE_IRESEARCH_VIEW, true); TRI_ASSERT(plan->getAst() && plan->getAst()->query() && plan->getAst()->query()->trx()); auto* resolver = plan->getAst()->query()->trx()->resolver(); TRI_ASSERT(resolver); for (auto* node : nodes) { TRI_ASSERT(node); auto& viewNode = *EN::castTo(node); auto& options = viewNode.options(); if (viewNode.empty() || (options.restrictSources && options.sources.empty())) { // FIXME we have to invalidate plan cache (if exists) // in case if corresponding view has been modified // nothing to scatter, view has no associated collections // or node is restricted to empty collection list continue; } auto const& parents = node->getParents(); // intentional copy of the dependencies, as we will be modifying // dependencies later on auto const deps = node->getDependencies(); TRI_ASSERT(deps.size() == 1); // don't do this if we are already distributing! if (deps[0]->getType() == ExecutionNode::REMOTE) { auto const* firstDep = deps[0]->getFirstDependency(); if (!firstDep || firstDep->getType() == ExecutionNode::DISTRIBUTE) { continue; } } if (plan->shouldExcludeFromScatterGather(node)) { continue; } auto& vocbase = viewNode.vocbase(); bool const isRootNode = plan->isRoot(node); plan->unlinkNode(node, true); // insert a scatter node auto scatterNode = plan->registerNode(std::make_unique(plan.get(), plan->nextId())); TRI_ASSERT(!deps.empty()); scatterNode->addDependency(deps[0]); // insert a remote node auto* remoteNode = plan->registerNode(std::make_unique(plan.get(), plan->nextId(), &vocbase, "", "", "")); TRI_ASSERT(scatterNode); remoteNode->addDependency(scatterNode); node->addDependency(remoteNode); // re-link with the remote node // insert another remote node remoteNode = plan->registerNode(std::make_unique(plan.get(), plan->nextId(), &vocbase, "", "", "")); TRI_ASSERT(node); remoteNode->addDependency(node); // so far we don't know the exact number of db servers where // this query will be distributed, mode will be adjusted // during query distribution phase by EngineInfoContainerDBServer auto const sortMode = GatherNode::SortMode::Default; // insert gather node auto* gatherNode = plan->registerNode( std::make_unique(plan.get(), plan->nextId(), sortMode)); TRI_ASSERT(remoteNode); gatherNode->addDependency(remoteNode); // and now link the gather node with the rest of the plan if (parents.size() == 1) { parents[0]->replaceDependency(deps[0], gatherNode); } // check if the node that we modified was at the end of a subquery auto it = subqueries.find(node); if (it != subqueries.end()) { auto* subQueryNode = EN::castTo((*it).second); subQueryNode->setSubquery(gatherNode, true); } if (isRootNode) { // if we replaced the root node, set a new root node plan->root(gatherNode); } wasModified = true; } opt->addPlan(std::move(plan), rule, wasModified); } } // namespace iresearch } // namespace arangodb // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // -----------------------------------------------------------------------------