//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2019 ArangoDB GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Yuriy Popov //////////////////////////////////////////////////////////////////////////////// #include "Aql/Ast.h" #include "Aql/Collection.h" #include "Aql/Condition.h" #include "Aql/Expression.h" #include "Aql/IndexNode.h" #include "Aql/Optimizer.h" #include "IndexNodeOptimizerRules.h" #include "Basics/AttributeNameParser.h" #include "Cluster/ServerState.h" using EN = arangodb::aql::ExecutionNode; namespace { struct NodeWithAttrs { struct AttributeAndField { std::vector attr; arangodb::aql::AstNode* astNode; size_t astNodeChildNum; size_t indexFieldNum; std::vector const* indexField; }; std::vector attrs; arangodb::aql::CalculationNode* node; }; bool attributesMatch(TRI_idx_iid_t& commonIndexId, arangodb::aql::IndexNode const* indexNode, NodeWithAttrs& node) { // check all node attributes to be in index for (auto& nodeAttr : node.attrs) { for (auto& index : indexNode->getIndexes()) { if (!index->hasCoveringIterator()) { continue; } auto indexId = index->id(); // use one index only if (commonIndexId != 0 && commonIndexId != indexId) { continue; } size_t indexFieldNum = 0; for (auto const& field : index->fields()) { if (arangodb::basics::AttributeName::isIdentical(nodeAttr.attr, field, false)) { if (commonIndexId == 0) { commonIndexId = indexId; } nodeAttr.indexFieldNum = indexFieldNum; nodeAttr.indexField = &field; break; } ++indexFieldNum; } if (nodeAttr.indexField != nullptr) { break; } } // not found if (nodeAttr.indexField == nullptr) { return false; } } return true; } // traverse the AST, using previsitor void traverseReadOnly(arangodb::aql::AstNode* node, arangodb::aql::AstNode* parentNode, size_t childNumber, std::function const& preVisitor) { if (node == nullptr) { return; } if (!preVisitor(node, parentNode, childNumber)) { return; } size_t const n = node->numMembers(); for (size_t i = 0; i < n; ++i) { auto member = node->getMemberUnchecked(i); if (member != nullptr) { traverseReadOnly(member, node, i, preVisitor); } } } // traversal state struct TraversalState { arangodb::aql::Variable const* variable; NodeWithAttrs& nodeAttrs; bool optimize; bool wasAccess; }; // determines attributes referenced in an expression for the specified out variable bool getReferencedAttributes(arangodb::aql::AstNode* node, arangodb::aql::Variable const* variable, NodeWithAttrs& nodeAttrs) { TraversalState state{variable, nodeAttrs, true, false}; auto preVisitor = [&state](arangodb::aql::AstNode const* node, arangodb::aql::AstNode* parentNode, size_t childNumber) { if (node == nullptr) { return false; } switch (node->type) { case arangodb::aql::NODE_TYPE_ATTRIBUTE_ACCESS: if (!state.wasAccess) { state.nodeAttrs.attrs.emplace_back( NodeWithAttrs::AttributeAndField{std::vector{ {std::string(node->getStringValue(), node->getStringLength()), false}}, parentNode, childNumber, 0, nullptr}); state.wasAccess = true; } else { state.nodeAttrs.attrs.back().attr.emplace_back(std::string(node->getStringValue(), node->getStringLength()), false); } return true; case arangodb::aql::NODE_TYPE_REFERENCE: { // reference to a variable auto v = static_cast(node->getData()); if (v == state.variable) { if (!state.wasAccess) { // we haven't seen an attribute access directly before state.optimize = false; return false; } std::reverse(state.nodeAttrs.attrs.back().attr.begin(), state.nodeAttrs.attrs.back().attr.end()); } else { if (state.wasAccess) { state.nodeAttrs.attrs.pop_back(); } } // finish an attribute path state.wasAccess = false; return true; } default: break; } if (state.wasAccess) { // not appropriate node type state.wasAccess = false; state.optimize = false; return false; } return true; }; traverseReadOnly(node, nullptr, 0, preVisitor); return state.optimize; } } void arangodb::aql::lateDocumentMaterializationRule(arangodb::aql::Optimizer* opt, std::unique_ptr plan, arangodb::aql::OptimizerRule const& rule) { auto modified = false; auto addPlan = arangodb::scopeGuard([opt, &plan, &rule, &modified]() { opt->addPlan(std::move(plan), rule, modified); }); // index node supports late materialization if (!plan->contains(EN::INDEX) || // we need sort node to be present (without sort it will be just skip, nothing to optimize) !plan->contains(EN::SORT) || // limit node is needed as without limit all documents will be returned anyway, nothing to optimize !plan->contains(EN::LIMIT)) { return; } ::arangodb::containers::SmallVector::allocator_type::arena_type a; ::arangodb::containers::SmallVector nodes{a}; plan->findNodesOfType(nodes, EN::LIMIT, true); for (auto limitNode : nodes) { auto loop = const_cast(limitNode->getLoop()); if (arangodb::aql::ExecutionNode::INDEX == loop->getType()) { auto indexNode = EN::castTo(loop); if (indexNode->isLateMaterialized()) { continue; // loop is aleady optimized } auto current = limitNode->getFirstDependency(); ExecutionNode* sortNode = nullptr; // examining plan. We are looking for SortNode closest to lowest LimitNode // without document body usage before that node. // this node could be appended with materializer bool stopSearch = false; std::vector nodesToChange; TRI_idx_iid_t commonIndexId = 0; // use one index only while (current != loop) { switch (current->getType()) { case arangodb::aql::ExecutionNode::SORT: if (sortNode == nullptr) { // we need nearest to limit sort node, so keep selected if any sortNode = current; } break; case arangodb::aql::ExecutionNode::CALCULATION: { auto calculationNode = EN::castTo(current); auto astNode = calculationNode->expression()->nodeForModification(); NodeWithAttrs node; node.node = calculationNode; // find attributes referenced to index node out variable if (!getReferencedAttributes(astNode, indexNode->outVariable(), node)) { // is not safe for optimization stopSearch = true; } else if (!node.attrs.empty()) { if (!attributesMatch(commonIndexId, indexNode, node)) { // the node uses attributes which is not in index stopSearch = true; } else { nodesToChange.emplace_back(node); } } break; } case arangodb::aql::ExecutionNode::REMOTE: // REMOTE node is a blocker - we do not want to make materialization calls across cluster! if (sortNode != nullptr) { stopSearch = true; } break; default: // make clang happy break; } if (sortNode != nullptr && current->getType() != arangodb::aql::ExecutionNode::CALCULATION) { ::arangodb::containers::HashSet currentUsedVars; current->getVariablesUsedHere(currentUsedVars); if (currentUsedVars.find(indexNode->outVariable()) != currentUsedVars.end()) { // this limit node affects only closest sort, if this sort is invalid // we need to check other limit node stopSearch = true; } } if (stopSearch) { // we have a doc body used before selected SortNode. Forget it, let`s look for better sort to use sortNode = nullptr; nodesToChange.clear(); break; } current = current->getFirstDependency(); // inspect next node } if (sortNode && !nodesToChange.empty()) { auto ast = plan->getAst(); IndexNode::IndexVarsInfo uniqueVariables; for (auto& node : nodesToChange) { std::transform(node.attrs.cbegin(), node.attrs.cend(), std::inserter(uniqueVariables, uniqueVariables.end()), [&ast](auto const& attrAndField) { return std::make_pair(attrAndField.indexField, IndexNode::IndexVariable{attrAndField.indexFieldNum, ast->variables()->createTemporaryVariable()}); }); } auto localDocIdTmp = ast->variables()->createTemporaryVariable(); for (auto& node : nodesToChange) { for (auto& attr : node.attrs) { auto it = uniqueVariables.find(attr.indexField); TRI_ASSERT(it != uniqueVariables.cend()); auto newNode = ast->createNodeReference(it->second.var); if (attr.astNode != nullptr) { TEMPORARILY_UNLOCK_NODE(attr.astNode); attr.astNode->changeMember(attr.astNodeChildNum, newNode); } else { TRI_ASSERT(node.attrs.size() == 1); node.node->expression()->replaceNode(newNode); } } } // we could apply late materialization // 1. We need to notify index node - it should not materialize documents, but produce only localDocIds indexNode->setLateMaterialized(localDocIdTmp, commonIndexId, uniqueVariables); // 2. We need to add materializer after limit node to do materialization // insert a materialize node auto materializeNode = plan->registerNode(std::make_unique( plan.get(), plan->nextId(), indexNode->collection(), *localDocIdTmp, *indexNode->outVariable())); // on cluster we need to materialize node stay close to sort node on db server (to avoid network hop for materialization calls) // however on single server we move it to limit node to make materialization as lazy as possible auto materializeDependency = ServerState::instance()->isCoordinator() ? sortNode : limitNode; auto dependencyParent = materializeDependency->getFirstParent(); TRI_ASSERT(dependencyParent != nullptr); dependencyParent->replaceDependency(materializeDependency, materializeNode); materializeDependency->addParent(materializeNode); modified = true; } } } }