From 7e0c8c935dcf5bcef476290219a178616b25f91a Mon Sep 17 00:00:00 2001 From: Michael Hackstein Date: Tue, 15 Mar 2016 11:12:10 +0100 Subject: [PATCH] Reimplemented optimizer rule to remove unnecessary SORT if covered by index --- arangod/Aql/IndexBlock.cpp | 5 ++++- arangod/Aql/OptimizerRules.cpp | 38 +++++++++++++++----------------- arangod/Utils/Transaction.cpp | 40 +++++++++++++++++++++------------- arangod/Utils/Transaction.h | 22 ++++++++++++------- 4 files changed, 61 insertions(+), 44 deletions(-) diff --git a/arangod/Aql/IndexBlock.cpp b/arangod/Aql/IndexBlock.cpp index 7317b91d60..010b1f67b1 100644 --- a/arangod/Aql/IndexBlock.cpp +++ b/arangod/Aql/IndexBlock.cpp @@ -68,7 +68,10 @@ arangodb::aql::AstNode* IndexBlock::makeUnique( auto array = ast->createNodeArray(); array->addMember(node); auto trx = ast->query()->trx(); - if (trx->isIndexSorted(_collection->getName(), _indexes[_currentIndex])) { + bool isSorted = false; + bool isSparse = false; + auto unused = trx->getIndexFeatures(_collection->getName(), _indexes[_currentIndex], isSorted, isSparse); + if (isSparse) { // the index is sorted. 
we need to use SORTED_UNIQUE to get the // result back in index order return ast->createNodeFunctionCall("SORTED_UNIQUE", array); diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 75122d7bbd..61a46ce964 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -1789,8 +1789,6 @@ struct SortToIndexNode final : public WalkerWorker { return true; } -#warning Reimplement this rule - /* auto const& indexes = indexNode->getIndexes(); auto cond = indexNode->condition(); TRI_ASSERT(cond != nullptr); @@ -1798,6 +1796,13 @@ struct SortToIndexNode final : public WalkerWorker { Variable const* outVariable = indexNode->outVariable(); TRI_ASSERT(outVariable != nullptr); + auto index = indexes[0]; + std::string collectionName = indexNode->collection()->getName(); + arangodb::Transaction* trx = indexNode->trx(); + bool isSorted = false; + bool isSparse = false; + std::vector> fields = fields = + trx->getIndexFeatures(collectionName, index, isSorted, isSparse); if (indexes.size() != 1) { // can only use this index node if it uses exactly one index or multiple // indexes on exactly the same attributes @@ -1807,17 +1812,13 @@ struct SortToIndexNode final : public WalkerWorker { return true; } - std::vector> seen; + if (!isSparse) { + return true; + } - for (auto& index : indexes) { - if (index->sparse) { - // cannot use a sparse index for sorting - return true; - } - - if (!seen.empty() && arangodb::basics::AttributeName::isIdentical( - index->fields, seen, true)) { - // different attributes + for (auto& idx : indexes) { + if (idx != index) { + // Can only be sorted iff only one index is used. 
return true; } } @@ -1828,22 +1829,21 @@ struct SortToIndexNode final : public WalkerWorker { // if we get here, we either have one index or multiple indexes on the same // attributes - auto index = indexes[0]; bool handled = false; - SortCondition sortCondition(_sorts, cond->getConstAttributes(outVariable, !index->sparse), _variableDefinitions); + SortCondition sortCondition(_sorts, cond->getConstAttributes(outVariable, !isSparse), _variableDefinitions); bool const isOnlyAttributeAccess = (!sortCondition.isEmpty() && sortCondition.isOnlyAttributeAccess()); - if (isOnlyAttributeAccess && index->isSorted() && !index->sparse && + if (isOnlyAttributeAccess && isSorted && !isSparse && sortCondition.isUnidirectional() && sortCondition.isDescending() == indexNode->reverse()) { // we have found a sort condition, which is unidirectional and in the same // order as the IndexNode... // now check if the sort attributes match the ones of the index size_t const numCovered = - sortCondition.coveredAttributes(outVariable, index->fields); + sortCondition.coveredAttributes(outVariable, fields); if (numCovered >= sortCondition.numAttributes()) { // sort condition is fully covered by index... now we can remove the @@ -1869,12 +1869,11 @@ struct SortToIndexNode final : public WalkerWorker { // fields // e.g. 
FILTER c.value1 == 1 && c.value2 == 42 SORT c.value1, c.value2 size_t const numCovered = - sortCondition.coveredAttributes(outVariable, index->fields); + sortCondition.coveredAttributes(outVariable, fields); if (numCovered == sortCondition.numAttributes() && sortCondition.isUnidirectional() && - (index->isSorted() || - index->fields.size() == sortCondition.numAttributes())) { + (isSorted || fields.size() == sortCondition.numAttributes())) { // no need to sort _plan->unlinkNode(_plan->getNodeById(_sortNode->id())); _modified = true; @@ -1882,7 +1881,6 @@ struct SortToIndexNode final : public WalkerWorker { } } } -*/ return true; // always abort after we found an IndexNode } diff --git a/arangod/Utils/Transaction.cpp b/arangod/Utils/Transaction.cpp index 9c9e97818a..fe03b81a42 100644 --- a/arangod/Utils/Transaction.cpp +++ b/arangod/Utils/Transaction.cpp @@ -26,6 +26,7 @@ #include "Aql/AstNode.h" #include "Aql/Condition.h" #include "Aql/SortCondition.h" +#include "Basics/AttributeNameParser.h" #include "Basics/Exceptions.h" #include "Basics/StringUtils.h" #include "Basics/VelocyPackHelper.h" @@ -2006,6 +2007,28 @@ bool Transaction::supportsFilterCondition( estimatedItems, estimatedCost); } +////////////////////////////////////////////////////////////////////////////// +/// @brief Get the index features: +/// Returns the covered attributes, and sets the first bool value +/// to isSorted and the second bool value to isSparse +////////////////////////////////////////////////////////////////////////////// + +std::vector> +Transaction::getIndexFeatures(std::string const& collectionName, + std::string const& indexHandle, bool& isSorted, + bool& isSparse) { + + if (ServerState::instance()->isCoordinator()) { + // Index features can only be looked up on DBServers and single servers, not on coordinators.
+ THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_ONLY_ON_DBSERVER); + } + + arangodb::Index* idx = getIndexByIdentifier(collectionName, indexHandle); + isSorted = idx->isSorted(); + isSparse = idx->sparse(); + return idx->fields(); +} + ////////////////////////////////////////////////////////////////////////////// /// @brief Gets the best fitting index for an AQL sort condition /// note: the caller must have read-locked the underlying collection when @@ -2090,26 +2113,13 @@ OperationCursor Transaction::indexScanForCondition( return OperationCursor(transactionContext()->orderCustomTypeHandler(), iterator.release(), limit, batchSize); - } - ////////////////////////////////////////////////////////////////////////////// -/// @brief check if index is sorted +/// @brief get the index by its identifier. Will either throw or +/// return a valid index. nullptr is impossible. ////////////////////////////////////////////////////////////////////////////// -bool Transaction::isIndexSorted(std::string const& collectionName, - std::string const& indexId) { - if (ServerState::instance()->isCoordinator()) { - // The index is sorted check is only available on DBServers and Single Server.
- THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_ONLY_ON_DBSERVER); - } - - arangodb::Index* idx = getIndexByIdentifier(collectionName, indexId); - TRI_ASSERT(idx != nullptr); - return idx->isSorted(); -} - arangodb::Index* Transaction::getIndexByIdentifier( std::string const& collectionName, std::string const& indexHandle) { TRI_voc_cid_t cid = resolver()->getCollectionIdLocal(collectionName); diff --git a/arangod/Utils/Transaction.h b/arangod/Utils/Transaction.h index ef9efc887f..cedbe5f438 100644 --- a/arangod/Utils/Transaction.h +++ b/arangod/Utils/Transaction.h @@ -37,6 +37,11 @@ #define TRI_DEFAULT_BATCH_SIZE 1000 namespace arangodb { + +namespace basics { +class AttributeName; +} + class Index; namespace aql { @@ -400,6 +405,15 @@ class Transaction { arangodb::aql::Variable const*, size_t, size_t&, double&); + ////////////////////////////////////////////////////////////////////////////// + /// @brief Get the index features: + /// Returns the covered attributes, and sets the first bool value + /// to isSorted and the second bool value to isSparse + ////////////////////////////////////////////////////////////////////////////// + + std::vector> getIndexFeatures( + std::string const&, std::string const&, bool&, bool&); + ////////////////////////////////////////////////////////////////////////////// /// @brief Gets the best fitting index for an AQL sort condition /// note: the caller must have read-locked the underlying collection when @@ -424,14 +438,6 @@ class Transaction { arangodb::aql::Variable const*, uint64_t, uint64_t, bool); - ////////////////////////////////////////////////////////////////////////////// - /// @brief check if index is sorted - ////////////////////////////////////////////////////////////////////////////// - - bool isIndexSorted(std::string const& collectionName, - std::string const& indexId); - - ////////////////////////////////////////////////////////////////////////////// /// @brief factory for OperationCursor objects /// note: the caller 
must have read-locked the underlying collection when