1
0
Fork 0
arangodb/arangod/Aql/SortingGatherExecutor.h

204 lines
6.4 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Michael Hackstein
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGOD_AQL_SORTING_GATHER_EXECUTOR_H
#define ARANGOD_AQL_SORTING_GATHER_EXECUTOR_H
#include "Aql/ClusterNodes.h"
#include "Aql/ExecutionState.h"
#include "Aql/ExecutorInfos.h"
#include "Aql/InputAqlItemRow.h"
namespace arangodb {
// Forward declaration only: this header refers to transaction::Methods
// exclusively through pointers, so the full definition is not needed here.
namespace transaction {
class Methods;
}
namespace aql {
// Forward declarations of the types named by the declarations below
// (fetcher, statistics, output row and sort register description).
class MultiDependencySingleRowFetcher;
class NoStats;
class OutputAqlItemRow;
struct SortRegister;
/// @brief Configuration object handed to a SortingGatherExecutor.
///
/// Extends ExecutorInfos with the sort registers, the transaction, the
/// gather sort mode, the parallelism setting and the limit. Instances are
/// move-only: default and copy construction are deleted.
class SortingGatherExecutorInfos : public ExecutorInfos {
 public:
  /// @param inputRegisters, outputRegisters, nrInputRegisters,
  ///        nrOutputRegisters, registersToClear, registersToKeep
  ///        register bookkeeping; presumably forwarded to the ExecutorInfos
  ///        base (the constructor is defined outside this header - confirm)
  /// @param sortRegister sort register descriptions, taken by rvalue reference
  /// @param trx          transaction, stored as a raw pointer
  /// @param sortMode     sort mode of the gather node
  /// @param limit        limit value returned by limit()
  /// @param p            parallelism setting of the gather node
  SortingGatherExecutorInfos(std::shared_ptr<std::unordered_set<RegisterId>> inputRegisters,
                             std::shared_ptr<std::unordered_set<RegisterId>> outputRegisters,
                             RegisterId nrInputRegisters, RegisterId nrOutputRegisters,
                             std::unordered_set<RegisterId> registersToClear,
                             std::unordered_set<RegisterId> registersToKeep,
                             std::vector<SortRegister>&& sortRegister,
                             arangodb::transaction::Methods* trx,
                             GatherNode::SortMode sortMode, size_t limit,
                             GatherNode::Parallelism p);

  SortingGatherExecutorInfos() = delete;
  // Declared without noexcept on purpose: the definition lives outside this
  // header and the exception specification has to match it.
  SortingGatherExecutorInfos(SortingGatherExecutorInfos&&);
  SortingGatherExecutorInfos(SortingGatherExecutorInfos const&) = delete;
  ~SortingGatherExecutorInfos();

  /// @brief mutable access to the stored sort registers
  std::vector<SortRegister>& sortRegister() { return _sortRegister; }

  /// @brief the transaction pointer passed at construction
  /// Const-qualified (it only reads the pointer) so that it is callable on a
  /// const infos object, like the other read-only accessors below.
  arangodb::transaction::Methods* trx() const noexcept { return _trx; }

  /// @brief sort mode of the gather node
  GatherNode::SortMode sortMode() const noexcept { return _sortMode; }

  /// @brief parallelism setting of the gather node
  GatherNode::Parallelism parallelism() const noexcept { return _parallelism; }

  /// @brief the configured limit
  size_t limit() const noexcept { return _limit; }

 private:
  // NOTE: declaration order determines member initialization order; keep it
  // in sync with the constructor's initializer list.
  std::vector<SortRegister> _sortRegister;
  arangodb::transaction::Methods* _trx;
  GatherNode::SortMode _sortMode;
  GatherNode::Parallelism _parallelism;
  size_t _limit;
};
/// @brief Executor that gathers rows from several dependencies through a
/// MultiDependencySingleRowFetcher and hands them out via a pluggable
/// SortingStrategy.
class SortingGatherExecutor {
 public:
  /// @brief One entry per dependency: the dependency's index, the row
  /// currently held for it and the execution state last seen for it.
  struct ValueType {
    size_t dependencyIndex;
    InputAqlItemRow row;
    ExecutionState state;

    explicit ValueType(size_t index);
  };

  //////////////////////////////////////////////////////////////////////////////
  /// @struct SortingStrategy
  /// @brief Interface of the strategy that selects the next value to return.
  //////////////////////////////////////////////////////////////////////////////
  struct SortingStrategy {
    virtual ~SortingStrategy() = default;

    /// @brief hands out the next value
    virtual ValueType nextValue() = 0;

    /// @brief gets the strategy ready for fetching values; a no-op unless
    /// overridden
    virtual void prepare(std::vector<ValueType>& /*blockPos*/) {}

    /// @brief puts the strategy back into its initial state
    virtual void reset() = 0;
  };  // SortingStrategy

  /// @brief Compile-time traits of this executor.
  struct Properties {
    static constexpr bool preservesOrder = true;
    static constexpr BlockPassthrough allowsBlockPassthrough = BlockPassthrough::Disable;
    static constexpr bool inputSizeRestrictsOutputSize = true;
  };

  using Fetcher = MultiDependencySingleRowFetcher;
  using Infos = SortingGatherExecutorInfos;
  using Stats = NoStats;

  SortingGatherExecutor(Fetcher& fetcher, Infos& infos);
  ~SortingGatherExecutor();

  /**
   * @brief Produces the next row of AQL values.
   *
   * @return the ExecutionState together with the stats object;
   *         output.hasValue() == true iff a row was written
   */
  std::pair<ExecutionState, Stats> produceRows(OutputAqlItemRow& output);

  /// @brief Updates the DONE counter for the given dependency.
  /// NOTE(review): exact bookkeeping lives in the implementation file.
  void adjustNrDone(size_t dependency);

  std::pair<ExecutionState, size_t> expectedNumberOfRows(size_t atMost) const;

  std::tuple<ExecutionState, Stats, size_t> skipRows(size_t atMost);

 private:
  void initNumDepsIfNecessary();

  ExecutionState init(size_t atMost);

  std::pair<ExecutionState, InputAqlItemRow> produceNextRow(size_t atMost);

  bool constrainedSort() const noexcept;

  size_t rowsLeftToWrite() const noexcept;

  void assertConstrainedDoesntOverfetch(size_t atMost) const noexcept;

  // Relevant when this is a constrained sort with fullCount enabled: once the
  // limit has been reached, skipSome may be forwarded to the dependencies
  // without sorting any further. From that point on no more rows may be
  // produced.
  bool maySkip() const noexcept;

  std::tuple<ExecutionState, Stats, size_t> reallySkipRows(size_t atMost);

  std::tuple<ExecutionState, Stats, size_t> produceAndSkipRows(size_t atMost);

 private:
  // NOTE: the data members below are kept in their original declaration
  // order, since that order fixes initialization order and object layout.

  Fetcher& _fetcher;

  // True once the initialize phase is over, i.e. one block has been fetched
  // for every dependency.
  bool _initialized;

  // Total number of dependencies.
  size_t _numberDependencies;

  // The dependency to fetch from next.
  size_t _dependencyToFetch;

  // Input data to process.
  std::vector<ValueType> _inputRows;

  // Number of DONE states counted so far.
  size_t _nrDone;

  /// @brief For a constrained sort this is the limit (> 0); otherwise 0.
  size_t _limit;

  /// @brief Rows written or skipped so far, at most _limit. Skips may only be
  /// passed through to the dependencies once _rowsReturned == _limit.
  size_t _rowsReturned;

  /// @brief After the limit has been reached, the rows still in the heap are
  /// counted exactly once (into _rowsLeftInHeap) so they can be accounted for
  /// while skipping; this flag records whether that has happened.
  bool _heapCounted;

  /// @brief Companion of _heapCounted: set to the number of rows left in the
  /// heap at the first real skip, and decreased while skipping.
  size_t _rowsLeftInHeap;

  // Skip counter. NOTE(review): its exact meaning is defined by the
  // implementation file, not visible from this header.
  size_t _skipped;

  /// @brief the sorting strategy in use
  std::unique_ptr<SortingStrategy> _strategy;

#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  // Only present in maintainer-mode builds. NOTE(review): presumably one flag
  // per dependency, used to cross-check the DONE bookkeeping - confirm.
  std::vector<bool> _flaggedAsDone;
#endif

  // Fixed at construction. NOTE(review): presumably derived from the infos'
  // parallelism setting - confirm in the implementation file.
  const bool _fetchParallel;
};
} // namespace aql
} // namespace arangodb
#endif