1
0
Fork 0

[3.5] Make AQL's constrained heap play nice with fullCount (#10007)

* Make AQL's constrained heap play nice with fullCount (#9981)

* Update CHANGELOG
This commit is contained in:
Tobias Gödderz 2019-09-13 11:28:28 +02:00 committed by KVS85
parent e0e05903fb
commit 6f16c3deef
6 changed files with 180 additions and 36 deletions

View File

@ -1,6 +1,8 @@
v3.5.1 (XXXX-XX-XX) v3.5.1 (XXXX-XX-XX)
------------------- -------------------
* Fixed AQL constrained-heap sort in conjunction with fullCount.
* Added support for AQL expressions such as `a NOT LIKE b`, `a NOT =~ b` and * Added support for AQL expressions such as `a NOT LIKE b`, `a NOT =~ b` and
`a NOT !~ b`. Previous versions of ArangoDB did not support these expressions, `a NOT !~ b`. Previous versions of ArangoDB did not support these expressions,
and using them in an AQL query resulted in a parse error. and using them in an AQL query resulted in a parse error.

View File

@ -138,6 +138,8 @@ ConstrainedSortExecutor::ConstrainedSortExecutor(Fetcher& fetcher, SortExecutorI
_state(ExecutionState::HASMORE), _state(ExecutionState::HASMORE),
_returnNext(0), _returnNext(0),
_rowsPushed(0), _rowsPushed(0),
_rowsRead(0),
_skippedAfter(0),
_heapBuffer(_infos._manager.requestBlock(_infos._limit, _heapBuffer(_infos._manager.requestBlock(_infos._limit,
_infos.numberOfOutputRegisters())), _infos.numberOfOutputRegisters())),
_cmpHeap(std::make_unique<ConstrainedLessThan>(_infos.trx(), _infos.sortRegisters())), _cmpHeap(std::make_unique<ConstrainedLessThan>(_infos.trx(), _infos.sortRegisters())),
@ -147,11 +149,23 @@ ConstrainedSortExecutor::ConstrainedSortExecutor(Fetcher& fetcher, SortExecutorI
TRI_ASSERT(_infos._limit > 0); TRI_ASSERT(_infos._limit > 0);
_rows.reserve(infos._limit); _rows.reserve(infos._limit);
_cmpHeap->setBuffer(_heapBuffer.get()); _cmpHeap->setBuffer(_heapBuffer.get());
}; }
ConstrainedSortExecutor::~ConstrainedSortExecutor() = default; ConstrainedSortExecutor::~ConstrainedSortExecutor() = default;
std::pair<ExecutionState, NoStats> ConstrainedSortExecutor::produceRows(OutputAqlItemRow& output) { bool ConstrainedSortExecutor::doneProducing() const noexcept {
// must not get strictly larger
TRI_ASSERT(_returnNext <= _rows.size());
return _state == ExecutionState::DONE && _returnNext >= _rows.size();
}
bool ConstrainedSortExecutor::doneSkipping() const noexcept {
// must not get strictly larger
TRI_ASSERT(_returnNext + _skippedAfter <= _rowsRead);
return _state == ExecutionState::DONE && _returnNext + _skippedAfter >= _rowsRead;
}
ExecutionState ConstrainedSortExecutor::consumeInput() {
while (_state != ExecutionState::DONE) { while (_state != ExecutionState::DONE) {
TRI_IF_FAILURE("SortBlock::doSorting") { TRI_IF_FAILURE("SortBlock::doSorting") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
@ -161,37 +175,65 @@ std::pair<ExecutionState, NoStats> ConstrainedSortExecutor::produceRows(OutputAq
std::tie(_state, input) = _fetcher.fetchRow(); std::tie(_state, input) = _fetcher.fetchRow();
if (_state == ExecutionState::WAITING) { if (_state == ExecutionState::WAITING) {
return {_state, NoStats{}}; return _state;
} }
if (!input.isInitialized()) { if (!input.isInitialized()) {
TRI_ASSERT(_state == ExecutionState::DONE); TRI_ASSERT(_state == ExecutionState::DONE);
} else { } else {
++_rowsRead;
if (_rowsPushed < _infos._limit || !compareInput(_rows.front(), input)) { if (_rowsPushed < _infos._limit || !compareInput(_rows.front(), input)) {
// Push this row into the heap // Push this row into the heap
pushRow(input); pushRow(input);
} }
} }
} }
if (_returnNext >= _rows.size()) {
// Happens if, we either have no upstream e.g. _rows is empty TRI_ASSERT(_state == ExecutionState::DONE);
// Or if dependency is pulling too often (should not happen)
return {ExecutionState::DONE, NoStats{}}; return _state;
}
std::pair<ExecutionState, NoStats> ConstrainedSortExecutor::produceRows(OutputAqlItemRow& output) {
{
ExecutionState state = consumeInput();
TRI_ASSERT(state == _state);
if (state == ExecutionState::WAITING) {
return {ExecutionState::WAITING, NoStats{}};
}
TRI_ASSERT(state == ExecutionState::DONE);
} }
if (doneProducing()) {
if (doneSkipping()) {
// No we're really done
return {ExecutionState::DONE, NoStats{}};
}
// We should never get here, as the following LIMIT block should never fetch
// more than our limit. He may only skip after that. Thus:
TRI_ASSERT(false);
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_INTERNAL_AQL,
"Overfetch during constrained heap sort. Please report this error! Try "
"turning off the sort-limit optimizer rule to get your query working.");
}
if (_returnNext == 0) { if (_returnNext == 0) {
// Only once sort the rows again, s.t. the // Only once sort the rows again, s.t. the
// contained list of elements is in the right ordering. // contained list of elements is in the right ordering.
std::sort(_rows.begin(), _rows.end(), *_cmpHeap); std::sort(_rows.begin(), _rows.end(), *_cmpHeap);
} }
// Now our heap is full and sorted, we just need to return it line by line // Now our heap is full and sorted, we just need to return it line by line
TRI_ASSERT(_returnNext < _rows.size()); TRI_ASSERT(_returnNext < _rows.size());
std::size_t heapRowPosition = _rows[_returnNext++]; auto const heapRowPosition = _rows[_returnNext];
++_returnNext;
InputAqlItemRow heapRow(_heapBuffer, heapRowPosition); InputAqlItemRow heapRow(_heapBuffer, heapRowPosition);
TRI_ASSERT(heapRow.isInitialized()); TRI_ASSERT(heapRow.isInitialized());
TRI_ASSERT(heapRowPosition < _rowsPushed); TRI_ASSERT(heapRowPosition < _rowsPushed);
output.copyRow(heapRow); output.copyRow(heapRow);
if (_returnNext == _rows.size()) {
return {ExecutionState::DONE, NoStats{}}; // Lie, we may have a possible LIMIT block with fullCount to work.
} // We emitted at least one row at this point, so this is fine.
return {ExecutionState::HASMORE, NoStats{}}; return {ExecutionState::HASMORE, NoStats{}};
} }
@ -213,8 +255,57 @@ std::pair<ExecutionState, size_t> ConstrainedSortExecutor::expectedNumberOfRows(
// We have exactly the following rows available: // We have exactly the following rows available:
rowsLeft = _rows.size() - _returnNext; rowsLeft = _rows.size() - _returnNext;
} }
if (rowsLeft > 0) {
return {ExecutionState::HASMORE, rowsLeft}; if (rowsLeft == 0) {
if (doneSkipping()) {
return {ExecutionState::DONE, rowsLeft};
}
// We always report at least 1 row here, for a possible LIMIT block with fullCount to work.
// However, we should never have to do this if the LIMIT block doesn't overfetch with getSome.
rowsLeft = 1;
} }
return {ExecutionState::DONE, rowsLeft};
return {ExecutionState::HASMORE, rowsLeft};
}
std::tuple<ExecutionState, NoStats, size_t> ConstrainedSortExecutor::skipRows(size_t toSkipRequested) {
{
ExecutionState state = consumeInput();
TRI_ASSERT(state == _state);
if (state == ExecutionState::WAITING) {
return {ExecutionState::WAITING, NoStats{}, 0};
}
TRI_ASSERT(state == ExecutionState::DONE);
}
if (_returnNext == 0) {
// Only once sort the rows again, s.t. the
// contained list of elements is in the right ordering.
std::sort(_rows.begin(), _rows.end(), *_cmpHeap);
}
size_t skipped = 0;
// Skip rows in the heap
if (!doneProducing()) {
TRI_ASSERT(_rows.size() >= _returnNext);
auto const rowsLeftInHeap = _rows.size() - _returnNext;
auto const skipNum = (std::min)(toSkipRequested, rowsLeftInHeap);
_returnNext += skipNum;
skipped += skipNum;
}
// Skip rows we've dropped
if (skipped < toSkipRequested && !doneSkipping()) {
TRI_ASSERT(doneProducing());
auto const rowsLeftToSkip = _rowsRead - (_rows.size() + _skippedAfter);
auto const skipNum = (std::min)(toSkipRequested, rowsLeftToSkip);
_skippedAfter += skipNum;
skipped += skipNum;
}
TRI_ASSERT(skipped <= toSkipRequested);
auto const state = doneSkipping() ? ExecutionState::DONE : ExecutionState::HASMORE;
return {state, NoStats{}, skipped};
} }

View File

@ -56,8 +56,6 @@ struct SortRegister;
*/ */
class ConstrainedSortExecutor { class ConstrainedSortExecutor {
public: public:
friend class Sorter;
struct Properties { struct Properties {
static const bool preservesOrder = false; static const bool preservesOrder = false;
static const bool allowsBlockPassthrough = false; static const bool allowsBlockPassthrough = false;
@ -78,6 +76,8 @@ class ConstrainedSortExecutor {
*/ */
std::pair<ExecutionState, Stats> produceRows(OutputAqlItemRow& output); std::pair<ExecutionState, Stats> produceRows(OutputAqlItemRow& output);
std::tuple<ExecutionState, Stats, size_t> skipRows(size_t toSkipRequested);
/** /**
* @brief This Executor knows how many rows it will produce and most by itself * @brief This Executor knows how many rows it will produce and most by itself
* It also knows that it could produce less if the upstream only has fewer rows. * It also knows that it could produce less if the upstream only has fewer rows.
@ -88,6 +88,16 @@ class ConstrainedSortExecutor {
bool compareInput(size_t const& rosPos, InputAqlItemRow& row) const; bool compareInput(size_t const& rosPos, InputAqlItemRow& row) const;
arangodb::Result pushRow(InputAqlItemRow& row); arangodb::Result pushRow(InputAqlItemRow& row);
// We're done producing when we've emitted all rows from our heap.
bool doneProducing() const noexcept;
// We're done skipping when we've emitted all rows from our heap,
// AND emitted (in this case, skipped) all rows that were dropped during the
// sort as well. This is for fullCount queries only.
bool doneSkipping() const noexcept;
ExecutionState consumeInput();
private: private:
Infos& _infos; Infos& _infos;
Fetcher& _fetcher; Fetcher& _fetcher;
@ -95,6 +105,8 @@ class ConstrainedSortExecutor {
size_t _returnNext; size_t _returnNext;
std::vector<size_t> _rows; std::vector<size_t> _rows;
size_t _rowsPushed; size_t _rowsPushed;
size_t _rowsRead;
size_t _skippedAfter;
SharedAqlItemBlockPtr _heapBuffer; SharedAqlItemBlockPtr _heapBuffer;
std::unique_ptr<ConstrainedLessThan> _cmpHeap; // in pointer to avoid std::unique_ptr<ConstrainedLessThan> _cmpHeap; // in pointer to avoid
OutputAqlItemRow _heapOutputRow; OutputAqlItemRow _heapOutputRow;

View File

@ -190,7 +190,12 @@ std::pair<ExecutionState, SharedAqlItemBlockPtr> ExecutionBlockImpl<Executor>::g
TRI_ASSERT(state == ExecutionState::HASMORE); TRI_ASSERT(state == ExecutionState::HASMORE);
// When we're passing blocks through we have no control over the size of the // When we're passing blocks through we have no control over the size of the
// output block. // output block.
if /* constexpr */ (!Executor::Properties::allowsBlockPassthrough) { // Plus, the ConstrainedSortExecutor will report an expectedNumberOfRows
// according to its heap size, thus resulting in a smaller allocated output
// block. However, it won't report DONE after, because a LIMIT block with
// fullCount must continue to count after the sorted output.
if /* constexpr */ (!Executor::Properties::allowsBlockPassthrough &&
!std::is_same<Executor, ConstrainedSortExecutor>::value) {
TRI_ASSERT(_outputItemRow->numRowsWritten() == atMost); TRI_ASSERT(_outputItemRow->numRowsWritten() == atMost);
} }
@ -281,11 +286,13 @@ static SkipVariants constexpr skipType() {
std::is_same<Executor, IResearchViewMergeExecutor<false>>::value || std::is_same<Executor, IResearchViewMergeExecutor<false>>::value ||
std::is_same<Executor, IResearchViewMergeExecutor<true>>::value || std::is_same<Executor, IResearchViewMergeExecutor<true>>::value ||
std::is_same<Executor, EnumerateCollectionExecutor>::value || std::is_same<Executor, EnumerateCollectionExecutor>::value ||
std::is_same<Executor, LimitExecutor>::value), std::is_same<Executor, LimitExecutor>::value ||
std::is_same<Executor, ConstrainedSortExecutor>::value),
"Unexpected executor for SkipVariants::EXECUTOR"); "Unexpected executor for SkipVariants::EXECUTOR");
// The LimitExecutor will not work correctly with SkipVariants::FETCHER! // The LimitExecutor will not work correctly with SkipVariants::FETCHER!
static_assert(!std::is_same<Executor, LimitExecutor>::value || useFetcher, static_assert(
!std::is_same<Executor, LimitExecutor>::value || useFetcher,
"LimitExecutor needs to implement skipRows() to work correctly"); "LimitExecutor needs to implement skipRows() to work correctly");
if (useExecutor) { if (useExecutor) {
@ -510,7 +517,11 @@ namespace arangodb {
namespace aql { namespace aql {
// The constant "PASSTHROUGH" is somehow reserved with MSVC. // The constant "PASSTHROUGH" is somehow reserved with MSVC.
enum class RequestWrappedBlockVariant { DEFAULT , PASS_THROUGH , INPUTRESTRICTED }; enum class RequestWrappedBlockVariant {
DEFAULT,
PASS_THROUGH,
INPUTRESTRICTED
};
// Specifying the namespace here is important to MSVC. // Specifying the namespace here is important to MSVC.
template <enum arangodb::aql::RequestWrappedBlockVariant> template <enum arangodb::aql::RequestWrappedBlockVariant>
@ -545,9 +556,9 @@ struct RequestWrappedBlock<RequestWrappedBlockVariant::PASS_THROUGH> {
typename Executor::Infos const& infos, typename Executor::Infos const& infos,
#endif #endif
Executor& executor, ExecutionEngine& engine, size_t nrItems, RegisterCount nrRegs) { Executor& executor, ExecutionEngine& engine, size_t nrItems, RegisterCount nrRegs) {
static_assert( static_assert(Executor::Properties::allowsBlockPassthrough,
Executor::Properties::allowsBlockPassthrough, "This function can only be used with executors supporting "
"This function can only be used with executors supporting `allowsBlockPassthrough`"); "`allowsBlockPassthrough`");
static_assert(hasFetchBlockForPassthrough<Executor>::value, static_assert(hasFetchBlockForPassthrough<Executor>::value,
"An Executor with allowsBlockPassthrough must implement " "An Executor with allowsBlockPassthrough must implement "
"fetchBlockForPassthrough"); "fetchBlockForPassthrough");
@ -601,9 +612,9 @@ struct RequestWrappedBlock<RequestWrappedBlockVariant::INPUTRESTRICTED> {
typename Executor::Infos const&, typename Executor::Infos const&,
#endif #endif
Executor& executor, ExecutionEngine& engine, size_t nrItems, RegisterCount nrRegs) { Executor& executor, ExecutionEngine& engine, size_t nrItems, RegisterCount nrRegs) {
static_assert( static_assert(Executor::Properties::inputSizeRestrictsOutputSize,
Executor::Properties::inputSizeRestrictsOutputSize, "This function can only be used with executors supporting "
"This function can only be used with executors supporting `inputSizeRestrictsOutputSize`"); "`inputSizeRestrictsOutputSize`");
static_assert(hasExpectedNumberOfRows<Executor>::value, static_assert(hasExpectedNumberOfRows<Executor>::value,
"An Executor with inputSizeRestrictsOutputSize must " "An Executor with inputSizeRestrictsOutputSize must "
"implement expectedNumberOfRows"); "implement expectedNumberOfRows");

View File

@ -229,8 +229,8 @@ class OutputAqlItemRow {
#endif #endif
_baseIndex = index; _baseIndex = index;
} }
// Use this function with caution! We need it for the SortedCollectExecutor // Use this function with caution! We need it for the SortedCollectExecutor,
// and CountCollectExecutor. // CountCollectExecutor, and the ConstrainedSortExecutor.
void setAllowSourceRowUninitialized() { void setAllowSourceRowUninitialized() {
_allowSourceRowUninitialized = true; _allowSourceRowUninitialized = true;
} }
@ -307,7 +307,7 @@ class OutputAqlItemRow {
bool _setBaseIndexNotUsed; bool _setBaseIndexNotUsed;
#endif #endif
// Need this special bool for allowing an empty AqlValue inside the // Need this special bool for allowing an empty AqlValue inside the
// SortedCollectExecutor and CountCollectExecutor. // SortedCollectExecutor, CountCollectExecutor and ConstrainedSortExecutor.
bool _allowSourceRowUninitialized; bool _allowSourceRowUninitialized;
private: private:

View File

@ -1,5 +1,5 @@
/*jshint globalstrict:false, strict:false, maxlen: 500 */ /*jshint globalstrict:false, strict:false, maxlen: 500 */
/*global assertEqual, AQL_EXPLAIN */ /*global assertEqual, AQL_EXPLAIN, AQL_EXECUTE */
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief tests for query language, limit optimizations /// @brief tests for query language, limit optimizations
@ -28,11 +28,10 @@
/// @author Copyright 2012, triAGENS GmbH, Cologne, Germany /// @author Copyright 2012, triAGENS GmbH, Cologne, Germany
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
var jsunity = require("jsunity"); const jsunity = require("jsunity");
var internal = require("internal"); const internal = require("internal");
var helper = require("@arangodb/aql-helper"); const helper = require("@arangodb/aql-helper");
var getQueryResults = helper.getQueryResults; const getQueryResults = helper.getQueryResults;
var db = internal.db;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief test suite /// @brief test suite
@ -55,7 +54,7 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
setUp : function () { setUp : function () {
internal.db._drop(cn); internal.db._drop(cn);
collection = internal.db._create(cn); collection = internal.db._create(cn, {numberOfShards: 9});
for (var i = 0; i < docCount; ++i) { for (var i = 0; i < docCount; ++i) {
collection.save({ _key: "test" + i, value : i }); collection.save({ _key: "test" + i, value : i });
@ -303,6 +302,35 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
assertEqual(sorts[0].strategy, "constrained-heap"); assertEqual(sorts[0].strategy, "constrained-heap");
}, },
////////////////////////////////////////////////////////////////////////////////
/// @brief check limit optimization with sort and fullCount
/// This is a regression test, the constrained heap did not play well with
/// fullCount when 3.5 was released.
////////////////////////////////////////////////////////////////////////////////
testLimitFullCollectionSortWithFullCount : function () {
const query = "FOR c IN " + cn + " SORT c.value LIMIT 20, 10 RETURN c";
const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
const values = queryResult.json;
const fullCount = queryResult.stats.fullCount;
assertEqual(10, values.length);
assertEqual(20, values[0].value);
assertEqual(21, values[1].value);
assertEqual(22, values[2].value);
assertEqual(29, values[9].value);
assertEqual(fullCount, 1000);
const sorts = getSorts(query);
assertEqual(sorts.length, 1);
assertEqual(sorts[0].limit, 30);
assertEqual(sorts[0].strategy, "constrained-heap");
},
}; };
} }