1
0
Fork 0

Bug fix 3.5/count collect should skip (#9512)

* Make CountCollectExecutor skip rather than fetch

* Cleanup, fix and test for top-level variables

* Added CHANGELOG entry and tests
This commit is contained in:
Tobias Gödderz 2019-07-18 23:47:27 +02:00 committed by KVS85
parent 47d7f9e3e2
commit 7e6b6fbed7
7 changed files with 74 additions and 31 deletions

View File

@@ -1,6 +1,8 @@
v3.5.0-rc.5 (2019-XX-XX)
------------------------
* Fixed a performance regression of COLLECT WITH COUNT INTO.
* Fixed some races in cluster collection creation, which allowed collections with the
same name to be created in parallel under some rare conditions.

View File

@@ -96,40 +96,36 @@ class CountCollectExecutor {
TRI_IF_FAILURE("CountCollectExecutor::produceRows") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
}
NoStats stats{};
InputAqlItemRow input{CreateInvalidInputRowHint{}};
if (_state == ExecutionState::DONE) {
return {_state, stats};
return {_state, NoStats{}};
}
while (true) {
std::tie(_state, input) = _fetcher.fetchRow();
while (_state != ExecutionState::DONE) {
size_t skipped;
std::tie(_state, skipped) = _fetcher.skipRows(ExecutionBlock::SkipAllSize());
if (_state == ExecutionState::WAITING) {
return {_state, stats};
TRI_ASSERT(skipped == 0);
return {_state, NoStats{}};
}
if (!input) {
TRI_ASSERT(_state == ExecutionState::DONE);
output.cloneValueInto(_infos.getOutputRegisterId(), input,
AqlValue(AqlValueHintUInt(getCount())));
return {_state, stats};
}
TRI_ASSERT(input.isInitialized());
incrCount();
// Abort if upstream is done
if (_state == ExecutionState::DONE) {
output.cloneValueInto(_infos.getOutputRegisterId(), input,
AqlValue(AqlValueHintUInt(getCount())));
return {_state, stats};
}
TRI_ASSERT(skipped != 0 || _state == ExecutionState::DONE);
incrCountBy(skipped);
}
// In general, we do not have an input row. In fact, we never fetch one.
output.setAllowSourceRowUninitialized();
// We must produce exactly one output row.
output.cloneValueInto(_infos.getOutputRegisterId(),
InputAqlItemRow{CreateInvalidInputRowHint{}},
AqlValue(AqlValueHintUInt(getCount())));
return {_state, NoStats{}};
}
void incrCount() noexcept { _count++; };
void incrCountBy(size_t incr) noexcept { _count += incr; };
uint64_t getCount() noexcept { return _count; };
inline std::pair<ExecutionState, size_t> expectedNumberOfRows(size_t atMost) const {

View File

@@ -229,7 +229,8 @@ class OutputAqlItemRow {
#endif
_baseIndex = index;
}
// Use this function with caution! We need it only for the SortedCollectExecutor
// Use this function with caution! We need it for the SortedCollectExecutor
// and CountCollectExecutor.
void setAllowSourceRowUninitialized() {
_allowSourceRowUninitialized = true;
}
@@ -305,7 +306,8 @@ class OutputAqlItemRow {
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
bool _setBaseIndexNotUsed;
#endif
// need this special bool for allowing an empty AqlValue inside the SortedCollectExecutor
// Need this special bool for allowing an empty AqlValue inside the
// SortedCollectExecutor and CountCollectExecutor.
bool _allowSourceRowUninitialized;
private:

View File

@@ -74,6 +74,8 @@ TEST_F(CountCollectExecutorTest, there_are_no_rows_upstream_the_producer_doesnt_
AqlValue x = block->getValue(0, 1);
ASSERT_TRUE(x.isNumber());
ASSERT_TRUE(x.toInt64() == 0);
ASSERT_EQ(0, fetcher.totalSkipped());
}
TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits) {
@@ -97,6 +99,8 @@ TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits)
AqlValue x = block->getValue(0, 1);
ASSERT_TRUE(x.isNumber());
ASSERT_TRUE(x.toInt64() == 0);
ASSERT_EQ(0, fetcher.totalSkipped());
}
TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doesnt_wait) {
@@ -116,6 +120,8 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doe
AqlValue x = block->getValue(0, 1);
ASSERT_TRUE(x.isNumber());
ASSERT_TRUE(x.toInt64() == 3);
ASSERT_EQ(3, fetcher.totalSkipped());
}
TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) {
@@ -147,6 +153,8 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_wai
AqlValue x = block->getValue(0, 1);
ASSERT_TRUE(x.isNumber());
ASSERT_TRUE(x.toInt64() == 3);
ASSERT_EQ(3, fetcher.totalSkipped());
}
} // namespace aql

View File

@@ -120,12 +120,14 @@ std::pair<ExecutionState, size_t> SingleRowFetcherHelper<passBlocksThrough>::ski
if (state == ExecutionState::DONE) {
size_t skipped = _skipped;
_skipped = 0;
_totalSkipped += skipped;
return {state, skipped};
}
}
size_t skipped = _skipped;
_skipped = 0;
_totalSkipped += skipped;
return {state, skipped};
}

View File

@@ -77,6 +77,8 @@ class SingleRowFetcherHelper
uint64_t nrCalled() { return _nrCalled; }
size_t totalSkipped() const { return _totalSkipped; }
std::pair<arangodb::aql::ExecutionState, size_t> skipRows(size_t atMost) override;
std::pair<arangodb::aql::ExecutionState, arangodb::aql::SharedAqlItemBlockPtr> fetchBlockForPassthrough(
@@ -113,6 +115,7 @@ class SingleRowFetcherHelper
uint64_t _nrItems;
uint64_t _nrCalled{};
size_t _skipped{};
size_t _totalSkipped{};
size_t _curIndexInBlock{};
size_t _curRowIndex{};
size_t _blockSize;

View File

@@ -381,13 +381,43 @@ function optimizerCountTestSuite () {
}
},
testCollectAggregateUndefined: function () {
var randomDocumentID = db["UnitTestsCollection"].any()._id;
var query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key for i in [] collect aggregate count = count(i) return {count, start}';
var bindParams = {};
var options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
// This test is not necessary for hashed collect, as hashed collect will not
// be used without group variables.
testCollectSortedUndefined: function () {
const randomDocumentID = db["UnitTestsCollection"].any()._id;
const query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key FOR i IN [] COLLECT AGGREGATE count = count(i) RETURN {count, start}';
const bindParams = {};
const options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
var results = db._query(query, bindParams, options).toArray();
const planNodes = AQL_EXPLAIN(query, {}, options).plan.nodes;
assertEqual(
[ "SingletonNode", "CalculationNode", "CalculationNode",
"EnumerateListNode", "CollectNode", "CalculationNode", "ReturnNode" ],
planNodes.map(n => n.type));
assertEqual("sorted", planNodes[4].collectOptions.method);
const results = db._query(query, bindParams, options).toArray();
// expectation is that we exactly get one result
// count will be 0, start will be undefined
assertEqual(1, results.length);
assertEqual(0, results[0].count);
assertEqual(undefined, results[0].start);
},
testCollectCountUndefined: function () {
const randomDocumentID = db["UnitTestsCollection"].any()._id;
const query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key FOR i IN [] COLLECT WITH COUNT INTO count RETURN {count, start}';
const bindParams = {};
const options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
const planNodes = AQL_EXPLAIN(query, {}, options).plan.nodes;
assertEqual(
[ "SingletonNode", "CalculationNode", "CalculationNode",
"EnumerateListNode", "CollectNode", "CalculationNode", "ReturnNode" ],
planNodes.map(n => n.type));
assertEqual("count", planNodes[4].collectOptions.method);
const results = db._query(query, bindParams, options).toArray();
// expectation is that we exactly get one result
// count will be 0, start will be undefined
assertEqual(1, results.length);