mirror of https://gitee.com/bigwinds/arangodb
Bug fix 3.5/count collect should skip (#9512)
* Make CountCollectExecutor skip rather than fetch
* Cleanup, fix and test for top-level variables
* Added CHANGELOG entry and tests
This commit is contained in:
parent
47d7f9e3e2
commit
7e6b6fbed7
|
@@ -1,6 +1,8 @@
|
|||
v3.5.0-rc.5 (2019-XX-XX)
|
||||
------------------------
|
||||
|
||||
* Fixed a performance regression of COLLECT WITH COUNT INTO.
|
||||
|
||||
* Fixed some races in cluster collection creation, which allowed collections with the
|
||||
same name to be created in parallel under some rare conditions.
|
||||
|
||||
|
|
|
@@ -96,40 +96,36 @@ class CountCollectExecutor {
|
|||
TRI_IF_FAILURE("CountCollectExecutor::produceRows") {
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
|
||||
}
|
||||
NoStats stats{};
|
||||
InputAqlItemRow input{CreateInvalidInputRowHint{}};
|
||||
|
||||
if (_state == ExecutionState::DONE) {
|
||||
return {_state, stats};
|
||||
return {_state, NoStats{}};
|
||||
}
|
||||
|
||||
while (true) {
|
||||
std::tie(_state, input) = _fetcher.fetchRow();
|
||||
while (_state != ExecutionState::DONE) {
|
||||
size_t skipped;
|
||||
std::tie(_state, skipped) = _fetcher.skipRows(ExecutionBlock::SkipAllSize());
|
||||
|
||||
if (_state == ExecutionState::WAITING) {
|
||||
return {_state, stats};
|
||||
TRI_ASSERT(skipped == 0);
|
||||
return {_state, NoStats{}};
|
||||
}
|
||||
|
||||
if (!input) {
|
||||
TRI_ASSERT(_state == ExecutionState::DONE);
|
||||
output.cloneValueInto(_infos.getOutputRegisterId(), input,
|
||||
AqlValue(AqlValueHintUInt(getCount())));
|
||||
return {_state, stats};
|
||||
}
|
||||
|
||||
TRI_ASSERT(input.isInitialized());
|
||||
incrCount();
|
||||
|
||||
// Abort if upstream is done
|
||||
if (_state == ExecutionState::DONE) {
|
||||
output.cloneValueInto(_infos.getOutputRegisterId(), input,
|
||||
AqlValue(AqlValueHintUInt(getCount())));
|
||||
return {_state, stats};
|
||||
}
|
||||
TRI_ASSERT(skipped != 0 || _state == ExecutionState::DONE);
|
||||
incrCountBy(skipped);
|
||||
}
|
||||
|
||||
// In general, we do not have an input row. In fact, we never fetch one.
|
||||
output.setAllowSourceRowUninitialized();
|
||||
|
||||
// We must produce exactly one output row.
|
||||
output.cloneValueInto(_infos.getOutputRegisterId(),
|
||||
InputAqlItemRow{CreateInvalidInputRowHint{}},
|
||||
AqlValue(AqlValueHintUInt(getCount())));
|
||||
|
||||
return {_state, NoStats{}};
|
||||
}
|
||||
|
||||
void incrCount() noexcept { _count++; };
|
||||
void incrCountBy(size_t incr) noexcept { _count += incr; };
|
||||
uint64_t getCount() noexcept { return _count; };
|
||||
|
||||
inline std::pair<ExecutionState, size_t> expectedNumberOfRows(size_t atMost) const {
|
||||
|
|
|
@@ -229,7 +229,8 @@ class OutputAqlItemRow {
|
|||
#endif
|
||||
_baseIndex = index;
|
||||
}
|
||||
// Use this function with caution! We need it only for the SortedCollectExecutor
|
||||
// Use this function with caution! We need it for the SortedCollectExecutor
|
||||
// and CountCollectExecutor.
|
||||
void setAllowSourceRowUninitialized() {
|
||||
_allowSourceRowUninitialized = true;
|
||||
}
|
||||
|
@@ -305,7 +306,8 @@ class OutputAqlItemRow {
|
|||
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
|
||||
bool _setBaseIndexNotUsed;
|
||||
#endif
|
||||
// need this special bool for allowing an empty AqlValue inside the SortedCollectExecutor
|
||||
// Need this special bool for allowing an empty AqlValue inside the
|
||||
// SortedCollectExecutor and CountCollectExecutor.
|
||||
bool _allowSourceRowUninitialized;
|
||||
|
||||
private:
|
||||
|
|
|
@@ -74,6 +74,8 @@ TEST_F(CountCollectExecutorTest, there_are_no_rows_upstream_the_producer_doesnt_
|
|||
AqlValue x = block->getValue(0, 1);
|
||||
ASSERT_TRUE(x.isNumber());
|
||||
ASSERT_TRUE(x.toInt64() == 0);
|
||||
|
||||
ASSERT_EQ(0, fetcher.totalSkipped());
|
||||
}
|
||||
|
||||
TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits) {
|
||||
|
@@ -97,6 +99,8 @@ TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits)
|
|||
AqlValue x = block->getValue(0, 1);
|
||||
ASSERT_TRUE(x.isNumber());
|
||||
ASSERT_TRUE(x.toInt64() == 0);
|
||||
|
||||
ASSERT_EQ(0, fetcher.totalSkipped());
|
||||
}
|
||||
|
||||
TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doesnt_wait) {
|
||||
|
@@ -116,6 +120,8 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doe
|
|||
AqlValue x = block->getValue(0, 1);
|
||||
ASSERT_TRUE(x.isNumber());
|
||||
ASSERT_TRUE(x.toInt64() == 3);
|
||||
|
||||
ASSERT_EQ(3, fetcher.totalSkipped());
|
||||
}
|
||||
|
||||
TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) {
|
||||
|
@@ -147,6 +153,8 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_wai
|
|||
AqlValue x = block->getValue(0, 1);
|
||||
ASSERT_TRUE(x.isNumber());
|
||||
ASSERT_TRUE(x.toInt64() == 3);
|
||||
|
||||
ASSERT_EQ(3, fetcher.totalSkipped());
|
||||
}
|
||||
|
||||
} // namespace aql
|
||||
|
|
|
@@ -120,12 +120,14 @@ std::pair<ExecutionState, size_t> SingleRowFetcherHelper<passBlocksThrough>::ski
|
|||
if (state == ExecutionState::DONE) {
|
||||
size_t skipped = _skipped;
|
||||
_skipped = 0;
|
||||
_totalSkipped += skipped;
|
||||
return {state, skipped};
|
||||
}
|
||||
}
|
||||
|
||||
size_t skipped = _skipped;
|
||||
_skipped = 0;
|
||||
_totalSkipped += skipped;
|
||||
return {state, skipped};
|
||||
}
|
||||
|
||||
|
|
|
@@ -77,6 +77,8 @@ class SingleRowFetcherHelper
|
|||
|
||||
uint64_t nrCalled() { return _nrCalled; }
|
||||
|
||||
size_t totalSkipped() const { return _totalSkipped; }
|
||||
|
||||
std::pair<arangodb::aql::ExecutionState, size_t> skipRows(size_t atMost) override;
|
||||
|
||||
std::pair<arangodb::aql::ExecutionState, arangodb::aql::SharedAqlItemBlockPtr> fetchBlockForPassthrough(
|
||||
|
@@ -113,6 +115,7 @@ class SingleRowFetcherHelper
|
|||
uint64_t _nrItems;
|
||||
uint64_t _nrCalled{};
|
||||
size_t _skipped{};
|
||||
size_t _totalSkipped{};
|
||||
size_t _curIndexInBlock{};
|
||||
size_t _curRowIndex{};
|
||||
size_t _blockSize;
|
||||
|
|
|
@@ -381,13 +381,43 @@ function optimizerCountTestSuite () {
|
|||
}
|
||||
},
|
||||
|
||||
testCollectAggregateUndefined: function () {
|
||||
var randomDocumentID = db["UnitTestsCollection"].any()._id;
|
||||
var query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key for i in [] collect aggregate count = count(i) return {count, start}';
|
||||
var bindParams = {};
|
||||
var options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
|
||||
// This test is not necessary for hashed collect, as hashed collect will not
|
||||
// be used without group variables.
|
||||
testCollectSortedUndefined: function () {
|
||||
const randomDocumentID = db["UnitTestsCollection"].any()._id;
|
||||
const query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key FOR i IN [] COLLECT AGGREGATE count = count(i) RETURN {count, start}';
|
||||
const bindParams = {};
|
||||
const options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
|
||||
|
||||
var results = db._query(query, bindParams, options).toArray();
|
||||
const planNodes = AQL_EXPLAIN(query, {}, options).plan.nodes;
|
||||
assertEqual(
|
||||
[ "SingletonNode", "CalculationNode", "CalculationNode",
|
||||
"EnumerateListNode", "CollectNode", "CalculationNode", "ReturnNode" ],
|
||||
planNodes.map(n => n.type));
|
||||
assertEqual("sorted", planNodes[4].collectOptions.method);
|
||||
|
||||
const results = db._query(query, bindParams, options).toArray();
|
||||
// expectation is that we exactly get one result
|
||||
// count will be 0, start will be undefined
|
||||
assertEqual(1, results.length);
|
||||
assertEqual(0, results[0].count);
|
||||
assertEqual(undefined, results[0].start);
|
||||
},
|
||||
|
||||
testCollectCountUndefined: function () {
|
||||
const randomDocumentID = db["UnitTestsCollection"].any()._id;
|
||||
const query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key FOR i IN [] COLLECT WITH COUNT INTO count RETURN {count, start}';
|
||||
const bindParams = {};
|
||||
const options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
|
||||
|
||||
const planNodes = AQL_EXPLAIN(query, {}, options).plan.nodes;
|
||||
assertEqual(
|
||||
[ "SingletonNode", "CalculationNode", "CalculationNode",
|
||||
"EnumerateListNode", "CollectNode", "CalculationNode", "ReturnNode" ],
|
||||
planNodes.map(n => n.type));
|
||||
assertEqual("count", planNodes[4].collectOptions.method);
|
||||
|
||||
const results = db._query(query, bindParams, options).toArray();
|
||||
// expectation is that we exactly get one result
|
||||
// count will be 0, start will be undefined
|
||||
assertEqual(1, results.length);
|
||||
|
|
Loading…
Reference in New Issue