1
0
Fork 0

Bug fix 3.5/count collect should skip (#9512)

* Make CountCollectExecutor skip rather than fetch

* Cleanup, fix and test for top-level variables

* Added CHANGELOG entry and tests
This commit is contained in:
Tobias Gödderz 2019-07-18 23:47:27 +02:00 committed by KVS85
parent 47d7f9e3e2
commit 7e6b6fbed7
7 changed files with 74 additions and 31 deletions

View File

@@ -1,6 +1,8 @@
v3.5.0-rc.5 (2019-XX-XX)
------------------------
* Fixed a performance regression of COLLECT WITH COUNT INTO.
* Fixed some races in cluster collection creation, which allowed collections with the
same name to be created in parallel under some rare conditions.

View File

@@ -96,40 +96,36 @@ class CountCollectExecutor {
TRI_IF_FAILURE("CountCollectExecutor::produceRows") {
THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG);
}
NoStats stats{};
InputAqlItemRow input{CreateInvalidInputRowHint{}};
if (_state == ExecutionState::DONE) {
return {_state, stats};
return {_state, NoStats{}};
}
while (true) {
std::tie(_state, input) = _fetcher.fetchRow();
while (_state != ExecutionState::DONE) {
size_t skipped;
std::tie(_state, skipped) = _fetcher.skipRows(ExecutionBlock::SkipAllSize());
if (_state == ExecutionState::WAITING) {
return {_state, stats};
TRI_ASSERT(skipped == 0);
return {_state, NoStats{}};
}
if (!input) {
TRI_ASSERT(_state == ExecutionState::DONE);
output.cloneValueInto(_infos.getOutputRegisterId(), input,
AqlValue(AqlValueHintUInt(getCount())));
return {_state, stats};
}
TRI_ASSERT(input.isInitialized());
incrCount();
// Abort if upstream is done
if (_state == ExecutionState::DONE) {
output.cloneValueInto(_infos.getOutputRegisterId(), input,
AqlValue(AqlValueHintUInt(getCount())));
return {_state, stats};
}
TRI_ASSERT(skipped != 0 || _state == ExecutionState::DONE);
incrCountBy(skipped);
}
// In general, we do not have an input row. In fact, we never fetch one.
output.setAllowSourceRowUninitialized();
// We must produce exactly one output row.
output.cloneValueInto(_infos.getOutputRegisterId(),
InputAqlItemRow{CreateInvalidInputRowHint{}},
AqlValue(AqlValueHintUInt(getCount())));
return {_state, NoStats{}};
}
void incrCount() noexcept { _count++; };
void incrCountBy(size_t incr) noexcept { _count += incr; };
uint64_t getCount() noexcept { return _count; };
inline std::pair<ExecutionState, size_t> expectedNumberOfRows(size_t atMost) const {

View File

@@ -229,7 +229,8 @@ class OutputAqlItemRow {
#endif
_baseIndex = index;
}
// Use this function with caution! We need it only for the SortedCollectExecutor
// Use this function with caution! We need it for the SortedCollectExecutor
// and CountCollectExecutor.
void setAllowSourceRowUninitialized() {
_allowSourceRowUninitialized = true;
}
@@ -305,7 +306,8 @@ class OutputAqlItemRow {
#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
bool _setBaseIndexNotUsed;
#endif
// need this special bool for allowing an empty AqlValue inside the SortedCollectExecutor
// Need this special bool for allowing an empty AqlValue inside the
// SortedCollectExecutor and CountCollectExecutor.
bool _allowSourceRowUninitialized;
private:

View File

@@ -74,6 +74,8 @@ TEST_F(CountCollectExecutorTest, there_are_no_rows_upstream_the_producer_doesnt_
AqlValue x = block->getValue(0, 1);
ASSERT_TRUE(x.isNumber());
ASSERT_TRUE(x.toInt64() == 0);
ASSERT_EQ(0, fetcher.totalSkipped());
}
TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits) {
@@ -97,6 +99,8 @@ TEST_F(CountCollectExecutorTest, there_are_now_rows_upstream_the_producer_waits)
AqlValue x = block->getValue(0, 1);
ASSERT_TRUE(x.isNumber());
ASSERT_TRUE(x.toInt64() == 0);
ASSERT_EQ(0, fetcher.totalSkipped());
}
TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doesnt_wait) {
@@ -116,6 +120,8 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_doe
AqlValue x = block->getValue(0, 1);
ASSERT_TRUE(x.isNumber());
ASSERT_TRUE(x.toInt64() == 3);
ASSERT_EQ(3, fetcher.totalSkipped());
}
TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_waits) {
@@ -147,6 +153,8 @@ TEST_F(CountCollectExecutorTest, there_are_rows_in_the_upstream_the_producer_wai
AqlValue x = block->getValue(0, 1);
ASSERT_TRUE(x.isNumber());
ASSERT_TRUE(x.toInt64() == 3);
ASSERT_EQ(3, fetcher.totalSkipped());
}
} // namespace aql

View File

@@ -120,12 +120,14 @@ std::pair<ExecutionState, size_t> SingleRowFetcherHelper<passBlocksThrough>::ski
if (state == ExecutionState::DONE) {
size_t skipped = _skipped;
_skipped = 0;
_totalSkipped += skipped;
return {state, skipped};
}
}
size_t skipped = _skipped;
_skipped = 0;
_totalSkipped += skipped;
return {state, skipped};
}

View File

@@ -77,6 +77,8 @@ class SingleRowFetcherHelper
uint64_t nrCalled() { return _nrCalled; }
size_t totalSkipped() const { return _totalSkipped; }
std::pair<arangodb::aql::ExecutionState, size_t> skipRows(size_t atMost) override;
std::pair<arangodb::aql::ExecutionState, arangodb::aql::SharedAqlItemBlockPtr> fetchBlockForPassthrough(
@@ -113,6 +115,7 @@ class SingleRowFetcherHelper
uint64_t _nrItems;
uint64_t _nrCalled{};
size_t _skipped{};
size_t _totalSkipped{};
size_t _curIndexInBlock{};
size_t _curRowIndex{};
size_t _blockSize;

View File

@@ -381,13 +381,43 @@ function optimizerCountTestSuite () {
}
},
testCollectAggregateUndefined: function () {
var randomDocumentID = db["UnitTestsCollection"].any()._id;
var query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key for i in [] collect aggregate count = count(i) return {count, start}';
var bindParams = {};
var options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
// This test is not necessary for hashed collect, as hashed collect will not
// be used without group variables.
testCollectSortedUndefined: function () {
const randomDocumentID = db["UnitTestsCollection"].any()._id;
const query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key FOR i IN [] COLLECT AGGREGATE count = count(i) RETURN {count, start}';
const bindParams = {};
const options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
var results = db._query(query, bindParams, options).toArray();
const planNodes = AQL_EXPLAIN(query, {}, options).plan.nodes;
assertEqual(
[ "SingletonNode", "CalculationNode", "CalculationNode",
"EnumerateListNode", "CollectNode", "CalculationNode", "ReturnNode" ],
planNodes.map(n => n.type));
assertEqual("sorted", planNodes[4].collectOptions.method);
const results = db._query(query, bindParams, options).toArray();
// expectation is that we exactly get one result
// count will be 0, start will be undefined
assertEqual(1, results.length);
assertEqual(0, results[0].count);
assertEqual(undefined, results[0].start);
},
testCollectCountUndefined: function () {
const randomDocumentID = db["UnitTestsCollection"].any()._id;
const query = 'LET start = DOCUMENT("' + randomDocumentID + '")._key FOR i IN [] COLLECT WITH COUNT INTO count RETURN {count, start}';
const bindParams = {};
const options = {optimizer: {rules: ['-remove-unnecessary-calculations','-remove-unnecessary-calculations-2']}};
const planNodes = AQL_EXPLAIN(query, {}, options).plan.nodes;
assertEqual(
[ "SingletonNode", "CalculationNode", "CalculationNode",
"EnumerateListNode", "CollectNode", "CalculationNode", "ReturnNode" ],
planNodes.map(n => n.type));
assertEqual("count", planNodes[4].collectOptions.method);
const results = db._query(query, bindParams, options).toArray();
// expectation is that we exactly get one result
// count will be 0, start will be undefined
assertEqual(1, results.length);