From aeba4bc2c6ac7e48adb3d8750bd0b244e3065110 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= <tobias@arangodb.com>
Date: Fri, 13 Sep 2019 18:10:43 +0200
Subject: [PATCH] Additional sort-limit tests (#10010) (#10011)

---
 arangod/Aql/ConstrainedSortExecutor.cpp       |   4 +-
 .../@arangodb/aql-profiler-test-helper.js     |  39 +++--
 tests/js/server/aql/aql-profiler.js           | 146 +++++++++++++++---
 .../aql/aql-queries-optimizer-sort-limit.js   |  55 ++++++-
 4 files changed, 208 insertions(+), 36 deletions(-)

diff --git a/arangod/Aql/ConstrainedSortExecutor.cpp b/arangod/Aql/ConstrainedSortExecutor.cpp
index 11c87b63de..b5276c5cf9 100644
--- a/arangod/Aql/ConstrainedSortExecutor.cpp
+++ b/arangod/Aql/ConstrainedSortExecutor.cpp
@@ -209,7 +209,9 @@ std::pair<ExecutionState, NoStats> ConstrainedSortExecutor::produceRows(OutputAq
       return {ExecutionState::DONE, NoStats{}};
     }
     // We should never get here, as the following LIMIT block should never fetch
-    // more than our limit. He may only skip after that. Thus:
+    // more than our limit. It may only skip after that.
+    // But note that this means that this block breaks with usual AQL behaviour!
+    // From this point on (i.e. doneProducing()), this block may only skip, not produce.
     TRI_ASSERT(false);
     THROW_ARANGO_EXCEPTION_MESSAGE(
         TRI_ERROR_INTERNAL_AQL,
diff --git a/js/server/modules/@arangodb/aql-profiler-test-helper.js b/js/server/modules/@arangodb/aql-profiler-test-helper.js
index 3e69685624..0768b094e5 100644
--- a/js/server/modules/@arangodb/aql-profiler-test-helper.js
+++ b/js/server/modules/@arangodb/aql-profiler-test-helper.js
@@ -80,6 +80,7 @@ const nodeTypesList = [
 ];
 
 const CalculationBlock = 'CalculationNode';
+const ConstrainedSortBlock = 'SortLimitNode';
 const CountCollectBlock = 'CountCollectNode';
 const DistinctCollectBlock = 'DistinctCollectNode';
 const EnumerateCollectionBlock = 'EnumerateCollectionNode';
@@ -111,7 +112,7 @@ const IResearchViewBlock = 'IResearchViewNode';
 const IResearchViewOrderedBlock = 'IResearchOrderedViewNode';
 
 const blockTypesList = [
-  CalculationBlock, CountCollectBlock, DistinctCollectBlock,
+  CalculationBlock, ConstrainedSortBlock, CountCollectBlock, DistinctCollectBlock,
   EnumerateCollectionBlock, EnumerateListBlock, FilterBlock,
   HashedCollectBlock, IndexBlock, LimitBlock, NoResultsBlock, RemoteBlock,
   ReturnBlock, ShortestPathBlock, SingletonBlock, SortBlock,
@@ -141,6 +142,14 @@ let translateType = function(nodes, node) {
       } else {
         type = 'UnsortingGatherNode';
       }
+    } else if (node.type === 'SortNode') {
+      if (node.strategy === 'standard') {
+        type = 'SortNode';
+      } else if (node.strategy === 'constrained-heap') {
+        type = 'SortLimitNode';
+      } else {
+        throw new Error('Unhandled sort strategy');
+      }
     }
     types[node.id] = type;
   });
@@ -236,7 +245,7 @@ function getStatsNodesWithId (profile) {
 /// @brief assert structure of profile.stats
 ////////////////////////////////////////////////////////////////////////////////
 
-function assertIsProfileStatsObject (stats, {level}) {
+function assertIsProfileStatsObject (stats, {level, fullCount}) {
   // internal argument check
   expect(level)
     .to.be.a('number')
@@ -259,6 +268,10 @@ function assertIsProfileStatsObject (stats, {level}) {
     statsKeys.push('nodes');
   }
 
+  if (fullCount) {
+    statsKeys.push('fullCount');
+  }
+
   expect(stats).to.have.all.keys(statsKeys);
 
   // check types
@@ -270,6 +283,9 @@ function assertIsProfileStatsObject (stats, {level}) {
   expect(stats.httpRequests).to.be.a('number');
   expect(stats.peakMemoryUsage).to.be.a('number');
   expect(stats.executionTime).to.be.a('number');
+  if (fullCount) {
+    expect(stats.fullCount).to.be.a('number');
+  }
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -377,7 +393,7 @@ function assertIsProfilePlanObject (plan) {
 /// @brief assert that the passed variable looks like a level 0 profile
 ////////////////////////////////////////////////////////////////////////////////
 
-function assertIsLevel0Profile (profile) {
+function assertIsLevel0Profile (profile, {fullCount} = {}) {
   expect(profile)
     .to.be.an('object')
     .that.has.all.keys([
@@ -385,7 +401,7 @@ function assertIsLevel0Profile (profile) {
     'warnings',
   ]);
 
-  assertIsProfileStatsObject(profile.stats, {level: 0});
+  assertIsProfileStatsObject(profile.stats, {level: 0, fullCount});
   assertIsProfileWarningsArray(profile.warnings);
 }
 
@@ -393,7 +409,7 @@ function assertIsLevel0Profile (profile) {
 /// @brief assert that the passed variable looks like a level 1 profile
 ////////////////////////////////////////////////////////////////////////////////
 
-function assertIsLevel1Profile (profile) {
+function assertIsLevel1Profile (profile, {fullCount} = {}) {
   expect(profile)
     .to.be.an('object')
     .that.has.all.keys([
@@ -402,7 +418,7 @@ function assertIsLevel1Profile (profile) {
     'profile',
   ]);
 
-  assertIsProfileStatsObject(profile.stats, {level: 1});
+  assertIsProfileStatsObject(profile.stats, {level: 1, fullCount});
   assertIsProfileWarningsArray(profile.warnings);
   assertIsProfileProfileObject(profile.profile);
 }
@@ -411,7 +427,7 @@ function assertIsLevel1Profile (profile) {
 /// @brief assert that the passed variable looks like a level 2 profile
 ////////////////////////////////////////////////////////////////////////////////
 
-function assertIsLevel2Profile (profile) {
+function assertIsLevel2Profile (profile, {fullCount} = {}) {
   expect(profile)
     .to.be.an('object')
     .that.has.all.keys([
@@ -421,7 +437,7 @@ function assertIsLevel2Profile (profile) {
     'plan',
   ]);
 
-  assertIsProfileStatsObject(profile.stats, {level: 2});
+  assertIsProfileStatsObject(profile.stats, {level: 2, fullCount});
   assertIsProfileWarningsArray(profile.warnings);
   assertIsProfileProfileObject(profile.profile);
   assertIsProfilePlanObject(profile.plan);
@@ -505,17 +521,19 @@ function runDefaultChecks (
     prepare = () => {},
     bind = rows => ({rows}),
     options = {},
+    testRowCounts = defaultTestRowCounts,
     additionalTestRowCounts = [],
   }
 ) {
-  const testRowCounts = _.uniq(defaultTestRowCounts.concat(additionalTestRowCounts).sort());
+  const {fullCount} = options;
+  testRowCounts = _.uniq(testRowCounts.concat(additionalTestRowCounts).sort());
   for (const rows of testRowCounts) {
     prepare(rows);
     const profile = db._query(query, bind(rows),
       _.merge(options, {profile: 2, defaultBatchSize})
     ).getExtra();
 
-    assertIsLevel2Profile(profile);
+    assertIsLevel2Profile(profile, {fullCount});
     assertStatsNodesMatchPlanNodes(profile);
 
     const batches = Math.ceil(rows / defaultBatchSize);
@@ -721,6 +739,7 @@ exports.UpdateNode = UpdateNode;
 exports.UpsertNode = UpsertNode;
 exports.nodeTypesList = nodeTypesList;
 exports.CalculationBlock = CalculationBlock;
+exports.ConstrainedSortBlock = ConstrainedSortBlock;
 exports.CountCollectBlock = CountCollectBlock;
 exports.DistinctCollectBlock = DistinctCollectBlock;
 exports.EnumerateCollectionBlock = EnumerateCollectionBlock;
diff --git a/tests/js/server/aql/aql-profiler.js b/tests/js/server/aql/aql-profiler.js
index a14329fd13..f7e9829686 100644
--- a/tests/js/server/aql/aql-profiler.js
+++ b/tests/js/server/aql/aql-profiler.js
@@ -32,6 +32,7 @@ const profHelper = require("@arangodb/aql-profiler-test-helper");
 const db = require('@arangodb').db;
 const jsunity = require("jsunity");
 const assert = jsunity.jsUnity.assertions;
+const _ = require('lodash');
 
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -65,6 +66,7 @@ function ahuacatlProfilerTestSuite () {
 
   // import some names from profHelper directly into our namespace:
   const defaultBatchSize = profHelper.defaultBatchSize;
+  const defaultTestRowCounts = profHelper.defaultTestRowCounts;
 
   const { CalculationNode, CollectNode, DistributeNode, EnumerateCollectionNode,
     EnumerateListNode, EnumerateViewNode, FilterNode, GatherNode, IndexNode,
@@ -72,7 +74,7 @@ function ahuacatlProfilerTestSuite () {
     ReturnNode, ScatterNode, ShortestPathNode, SingletonNode, SortNode,
     SubqueryNode, TraversalNode, UpdateNode, UpsertNode } = profHelper;
 
-  const { CalculationBlock, CountCollectBlock, DistinctCollectBlock,
+  const { CalculationBlock, ConstrainedSortBlock, CountCollectBlock, DistinctCollectBlock,
     EnumerateCollectionBlock, EnumerateListBlock, FilterBlock,
     HashedCollectBlock, IndexBlock, LimitBlock, NoResultsBlock, RemoteBlock,
     ReturnBlock, ShortestPathBlock, SingletonBlock, SortBlock,
@@ -81,40 +83,84 @@ function ahuacatlProfilerTestSuite () {
     UpsertBlock, ScatterBlock, DistributeBlock, IResearchViewUnorderedBlock,
     IResearchViewBlock, IResearchViewOrderedBlock } = profHelper;
 
-  // See the limit tests (e.g. testLimitBlock3) for limit() and skip().
+  // See the limit tests (e.g. testLimitBlock3) for limit() and offset().
   const additionalLimitTestRowCounts = [
     // limit() = 1000 ± 1:
     1332, 1333, 1334,
-    // skip() = 1000 ± 1:
+    // offset() = 1000 ± 1:
     3999, 4000, 4003, 4004,
     // limit() = 2000 ± 1:
     2665, 2666, 2667,
-    // skip() = 2000 ± 1:
+    // offset() = 2000 ± 1:
     7999, 8000, 8003, 8004,
   ];
 
-  {
-    // These are copies from testLimitBlock3.
-    const skip = rows => Math.floor(rows/4);
-    const limit = rows => Math.ceil(3*rows/4);
+  const offset = rows => Math.floor(rows/4);
+  const limit = rows => Math.ceil(3*rows/4);
+  const offsetBatches = rows => Math.ceil(offset(rows) / defaultBatchSize);
+  const skipOffsetBatches = rows => Math.ceil(offset(rows) === 0 ? 0 : 1);
+  const limitBatches = rows => Math.ceil(limit(rows) / defaultBatchSize);
 
+  {
     // This is more documentation than anything else:
     assert.assertEqual(999, limit(1332));
     assert.assertEqual(1000, limit(1333));
     assert.assertEqual(1001, limit(1334));
-    assert.assertEqual(999, skip(3999));
-    assert.assertEqual(1000, skip(4000));
-    assert.assertEqual(1000, skip(4003));
-    assert.assertEqual(1001, skip(4004));
+    assert.assertEqual(999, offset(3999));
+    assert.assertEqual(1000, offset(4000));
+    assert.assertEqual(1000, offset(4003));
+    assert.assertEqual(1001, offset(4004));
     assert.assertEqual(1999, limit(2665));
     assert.assertEqual(2000, limit(2666));
     assert.assertEqual(2001, limit(2667));
-    assert.assertEqual(1999, skip(7999));
-    assert.assertEqual(2000, skip(8000));
-    assert.assertEqual(2000, skip(8003));
-    assert.assertEqual(2001, skip(8004));
+    assert.assertEqual(1999, offset(7999));
+    assert.assertEqual(2000, offset(8000));
+    assert.assertEqual(2000, offset(8003));
+    assert.assertEqual(2001, offset(8004));
   }
 
+  // This is the decision made by the sort-limit optimizer rule:
+  const usesHeapSort = rows => {
+    const n = rows;
+    const m = limit(rows);
+    return rows >= 100 && 0.25 * n * Math.log2(m) + m * Math.log2(m) < n * Math.log2(n);
+  };
+  // // Filter out row counts that would use the standard sort strategy
+  // const sortLimitTestRowCounts = _.uniq(defaultTestRowCounts.concat(additionalLimitTestRowCounts).sort())
+  //   .filter(usesHeapSort);
+  const sortLimitTestRowCounts =
+    // defaults, minus those < 100:
+    [100, 999, 1000, 1001, 1500, 2000, 10500]
+      .concat([
+        // limit() - offset() = 1000 ± 1:
+      1995, 1997, 1998, 2000, 1999, 2001,
+        // limit() - offset() = 2000 ± 1:
+      3995, 3997, 3998, 4000, 3999, 4001
+    ]);
+  const limitMinusSkip = rows => limit(rows) - offset(rows);
+  const limitMinusSkipBatches = rows => Math.ceil(limitMinusSkip(rows) / defaultBatchSize);
+  for (const rows of sortLimitTestRowCounts) {
+    assert.assertTrue(usesHeapSort(rows),
+      `Test row count would not trigger sort-limit rule: ${rows}`);
+  }
+  {
+    // Documentation of the expected proportions. These are a little wonky due to the rounding,
+    // but that's fine for the purpose.
+    assert.assertEqual(999, limitMinusSkip(1995));
+    assert.assertEqual(999, limitMinusSkip(1997));
+    assert.assertEqual(1000, limitMinusSkip(1998));
+    assert.assertEqual(1000, limitMinusSkip(2000));
+    assert.assertEqual(1001, limitMinusSkip(1999));
+    assert.assertEqual(1001, limitMinusSkip(2001));
+    assert.assertEqual(1999, limitMinusSkip(3995));
+    assert.assertEqual(1999, limitMinusSkip(3997));
+    assert.assertEqual(2000, limitMinusSkip(3998));
+    assert.assertEqual(2000, limitMinusSkip(4000));
+    assert.assertEqual(2001, limitMinusSkip(3999));
+    assert.assertEqual(2001, limitMinusSkip(4001));
+  }
+
+
   return {
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -140,10 +186,12 @@ function ahuacatlProfilerTestSuite () {
       const profileDefault = db._query(query, {}).getExtra();
       const profile0 = db._query(query, {}, {profile: 0}).getExtra();
       const profileFalse = db._query(query, {}, {profile: false}).getExtra();
+      const profile0WithFullCount = db._query(query, {}, {profile: 0, fullCount: true}).getExtra();
 
       profHelper.assertIsLevel0Profile(profileDefault);
       profHelper.assertIsLevel0Profile(profile0);
       profHelper.assertIsLevel0Profile(profileFalse);
+      profHelper.assertIsLevel0Profile(profile0WithFullCount, {fullCount: true});
     },
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -154,9 +202,11 @@ function ahuacatlProfilerTestSuite () {
       const query = 'RETURN 1';
       const profile1 = db._query(query, {}, {profile: 1}).getExtra();
       const profileTrue = db._query(query, {}, {profile: true}).getExtra();
+      const profile1WithFullCount = db._query(query, {}, {profile: 1, fullCount: true}).getExtra();
 
       profHelper.assertIsLevel1Profile(profile1);
       profHelper.assertIsLevel1Profile(profileTrue);
+      profHelper.assertIsLevel1Profile(profile1WithFullCount, {fullCount: true});
     },
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -166,9 +216,12 @@ function ahuacatlProfilerTestSuite () {
     testProfile2Fields : function () {
       const query = 'RETURN 1';
       const profile2 = db._query(query, {}, {profile: 2}).getExtra();
+      const profile2WithFullCount = db._query(query, {}, {profile: 2, fullCount: true}).getExtra();
 
       profHelper.assertIsLevel2Profile(profile2);
       profHelper.assertStatsNodesMatchPlanNodes(profile2);
+      profHelper.assertIsLevel2Profile(profile2WithFullCount, {fullCount: true});
+      profHelper.assertStatsNodesMatchPlanNodes(profile2WithFullCount);
     },
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -460,22 +513,18 @@ function ahuacatlProfilerTestSuite () {
     ////////////////////////////////////////////////////////////////////////////////
 
     testLimitBlock3: function() {
-      const query = 'FOR i IN 1..@rows LIMIT @skip, @limit RETURN i';
-      const skip = rows => Math.floor(rows/4);
-      const skipBatches = rows => Math.ceil(skip(rows) / defaultBatchSize);
-      const limit = rows => Math.ceil(3*rows/4);
-      const limitBatches = rows => Math.ceil(limit(rows) / defaultBatchSize);
+      const query = 'FOR i IN 1..@rows LIMIT @offset, @limit RETURN i';
 
       const genNodeList = (rows, batches) => [
         {type: SingletonBlock, calls: 1, items: 1},
         {type: CalculationBlock, calls: 1, items: 1},
-        {type: EnumerateListBlock, calls: limitBatches(rows) + skipBatches(rows), items: limit(rows) + skip(rows)},
+        {type: EnumerateListBlock, calls: limitBatches(rows) + offsetBatches(rows), items: limit(rows) + offset(rows)},
         {type: LimitBlock, calls: limitBatches(rows), items: limit(rows)},
         {type: ReturnBlock, calls: limitBatches(rows), items: limit(rows)},
       ];
       const bind = (rows) => ({
         rows,
-        skip: skip(rows),
+        offset: offset(rows),
         limit: limit(rows),
       });
       const additionalTestRowCounts = additionalLimitTestRowCounts;
@@ -585,6 +634,57 @@ function ahuacatlProfilerTestSuite () {
       profHelper.runDefaultChecks({query, genNodeList, bind});
     },
 
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test SortLimitBlock
+////////////////////////////////////////////////////////////////////////////////
+
+    testSortLimitBlock1 : function () {
+      const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i';
+      const genNodeList = (rows, batches) => [
+        { type : SingletonBlock, calls : 1, items : 1 },
+        { type : CalculationBlock, calls : 1, items : 1 },
+        { type : EnumerateListBlock, calls : batches, items : rows },
+        { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows), items : limit(rows) },
+        { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) },
+        { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }
+      ];
+      const bind = rows => ({
+        rows,
+        // ~1/4 of rows:
+        offset: offset(rows),
+        // ~1/2 of rows:
+        limit: limitMinusSkip(rows),
+      });
+      profHelper.runDefaultChecks({query, genNodeList, bind, testRowCounts: sortLimitTestRowCounts});
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test SortLimitBlock
+/// with fullCount
+////////////////////////////////////////////////////////////////////////////////
+
+    testSortLimitBlock2 : function () {
+      const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i';
+      const remainder = rows => rows - limit(rows);
+      const remainderBatches = rows => remainder(rows) === 0 ? 0 : 1;
+      const genNodeList = (rows, batches) => [
+        { type : SingletonBlock, calls : 1, items : 1 },
+        { type : CalculationBlock, calls : 1, items : 1 },
+        { type : EnumerateListBlock, calls : batches, items : rows },
+        { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows) + remainderBatches(rows), items : rows },
+        { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) },
+        { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }
+      ];
+      const bind = rows => ({
+        rows,
+        // ~1/4 of rows:
+        offset: offset(rows),
+        // ~1/2 of rows:
+        limit: limitMinusSkip(rows),
+      });
+      profHelper.runDefaultChecks({query, genNodeList, bind, testRowCounts: sortLimitTestRowCounts, options: {fullCount: true}});
+    },
+
     ////////////////////////////////////////////////////////////////////////////////
     /// @brief test SortedCollectBlock
     ////////////////////////////////////////////////////////////////////////////////
diff --git a/tests/js/server/aql/aql-queries-optimizer-sort-limit.js b/tests/js/server/aql/aql-queries-optimizer-sort-limit.js
index c85710e0a8..a1164ba65f 100644
--- a/tests/js/server/aql/aql-queries-optimizer-sort-limit.js
+++ b/tests/js/server/aql/aql-queries-optimizer-sort-limit.js
@@ -308,8 +308,8 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
 /// fullCount when 3.5 was released.
 ////////////////////////////////////////////////////////////////////////////////
 
-    testLimitFullCollectionSortWithFullCount : function () {
-      const query = "FOR c IN " + cn + " SORT c.value LIMIT 20, 10 RETURN c";
+    testLimitFullCollectionSortWithFullCount: function () {
+      const query = `FOR c IN ${cn} SORT c.value LIMIT 20, 10 RETURN c`;
 
       const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
 
@@ -331,6 +331,57 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
       assertEqual(sorts[0].strategy, "constrained-heap");
     },
 
+////////////////////////////////////////////////////////////////////////////////
+/// @brief check limit optimization with sort and fullCount
+/// Here, there are fewer rows to emit than the limit asks for.
+////////////////////////////////////////////////////////////////////////////////
+
+    testLimitFullCollectionSortWithFullCountAndFewRows: function () {
+      const query = `FOR c IN ${cn} FILTER c.value < 30 SORT c.value LIMIT 20, 700 RETURN c`;
+
+      const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
+
+      const values = queryResult.json;
+      const fullCount = queryResult.stats.fullCount;
+
+      assertEqual(10, values.length);
+
+      assertEqual(20, values[0].value);
+      assertEqual(21, values[1].value);
+      assertEqual(22, values[2].value);
+      assertEqual(29, values[9].value);
+
+      assertEqual(fullCount, 30);
+
+      const sorts = getSorts(query);
+      assertEqual(sorts.length, 1);
+      assertEqual(sorts[0].limit, 720);
+      assertEqual(sorts[0].strategy, 'constrained-heap');
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief check limit optimization with sort and fullCount
+/// Here, all rows are skipped during the limit block's offset.
+////////////////////////////////////////////////////////////////////////////////
+
+    testLimitFullCollectionSortWithFullCountAndAllRowsSkipped: function () {
+      const query = `FOR c IN ${cn} FILTER c.value < 30 SORT c.value LIMIT 40, 700 RETURN c`;
+
+      const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
+
+      const values = queryResult.json;
+      const fullCount = queryResult.stats.fullCount;
+
+      assertEqual(0, values.length);
+
+      assertEqual(fullCount, 30);
+
+      const sorts = getSorts(query);
+      assertEqual(sorts.length, 1);
+      assertEqual(sorts[0].limit, 740);
+      assertEqual(sorts[0].strategy, 'constrained-heap');
+    },
+
   };
 }