Additional sort-limit tests (#10010) (#10011)

2019-09-13 18:10:43 +02:00 · 2019-09-13 18:10:43 +02:00 · aeba4bc2c6
parent 4656bd0a6a
commit aeba4bc2c6
4 changed files with 208 additions and 36 deletions
--- a/arangod/Aql/ConstrainedSortExecutor.cpp
+++ b/arangod/Aql/ConstrainedSortExecutor.cpp
@@ -209,7 +209,9 @@ std::pair<ExecutionState, NoStats> ConstrainedSortExecutor::produceRows(OutputAq
      return {ExecutionState::DONE, NoStats{}};
    }
    // We should never get here, as the following LIMIT block should never fetch
-    // more than our limit. He may only skip after that. Thus:
+    // more than our limit. It may only skip after that.
+    // Note that this deviates from the usual AQL block behaviour:
+    // from this point on (i.e. doneProducing()), this block may only skip, not produce.
    TRI_ASSERT(false);
    THROW_ARANGO_EXCEPTION_MESSAGE(
        TRI_ERROR_INTERNAL_AQL,
--- a/js/server/modules/@arangodb/aql-profiler-test-helper.js
+++ b/js/server/modules/@arangodb/aql-profiler-test-helper.js
@@ -80,6 +80,7 @@ const nodeTypesList = [
 ];

 const CalculationBlock = 'CalculationNode';
+const ConstrainedSortBlock = 'SortLimitNode';
 const CountCollectBlock = 'CountCollectNode';
 const DistinctCollectBlock = 'DistinctCollectNode';
 const EnumerateCollectionBlock = 'EnumerateCollectionNode';
@@ -111,7 +112,7 @@ const IResearchViewBlock = 'IResearchViewNode';
 const IResearchViewOrderedBlock = 'IResearchOrderedViewNode';

 const blockTypesList = [
-  CalculationBlock, CountCollectBlock, DistinctCollectBlock,
+  CalculationBlock, ConstrainedSortBlock, CountCollectBlock, DistinctCollectBlock,
  EnumerateCollectionBlock, EnumerateListBlock, FilterBlock,
  HashedCollectBlock, IndexBlock, LimitBlock, NoResultsBlock, RemoteBlock,
  ReturnBlock, ShortestPathBlock, SingletonBlock, SortBlock,
@@ -141,6 +142,14 @@ let translateType = function(nodes, node) {
      } else {
        type = 'UnsortingGatherNode';
      }
+    } else if (node.type === 'SortNode') {
+      if (node.strategy === 'standard') {
+        type = 'SortNode';
+      } else if (node.strategy === 'constrained-heap') {
+        type = 'SortLimitNode';
+      } else {
+        throw new Error('Unhandled sort strategy');
+      }
    }
    types[node.id] = type;
  });
@@ -236,7 +245,7 @@ function getStatsNodesWithId (profile) {
 /// @brief assert structure of profile.stats
 ////////////////////////////////////////////////////////////////////////////////

-function assertIsProfileStatsObject (stats, {level}) {
+function assertIsProfileStatsObject (stats, {level, fullCount}) {
  // internal argument check
  expect(level)
    .to.be.a('number')
@@ -259,6 +268,10 @@ function assertIsProfileStatsObject (stats, {level}) {
    statsKeys.push('nodes');
  }

+  if (fullCount) {
+    statsKeys.push('fullCount');
+  }
+
  expect(stats).to.have.all.keys(statsKeys);

  // check types
@@ -270,6 +283,9 @@ function assertIsProfileStatsObject (stats, {level}) {
  expect(stats.httpRequests).to.be.a('number');
  expect(stats.peakMemoryUsage).to.be.a('number');
  expect(stats.executionTime).to.be.a('number');
+  if (fullCount) {
+    expect(stats.fullCount).to.be.a('number');
+  }
 }

 ////////////////////////////////////////////////////////////////////////////////
@@ -377,7 +393,7 @@ function assertIsProfilePlanObject (plan) {
 /// @brief assert that the passed variable looks like a level 0 profile
 ////////////////////////////////////////////////////////////////////////////////

-function assertIsLevel0Profile (profile) {
+function assertIsLevel0Profile (profile, {fullCount} = {}) {
  expect(profile)
    .to.be.an('object')
    .that.has.all.keys([
@@ -385,7 +401,7 @@ function assertIsLevel0Profile (profile) {
    'warnings',
  ]);

-  assertIsProfileStatsObject(profile.stats, {level: 0});
+  assertIsProfileStatsObject(profile.stats, {level: 0, fullCount});
  assertIsProfileWarningsArray(profile.warnings);
 }

@@ -393,7 +409,7 @@ function assertIsLevel0Profile (profile) {
 /// @brief assert that the passed variable looks like a level 1 profile
 ////////////////////////////////////////////////////////////////////////////////

-function assertIsLevel1Profile (profile) {
+function assertIsLevel1Profile (profile, {fullCount} = {}) {
  expect(profile)
    .to.be.an('object')
    .that.has.all.keys([
@@ -402,7 +418,7 @@ function assertIsLevel1Profile (profile) {
    'profile',
  ]);

-  assertIsProfileStatsObject(profile.stats, {level: 1});
+  assertIsProfileStatsObject(profile.stats, {level: 1, fullCount});
  assertIsProfileWarningsArray(profile.warnings);
  assertIsProfileProfileObject(profile.profile);
 }
@@ -411,7 +427,7 @@ function assertIsLevel1Profile (profile) {
 /// @brief assert that the passed variable looks like a level 2 profile
 ////////////////////////////////////////////////////////////////////////////////

-function assertIsLevel2Profile (profile) {
+function assertIsLevel2Profile (profile, {fullCount} = {}) {
  expect(profile)
    .to.be.an('object')
    .that.has.all.keys([
@@ -421,7 +437,7 @@ function assertIsLevel2Profile (profile) {
    'plan',
  ]);

-  assertIsProfileStatsObject(profile.stats, {level: 2});
+  assertIsProfileStatsObject(profile.stats, {level: 2, fullCount});
  assertIsProfileWarningsArray(profile.warnings);
  assertIsProfileProfileObject(profile.profile);
  assertIsProfilePlanObject(profile.plan);
@@ -505,17 +521,19 @@ function runDefaultChecks (
    prepare = () => {},
    bind = rows => ({rows}),
    options = {},
+    testRowCounts = defaultTestRowCounts,
    additionalTestRowCounts = [],
  }
 ) {
-  const testRowCounts = _.uniq(defaultTestRowCounts.concat(additionalTestRowCounts).sort());
+  const {fullCount} = options;
+  testRowCounts = _.uniq(testRowCounts.concat(additionalTestRowCounts).sort());
  for (const rows of testRowCounts) {
    prepare(rows);
    const profile = db._query(query, bind(rows),
      _.merge(options, {profile: 2, defaultBatchSize})
    ).getExtra();

-    assertIsLevel2Profile(profile);
+    assertIsLevel2Profile(profile, {fullCount});
    assertStatsNodesMatchPlanNodes(profile);

    const batches = Math.ceil(rows / defaultBatchSize);
@@ -721,6 +739,7 @@ exports.UpdateNode = UpdateNode;
 exports.UpsertNode = UpsertNode;
 exports.nodeTypesList = nodeTypesList;
 exports.CalculationBlock = CalculationBlock;
+exports.ConstrainedSortBlock = ConstrainedSortBlock;
 exports.CountCollectBlock = CountCollectBlock;
 exports.DistinctCollectBlock = DistinctCollectBlock;
 exports.EnumerateCollectionBlock = EnumerateCollectionBlock;
--- a/tests/js/server/aql/aql-profiler.js
+++ b/tests/js/server/aql/aql-profiler.js
@@ -32,6 +32,7 @@ const profHelper = require("@arangodb/aql-profiler-test-helper");
 const db = require('@arangodb').db;
 const jsunity = require("jsunity");
 const assert = jsunity.jsUnity.assertions;
+const _ = require('lodash');


 ////////////////////////////////////////////////////////////////////////////////
@@ -65,6 +66,7 @@ function ahuacatlProfilerTestSuite () {

  // import some names from profHelper directly into our namespace:
  const defaultBatchSize = profHelper.defaultBatchSize;
+  const defaultTestRowCounts = profHelper.defaultTestRowCounts;

  const { CalculationNode, CollectNode, DistributeNode, EnumerateCollectionNode,
    EnumerateListNode, EnumerateViewNode, FilterNode, GatherNode, IndexNode,
@@ -72,7 +74,7 @@ function ahuacatlProfilerTestSuite () {
    ReturnNode, ScatterNode, ShortestPathNode, SingletonNode, SortNode,
    SubqueryNode, TraversalNode, UpdateNode, UpsertNode } = profHelper;

-  const { CalculationBlock, CountCollectBlock, DistinctCollectBlock,
+  const { CalculationBlock, ConstrainedSortBlock, CountCollectBlock, DistinctCollectBlock,
    EnumerateCollectionBlock, EnumerateListBlock, FilterBlock,
    HashedCollectBlock, IndexBlock, LimitBlock, NoResultsBlock, RemoteBlock,
    ReturnBlock, ShortestPathBlock, SingletonBlock, SortBlock,
@@ -81,40 +83,84 @@ function ahuacatlProfilerTestSuite () {
    UpsertBlock, ScatterBlock, DistributeBlock, IResearchViewUnorderedBlock,
    IResearchViewBlock, IResearchViewOrderedBlock } = profHelper;

-  // See the limit tests (e.g. testLimitBlock3) for limit() and skip().
+  // See the limit tests (e.g. testLimitBlock3) for limit() and offset().
  const additionalLimitTestRowCounts = [
    // limit() = 1000 ± 1:
    1332, 1333, 1334,
-    // skip() = 1000 ± 1:
+    // offset() = 1000 ± 1:
    3999, 4000, 4003, 4004,
    // limit() = 2000 ± 1:
    2665, 2666, 2667,
-    // skip() = 2000 ± 1:
+    // offset() = 2000 ± 1:
    7999, 8000, 8003, 8004,
  ];

-  {
-    // These are copies from testLimitBlock3.
-    const skip = rows => Math.floor(rows/4);
-    const limit = rows => Math.ceil(3*rows/4);
+  const offset = rows => Math.floor(rows/4);
+  const limit = rows => Math.ceil(3*rows/4);
+  const offsetBatches = rows => Math.ceil(offset(rows) / defaultBatchSize);
+  const skipOffsetBatches = rows => Math.ceil(offset(rows) === 0 ? 0 : 1);
+  const limitBatches = rows => Math.ceil(limit(rows) / defaultBatchSize);

+  {
    // This is more documentation than anything else:
    assert.assertEqual(999, limit(1332));
    assert.assertEqual(1000, limit(1333));
    assert.assertEqual(1001, limit(1334));
-    assert.assertEqual(999, skip(3999));
-    assert.assertEqual(1000, skip(4000));
-    assert.assertEqual(1000, skip(4003));
-    assert.assertEqual(1001, skip(4004));
+    assert.assertEqual(999, offset(3999));
+    assert.assertEqual(1000, offset(4000));
+    assert.assertEqual(1000, offset(4003));
+    assert.assertEqual(1001, offset(4004));
    assert.assertEqual(1999, limit(2665));
    assert.assertEqual(2000, limit(2666));
    assert.assertEqual(2001, limit(2667));
-    assert.assertEqual(1999, skip(7999));
-    assert.assertEqual(2000, skip(8000));
-    assert.assertEqual(2000, skip(8003));
-    assert.assertEqual(2001, skip(8004));
+    assert.assertEqual(1999, offset(7999));
+    assert.assertEqual(2000, offset(8000));
+    assert.assertEqual(2000, offset(8003));
+    assert.assertEqual(2001, offset(8004));
  }

+  // This is the decision made by the sort-limit optimizer rule:
+  const usesHeapSort = rows => {
+    const n = rows;
+    const m = limit(rows);
+    return rows >= 100 && 0.25 * n * Math.log2(m) + m * Math.log2(m) < n * Math.log2(n);
+  };
+  // // Filter out row counts that would use the standard sort strategy
+  // const sortLimitTestRowCounts = _.uniq(defaultTestRowCounts.concat(additionalLimitTestRowCounts).sort())
+  //   .filter(usesHeapSort);
+  const sortLimitTestRowCounts =
+    // defaults, minus those < 100:
+    [100, 999, 1000, 1001, 1500, 2000, 10500]
+      .concat([
+        // limit() - offset() = 1000 ± 1:
+      1995, 1997, 1998, 2000, 1999, 2001,
+        // limit() - offset() = 2000 ± 1:
+      3995, 3997, 3998, 4000, 3999, 4001
+    ]);
+  const limitMinusSkip = rows => limit(rows) - offset(rows);
+  const limitMinusSkipBatches = rows => Math.ceil(limitMinusSkip(rows) / defaultBatchSize);
+  for (const rows of sortLimitTestRowCounts) {
+    assert.assertTrue(usesHeapSort(rows),
+      `Test row count would not trigger sort-limit rule: ${rows}`);
+  }
+  {
+    // Documentation of the expected proportions. These are a little wonky due to the rounding,
+    // but that's fine for the purpose.
+    assert.assertEqual(999, limitMinusSkip(1995));
+    assert.assertEqual(999, limitMinusSkip(1997));
+    assert.assertEqual(1000, limitMinusSkip(1998));
+    assert.assertEqual(1000, limitMinusSkip(2000));
+    assert.assertEqual(1001, limitMinusSkip(1999));
+    assert.assertEqual(1001, limitMinusSkip(2001));
+    assert.assertEqual(1999, limitMinusSkip(3995));
+    assert.assertEqual(1999, limitMinusSkip(3997));
+    assert.assertEqual(2000, limitMinusSkip(3998));
+    assert.assertEqual(2000, limitMinusSkip(4000));
+    assert.assertEqual(2001, limitMinusSkip(3999));
+    assert.assertEqual(2001, limitMinusSkip(4001));
+  }
+
+
  return {

 ////////////////////////////////////////////////////////////////////////////////
@@ -140,10 +186,12 @@ function ahuacatlProfilerTestSuite () {
      const profileDefault = db._query(query, {}).getExtra();
      const profile0 = db._query(query, {}, {profile: 0}).getExtra();
      const profileFalse = db._query(query, {}, {profile: false}).getExtra();
+      const profile0WithFullCount = db._query(query, {}, {profile: 0, fullCount: true}).getExtra();

      profHelper.assertIsLevel0Profile(profileDefault);
      profHelper.assertIsLevel0Profile(profile0);
      profHelper.assertIsLevel0Profile(profileFalse);
+      profHelper.assertIsLevel0Profile(profile0WithFullCount, {fullCount: true});
    },

 ////////////////////////////////////////////////////////////////////////////////
@@ -154,9 +202,11 @@ function ahuacatlProfilerTestSuite () {
      const query = 'RETURN 1';
      const profile1 = db._query(query, {}, {profile: 1}).getExtra();
      const profileTrue = db._query(query, {}, {profile: true}).getExtra();
+      const profile1WithFullCount = db._query(query, {}, {profile: 1, fullCount: true}).getExtra();

      profHelper.assertIsLevel1Profile(profile1);
      profHelper.assertIsLevel1Profile(profileTrue);
+      profHelper.assertIsLevel1Profile(profile1WithFullCount, {fullCount: true});
    },

 ////////////////////////////////////////////////////////////////////////////////
@@ -166,9 +216,12 @@ function ahuacatlProfilerTestSuite () {
    testProfile2Fields : function () {
      const query = 'RETURN 1';
      const profile2 = db._query(query, {}, {profile: 2}).getExtra();
+      const profile2WithFullCount = db._query(query, {}, {profile: 2, fullCount: true}).getExtra();

      profHelper.assertIsLevel2Profile(profile2);
      profHelper.assertStatsNodesMatchPlanNodes(profile2);
+      profHelper.assertIsLevel2Profile(profile2WithFullCount, {fullCount: true});
+      profHelper.assertStatsNodesMatchPlanNodes(profile2WithFullCount);
    },

 ////////////////////////////////////////////////////////////////////////////////
@@ -460,22 +513,18 @@ function ahuacatlProfilerTestSuite () {
    ////////////////////////////////////////////////////////////////////////////////

    testLimitBlock3: function() {
-      const query = 'FOR i IN 1..@rows LIMIT @skip, @limit RETURN i';
-      const skip = rows => Math.floor(rows/4);
-      const skipBatches = rows => Math.ceil(skip(rows) / defaultBatchSize);
-      const limit = rows => Math.ceil(3*rows/4);
-      const limitBatches = rows => Math.ceil(limit(rows) / defaultBatchSize);
+      const query = 'FOR i IN 1..@rows LIMIT @offset, @limit RETURN i';

      const genNodeList = (rows, batches) => [
        {type: SingletonBlock, calls: 1, items: 1},
        {type: CalculationBlock, calls: 1, items: 1},
-        {type: EnumerateListBlock, calls: limitBatches(rows) + skipBatches(rows), items: limit(rows) + skip(rows)},
+        {type: EnumerateListBlock, calls: limitBatches(rows) + offsetBatches(rows), items: limit(rows) + offset(rows)},
        {type: LimitBlock, calls: limitBatches(rows), items: limit(rows)},
        {type: ReturnBlock, calls: limitBatches(rows), items: limit(rows)},
      ];
      const bind = (rows) => ({
        rows,
-        skip: skip(rows),
+        offset: offset(rows),
        limit: limit(rows),
      });
      const additionalTestRowCounts = additionalLimitTestRowCounts;
@@ -585,6 +634,57 @@ function ahuacatlProfilerTestSuite () {
      profHelper.runDefaultChecks({query, genNodeList, bind});
    },

+////////////////////////////////////////////////////////////////////////////////
+/// @brief test SortLimitBlock
+////////////////////////////////////////////////////////////////////////////////
+
+    testSortLimitBlock1 : function () {
+      const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i';
+      const genNodeList = (rows, batches) => [
+        { type : SingletonBlock, calls : 1, items : 1 },
+        { type : CalculationBlock, calls : 1, items : 1 },
+        { type : EnumerateListBlock, calls : batches, items : rows },
+        { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows), items : limit(rows) },
+        { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) },
+        { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }
+      ];
+      const bind = rows => ({
+        rows,
+        // ~1/4 of rows:
+        offset: offset(rows),
+        // ~1/2 of rows:
+        limit: limitMinusSkip(rows),
+      });
+      profHelper.runDefaultChecks({query, genNodeList, bind, testRowCounts: sortLimitTestRowCounts});
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief test SortLimitBlock
+/// with fullCount
+////////////////////////////////////////////////////////////////////////////////
+
+    testSortLimitBlock2 : function () {
+      const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i';
+      const remainder = rows => rows - limit(rows);
+      const remainderBatches = rows => remainder(rows) === 0 ? 0 : 1;
+      const genNodeList = (rows, batches) => [
+        { type : SingletonBlock, calls : 1, items : 1 },
+        { type : CalculationBlock, calls : 1, items : 1 },
+        { type : EnumerateListBlock, calls : batches, items : rows },
+        { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows) + remainderBatches(rows), items : rows },
+        { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) },
+        { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }
+      ];
+      const bind = rows => ({
+        rows,
+        // ~1/4 of rows:
+        offset: offset(rows),
+        // ~1/2 of rows:
+        limit: limitMinusSkip(rows),
+      });
+      profHelper.runDefaultChecks({query, genNodeList, bind, testRowCounts: sortLimitTestRowCounts, options: {fullCount: true}});
+    },
+
    ////////////////////////////////////////////////////////////////////////////////
    /// @brief test SortedCollectBlock
    ////////////////////////////////////////////////////////////////////////////////
--- a/tests/js/server/aql/aql-queries-optimizer-sort-limit.js
+++ b/tests/js/server/aql/aql-queries-optimizer-sort-limit.js
@@ -308,8 +308,8 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
 /// fullCount when 3.5 was released.
 ////////////////////////////////////////////////////////////////////////////////

-    testLimitFullCollectionSortWithFullCount : function () {
-      const query = "FOR c IN " + cn + " SORT c.value LIMIT 20, 10 RETURN c";
+    testLimitFullCollectionSortWithFullCount: function () {
+      const query = `FOR c IN ${cn} SORT c.value LIMIT 20, 10 RETURN c`;

      const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});

@@ -331,6 +331,57 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
      assertEqual(sorts[0].strategy, "constrained-heap");
    },

+////////////////////////////////////////////////////////////////////////////////
+/// @brief check limit optimization with sort and fullCount
+/// Here, there are fewer rows to emit than the limit asks for.
+////////////////////////////////////////////////////////////////////////////////
+
+    testLimitFullCollectionSortWithFullCountAndFewRows: function () {
+      const query = `FOR c IN ${cn} FILTER c.value < 30 SORT c.value LIMIT 20, 700 RETURN c`;
+
+      const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
+
+      const values = queryResult.json;
+      const fullCount = queryResult.stats.fullCount;
+
+      assertEqual(10, values.length);
+
+      assertEqual(20, values[0].value);
+      assertEqual(21, values[1].value);
+      assertEqual(22, values[2].value);
+      assertEqual(29, values[9].value);
+
+      assertEqual(fullCount, 30);
+
+      const sorts = getSorts(query);
+      assertEqual(sorts.length, 1);
+      assertEqual(sorts[0].limit, 720);
+      assertEqual(sorts[0].strategy, 'constrained-heap');
+    },
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief check limit optimization with sort and fullCount
+/// Here, all rows are skipped during the limit block's offset.
+////////////////////////////////////////////////////////////////////////////////
+
+    testLimitFullCollectionSortWithFullCountAndAllRowsSkipped: function () {
+      const query = `FOR c IN ${cn} FILTER c.value < 30 SORT c.value LIMIT 40, 700 RETURN c`;
+
+      const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
+
+      const values = queryResult.json;
+      const fullCount = queryResult.stats.fullCount;
+
+      assertEqual(0, values.length);
+
+      assertEqual(fullCount, 30);
+
+      const sorts = getSorts(query);
+      assertEqual(sorts.length, 1);
+      assertEqual(sorts[0].limit, 740);
+      assertEqual(sorts[0].strategy, 'constrained-heap');
+    },
+
  };
 }