From aeba4bc2c6ac7e48adb3d8750bd0b244e3065110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20G=C3=B6dderz?= Date: Fri, 13 Sep 2019 18:10:43 +0200 Subject: [PATCH] Additional sort-limit tests (#10010) (#10011) --- arangod/Aql/ConstrainedSortExecutor.cpp | 4 +- .../@arangodb/aql-profiler-test-helper.js | 39 +++-- tests/js/server/aql/aql-profiler.js | 146 +++++++++++++++--- .../aql/aql-queries-optimizer-sort-limit.js | 55 ++++++- 4 files changed, 208 insertions(+), 36 deletions(-) diff --git a/arangod/Aql/ConstrainedSortExecutor.cpp b/arangod/Aql/ConstrainedSortExecutor.cpp index 11c87b63de..b5276c5cf9 100644 --- a/arangod/Aql/ConstrainedSortExecutor.cpp +++ b/arangod/Aql/ConstrainedSortExecutor.cpp @@ -209,7 +209,9 @@ std::pair ConstrainedSortExecutor::produceRows(OutputAq return {ExecutionState::DONE, NoStats{}}; } // We should never get here, as the following LIMIT block should never fetch - // more than our limit. He may only skip after that. Thus: + // more than our limit. It may only skip after that. + // But note that this means that this block breaks with usual AQL behaviour! + // From this point on (i.e. doneProducing()), this block may only skip, not produce. TRI_ASSERT(false); THROW_ARANGO_EXCEPTION_MESSAGE( TRI_ERROR_INTERNAL_AQL, diff --git a/js/server/modules/@arangodb/aql-profiler-test-helper.js b/js/server/modules/@arangodb/aql-profiler-test-helper.js index 3e69685624..0768b094e5 100644 --- a/js/server/modules/@arangodb/aql-profiler-test-helper.js +++ b/js/server/modules/@arangodb/aql-profiler-test-helper.js @@ -80,6 +80,7 @@ const nodeTypesList = [ ]; const CalculationBlock = 'CalculationNode'; +const ConstrainedSortBlock = 'SortLimitNode'; const CountCollectBlock = 'CountCollectNode'; const DistinctCollectBlock = 'DistinctCollectNode'; const EnumerateCollectionBlock = 'EnumerateCollectionNode'; @@ -111,7 +112,7 @@ const IResearchViewBlock = 'IResearchViewNode'; const IResearchViewOrderedBlock = 'IResearchOrderedViewNode'; const blockTypesList = [ - CalculationBlock, CountCollectBlock, DistinctCollectBlock, + CalculationBlock, ConstrainedSortBlock, CountCollectBlock, DistinctCollectBlock, EnumerateCollectionBlock, EnumerateListBlock, FilterBlock, HashedCollectBlock, IndexBlock, LimitBlock, NoResultsBlock, RemoteBlock, ReturnBlock, ShortestPathBlock, SingletonBlock, SortBlock, @@ -141,6 +142,14 @@ let translateType = function(nodes, node) { } else { type = 'UnsortingGatherNode'; } + } else if (node.type === 'SortNode') { + if (node.strategy === 'standard') { + type = 'SortNode'; + } else if (node.strategy === 'constrained-heap') { + type = 'SortLimitNode'; + } else { + throw new Error('Unhandled sort strategy'); + } } types[node.id] = type; }); @@ -236,7 +245,7 @@ function getStatsNodesWithId (profile) { /// @brief assert structure of profile.stats //////////////////////////////////////////////////////////////////////////////// -function assertIsProfileStatsObject (stats, {level}) { +function assertIsProfileStatsObject (stats, {level, fullCount}) { // internal argument check expect(level) .to.be.a('number') @@ -259,6 +268,10 @@ function assertIsProfileStatsObject (stats, {level}) { statsKeys.push('nodes'); } + if (fullCount) { + statsKeys.push('fullCount'); + } + expect(stats).to.have.all.keys(statsKeys); // check types @@ -270,6 +283,9 @@ function assertIsProfileStatsObject (stats, {level}) { expect(stats.httpRequests).to.be.a('number'); expect(stats.peakMemoryUsage).to.be.a('number'); expect(stats.executionTime).to.be.a('number'); + if (fullCount) { + expect(stats.fullCount).to.be.a('number'); + } } //////////////////////////////////////////////////////////////////////////////// @@ -377,7 +393,7 @@ function assertIsProfilePlanObject (plan) { /// @brief assert that the passed variable looks like a level 0 profile //////////////////////////////////////////////////////////////////////////////// -function assertIsLevel0Profile (profile) { +function assertIsLevel0Profile (profile, {fullCount} = {}) { expect(profile) .to.be.an('object') .that.has.all.keys([ @@ -385,7 +401,7 @@ function assertIsLevel0Profile (profile) { 'warnings', ]); - assertIsProfileStatsObject(profile.stats, {level: 0}); + assertIsProfileStatsObject(profile.stats, {level: 0, fullCount}); assertIsProfileWarningsArray(profile.warnings); } @@ -393,7 +409,7 @@ function assertIsLevel0Profile (profile) { /// @brief assert that the passed variable looks like a level 1 profile //////////////////////////////////////////////////////////////////////////////// -function assertIsLevel1Profile (profile) { +function assertIsLevel1Profile (profile, {fullCount} = {}) { expect(profile) .to.be.an('object') .that.has.all.keys([ @@ -402,7 +418,7 @@ function assertIsLevel1Profile (profile) { 'profile', ]); - assertIsProfileStatsObject(profile.stats, {level: 1}); + assertIsProfileStatsObject(profile.stats, {level: 1, fullCount}); assertIsProfileWarningsArray(profile.warnings); assertIsProfileProfileObject(profile.profile); } @@ -411,7 +427,7 @@ function assertIsLevel1Profile (profile) { /// @brief assert that the passed variable looks like a level 2 profile //////////////////////////////////////////////////////////////////////////////// -function assertIsLevel2Profile (profile) { +function assertIsLevel2Profile (profile, {fullCount} = {}) { expect(profile) .to.be.an('object') .that.has.all.keys([ @@ -421,7 +437,7 @@ function assertIsLevel2Profile (profile) { 'plan', ]); - assertIsProfileStatsObject(profile.stats, {level: 2}); + assertIsProfileStatsObject(profile.stats, {level: 2, fullCount}); assertIsProfileWarningsArray(profile.warnings); assertIsProfileProfileObject(profile.profile); assertIsProfilePlanObject(profile.plan); @@ -505,17 +521,19 @@ function runDefaultChecks ( prepare = () => {}, bind = rows => ({rows}), options = {}, + testRowCounts = defaultTestRowCounts, additionalTestRowCounts = [], } ) { - const testRowCounts = _.uniq(defaultTestRowCounts.concat(additionalTestRowCounts).sort()); + const {fullCount} = options; + testRowCounts = _.uniq(testRowCounts.concat(additionalTestRowCounts).sort()); for (const rows of testRowCounts) { prepare(rows); const profile = db._query(query, bind(rows), _.merge(options, {profile: 2, defaultBatchSize}) ).getExtra(); - assertIsLevel2Profile(profile); + assertIsLevel2Profile(profile, {fullCount}); assertStatsNodesMatchPlanNodes(profile); const batches = Math.ceil(rows / defaultBatchSize); @@ -721,6 +739,7 @@ exports.UpdateNode = UpdateNode; exports.UpsertNode = UpsertNode; exports.nodeTypesList = nodeTypesList; exports.CalculationBlock = CalculationBlock; +exports.ConstrainedSortBlock = ConstrainedSortBlock; exports.CountCollectBlock = CountCollectBlock; exports.DistinctCollectBlock = DistinctCollectBlock; exports.EnumerateCollectionBlock = EnumerateCollectionBlock; diff --git a/tests/js/server/aql/aql-profiler.js b/tests/js/server/aql/aql-profiler.js index a14329fd13..f7e9829686 100644 --- a/tests/js/server/aql/aql-profiler.js +++ b/tests/js/server/aql/aql-profiler.js @@ -32,6 +32,7 @@ const profHelper = require("@arangodb/aql-profiler-test-helper"); const db = require('@arangodb').db; const jsunity = require("jsunity"); const assert = jsunity.jsUnity.assertions; +const _ = require('lodash'); //////////////////////////////////////////////////////////////////////////////// @@ -65,6 +66,7 @@ function ahuacatlProfilerTestSuite () { // import some names from profHelper directly into our namespace: const defaultBatchSize = profHelper.defaultBatchSize; + const defaultTestRowCounts = profHelper.defaultTestRowCounts; const { CalculationNode, CollectNode, DistributeNode, EnumerateCollectionNode, EnumerateListNode, EnumerateViewNode, FilterNode, GatherNode, IndexNode, @@ -72,7 +74,7 @@ function ahuacatlProfilerTestSuite () { ReturnNode, ScatterNode, ShortestPathNode, SingletonNode, SortNode, SubqueryNode, TraversalNode, UpdateNode, UpsertNode } = profHelper; - const { CalculationBlock, CountCollectBlock, DistinctCollectBlock, + const { CalculationBlock, ConstrainedSortBlock, CountCollectBlock, DistinctCollectBlock, EnumerateCollectionBlock, EnumerateListBlock, FilterBlock, HashedCollectBlock, IndexBlock, LimitBlock, NoResultsBlock, RemoteBlock, ReturnBlock, ShortestPathBlock, SingletonBlock, SortBlock, @@ -81,40 +83,84 @@ function ahuacatlProfilerTestSuite () { UpsertBlock, ScatterBlock, DistributeBlock, IResearchViewUnorderedBlock, IResearchViewBlock, IResearchViewOrderedBlock } = profHelper; - // See the limit tests (e.g. testLimitBlock3) for limit() and skip(). + // See the limit tests (e.g. testLimitBlock3) for limit() and offset(). const additionalLimitTestRowCounts = [ // limit() = 1000 ± 1: 1332, 1333, 1334, - // skip() = 1000 ± 1: + // offset() = 1000 ± 1: 3999, 4000, 4003, 4004, // limit() = 2000 ± 1: 2665, 2666, 2667, - // skip() = 2000 ± 1: + // offset() = 2000 ± 1: 7999, 8000, 8003, 8004, ]; - { - // These are copies from testLimitBlock3. - const skip = rows => Math.floor(rows/4); - const limit = rows => Math.ceil(3*rows/4); + const offset = rows => Math.floor(rows/4); + const limit = rows => Math.ceil(3*rows/4); + const offsetBatches = rows => Math.ceil(offset(rows) / defaultBatchSize); + const skipOffsetBatches = rows => Math.ceil(offset(rows) === 0 ? 0 : 1); + const limitBatches = rows => Math.ceil(limit(rows) / defaultBatchSize); + { // This is more documentation than anything else: assert.assertEqual(999, limit(1332)); assert.assertEqual(1000, limit(1333)); assert.assertEqual(1001, limit(1334)); - assert.assertEqual(999, skip(3999)); - assert.assertEqual(1000, skip(4000)); - assert.assertEqual(1000, skip(4003)); - assert.assertEqual(1001, skip(4004)); + assert.assertEqual(999, offset(3999)); + assert.assertEqual(1000, offset(4000)); + assert.assertEqual(1000, offset(4003)); + assert.assertEqual(1001, offset(4004)); assert.assertEqual(1999, limit(2665)); assert.assertEqual(2000, limit(2666)); assert.assertEqual(2001, limit(2667)); - assert.assertEqual(1999, skip(7999)); - assert.assertEqual(2000, skip(8000)); - assert.assertEqual(2000, skip(8003)); - assert.assertEqual(2001, skip(8004)); + assert.assertEqual(1999, offset(7999)); + assert.assertEqual(2000, offset(8000)); + assert.assertEqual(2000, offset(8003)); + assert.assertEqual(2001, offset(8004)); } + // This is the decision made by the sort-limit optimizer rule: + const usesHeapSort = rows => { + const n = rows; + const m = limit(rows); + return rows >= 100 && 0.25 * n * Math.log2(m) + m * Math.log2(m) < n * Math.log2(n); + }; + // // Filter out row counts that would use the standard sort strategy + // const sortLimitTestRowCounts = _.uniq(defaultTestRowCounts.concat(additionalLimitTestRowCounts).sort()) + // .filter(usesHeapSort); + const sortLimitTestRowCounts = + // defaults, minus those < 100: + [100, 999, 1000, 1001, 1500, 2000, 10500] + .concat([ + // limit() - offset() = 1000 ± 1: + 1995, 1997, 1998, 2000, 1999, 2001, + // limit() - offset() = 2000 ± 1: + 3995, 3997, 3998, 4000, 3999, 4001 + ]); + const limitMinusSkip = rows => limit(rows) - offset(rows); + const limitMinusSkipBatches = rows => Math.ceil(limitMinusSkip(rows) / defaultBatchSize); + for (const rows of sortLimitTestRowCounts) { + assert.assertTrue(usesHeapSort(rows), + `Test row count would not trigger sort-limit rule: ${rows}`); + } + { + // Documentation of the expected proportions. These are a little wonky due to the rounding, + // but that's fine for the purpose. + assert.assertEqual(999, limitMinusSkip(1995)); + assert.assertEqual(999, limitMinusSkip(1997)); + assert.assertEqual(1000, limitMinusSkip(1998)); + assert.assertEqual(1000, limitMinusSkip(2000)); + assert.assertEqual(1001, limitMinusSkip(1999)); + assert.assertEqual(1001, limitMinusSkip(2001)); + assert.assertEqual(1999, limitMinusSkip(3995)); + assert.assertEqual(1999, limitMinusSkip(3997)); + assert.assertEqual(2000, limitMinusSkip(3998)); + assert.assertEqual(2000, limitMinusSkip(4000)); + assert.assertEqual(2001, limitMinusSkip(3999)); + assert.assertEqual(2001, limitMinusSkip(4001)); + } + + return { //////////////////////////////////////////////////////////////////////////////// @@ -140,10 +186,12 @@ function ahuacatlProfilerTestSuite () { const profileDefault = db._query(query, {}).getExtra(); const profile0 = db._query(query, {}, {profile: 0}).getExtra(); const profileFalse = db._query(query, {}, {profile: false}).getExtra(); + const profile0WithFullCount = db._query(query, {}, {profile: 0, fullCount: true}).getExtra(); profHelper.assertIsLevel0Profile(profileDefault); profHelper.assertIsLevel0Profile(profile0); profHelper.assertIsLevel0Profile(profileFalse); + profHelper.assertIsLevel0Profile(profile0WithFullCount, {fullCount: true}); }, //////////////////////////////////////////////////////////////////////////////// @@ -154,9 +202,11 @@ function ahuacatlProfilerTestSuite () { const query = 'RETURN 1'; const profile1 = db._query(query, {}, {profile: 1}).getExtra(); const profileTrue = db._query(query, {}, {profile: true}).getExtra(); + const profile1WithFullCount = db._query(query, {}, {profile: 1, fullCount: true}).getExtra(); profHelper.assertIsLevel1Profile(profile1); profHelper.assertIsLevel1Profile(profileTrue); + profHelper.assertIsLevel1Profile(profile1WithFullCount, {fullCount: true}); }, //////////////////////////////////////////////////////////////////////////////// @@ -166,9 +216,12 @@ function ahuacatlProfilerTestSuite () { testProfile2Fields : function () { const query = 'RETURN 1'; const profile2 = db._query(query, {}, {profile: 2}).getExtra(); + const profile2WithFullCount = db._query(query, {}, {profile: 2, fullCount: true}).getExtra(); profHelper.assertIsLevel2Profile(profile2); profHelper.assertStatsNodesMatchPlanNodes(profile2); + profHelper.assertIsLevel2Profile(profile2WithFullCount, {fullCount: true}); + profHelper.assertStatsNodesMatchPlanNodes(profile2WithFullCount); }, //////////////////////////////////////////////////////////////////////////////// @@ -460,22 +513,18 @@ function ahuacatlProfilerTestSuite () { //////////////////////////////////////////////////////////////////////////////// testLimitBlock3: function() { - const query = 'FOR i IN 1..@rows LIMIT @skip, @limit RETURN i'; - const skip = rows => Math.floor(rows/4); - const skipBatches = rows => Math.ceil(skip(rows) / defaultBatchSize); - const limit = rows => Math.ceil(3*rows/4); - const limitBatches = rows => Math.ceil(limit(rows) / defaultBatchSize); + const query = 'FOR i IN 1..@rows LIMIT @offset, @limit RETURN i'; const genNodeList = (rows, batches) => [ {type: SingletonBlock, calls: 1, items: 1}, {type: CalculationBlock, calls: 1, items: 1}, - {type: EnumerateListBlock, calls: limitBatches(rows) + skipBatches(rows), items: limit(rows) + skip(rows)}, + {type: EnumerateListBlock, calls: limitBatches(rows) + offsetBatches(rows), items: limit(rows) + offset(rows)}, {type: LimitBlock, calls: limitBatches(rows), items: limit(rows)}, {type: ReturnBlock, calls: limitBatches(rows), items: limit(rows)}, ]; const bind = (rows) => ({ rows, - skip: skip(rows), + offset: offset(rows), limit: limit(rows), }); const additionalTestRowCounts = additionalLimitTestRowCounts; @@ -585,6 +634,57 @@ function ahuacatlProfilerTestSuite () { profHelper.runDefaultChecks({query, genNodeList, bind}); }, +//////////////////////////////////////////////////////////////////////////////// +/// @brief test SortLimitBlock +//////////////////////////////////////////////////////////////////////////////// + + testSortLimitBlock1 : function () { + const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i'; + const genNodeList = (rows, batches) => [ + { type : SingletonBlock, calls : 1, items : 1 }, + { type : CalculationBlock, calls : 1, items : 1 }, + { type : EnumerateListBlock, calls : batches, items : rows }, + { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows), items : limit(rows) }, + { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }, + { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) } + ]; + const bind = rows => ({ + rows, + // ~1/4 of rows: + offset: offset(rows), + // ~1/2 of rows: + limit: limitMinusSkip(rows), + }); + profHelper.runDefaultChecks({query, genNodeList, bind, testRowCounts: sortLimitTestRowCounts}); + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test SortLimitBlock +/// with fullCount +//////////////////////////////////////////////////////////////////////////////// + + testSortLimitBlock2 : function () { + const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i'; + const remainder = rows => rows - limit(rows); + const remainderBatches = rows => remainder(rows) === 0 ? 0 : 1; + const genNodeList = (rows, batches) => [ + { type : SingletonBlock, calls : 1, items : 1 }, + { type : CalculationBlock, calls : 1, items : 1 }, + { type : EnumerateListBlock, calls : batches, items : rows }, + { type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows) + remainderBatches(rows), items : rows }, + { type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }, + { type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) } + ]; + const bind = rows => ({ + rows, + // ~1/4 of rows: + offset: offset(rows), + // ~1/2 of rows: + limit: limitMinusSkip(rows), + }); + profHelper.runDefaultChecks({query, genNodeList, bind, testRowCounts: sortLimitTestRowCounts, options: {fullCount: true}}); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief test SortedCollectBlock //////////////////////////////////////////////////////////////////////////////// diff --git a/tests/js/server/aql/aql-queries-optimizer-sort-limit.js b/tests/js/server/aql/aql-queries-optimizer-sort-limit.js index c85710e0a8..a1164ba65f 100644 --- a/tests/js/server/aql/aql-queries-optimizer-sort-limit.js +++ b/tests/js/server/aql/aql-queries-optimizer-sort-limit.js @@ -308,8 +308,8 @@ function ahuacatlQueryOptimizerLimitTestSuite () { /// fullCount when 3.5 was released. //////////////////////////////////////////////////////////////////////////////// - testLimitFullCollectionSortWithFullCount : function () { - const query = "FOR c IN " + cn + " SORT c.value LIMIT 20, 10 RETURN c"; + testLimitFullCollectionSortWithFullCount: function () { + const query = `FOR c IN ${cn} SORT c.value LIMIT 20, 10 RETURN c`; const queryResult = AQL_EXECUTE(query, {}, {fullCount: true}); @@ -331,6 +331,57 @@ function ahuacatlQueryOptimizerLimitTestSuite () { assertEqual(sorts[0].strategy, "constrained-heap"); }, +//////////////////////////////////////////////////////////////////////////////// +/// @brief check limit optimization with sort and fullCount +/// Here, there are fewer rows to emit than the limit asks for. +//////////////////////////////////////////////////////////////////////////////// + + testLimitFullCollectionSortWithFullCountAndFewRows: function () { + const query = `FOR c IN ${cn} FILTER c.value < 30 SORT c.value LIMIT 20, 700 RETURN c`; + + const queryResult = AQL_EXECUTE(query, {}, {fullCount: true}); + + const values = queryResult.json; + const fullCount = queryResult.stats.fullCount; + + assertEqual(10, values.length); + + assertEqual(20, values[0].value); + assertEqual(21, values[1].value); + assertEqual(22, values[2].value); + assertEqual(29, values[9].value); + + assertEqual(fullCount, 30); + + const sorts = getSorts(query); + assertEqual(sorts.length, 1); + assertEqual(sorts[0].limit, 720); + assertEqual(sorts[0].strategy, 'constrained-heap'); + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief check limit optimization with sort and fullCount +/// Here, all rows are skipped during the limit block's offset. +//////////////////////////////////////////////////////////////////////////////// + + testLimitFullCollectionSortWithFullCountAndAllRowsSkipped: function () { + const query = `FOR c IN ${cn} FILTER c.value < 30 SORT c.value LIMIT 40, 700 RETURN c`; + + const queryResult = AQL_EXECUTE(query, {}, {fullCount: true}); + + const values = queryResult.json; + const fullCount = queryResult.stats.fullCount; + + assertEqual(0, values.length); + + assertEqual(fullCount, 30); + + const sorts = getSorts(query); + assertEqual(sorts.length, 1); + assertEqual(sorts[0].limit, 740); + assertEqual(sorts[0].strategy, 'constrained-heap'); + }, + }; }