1
0
Fork 0

Additional sort-limit tests (#10010) (#10011)

This commit is contained in:
Tobias Gödderz 2019-09-13 18:10:43 +02:00 committed by KVS85
parent 4656bd0a6a
commit aeba4bc2c6
4 changed files with 208 additions and 36 deletions

View File

@ -209,7 +209,9 @@ std::pair<ExecutionState, NoStats> ConstrainedSortExecutor::produceRows(OutputAq
return {ExecutionState::DONE, NoStats{}};
}
// We should never get here, as the following LIMIT block should never fetch
// more than our limit. He may only skip after that. Thus:
// more than our limit. It may only skip after that.
// But note that this means that this block breaks with usual AQL behaviour!
// From this point on (i.e. doneProducing()), this block may only skip, not produce.
TRI_ASSERT(false);
THROW_ARANGO_EXCEPTION_MESSAGE(
TRI_ERROR_INTERNAL_AQL,

View File

@ -80,6 +80,7 @@ const nodeTypesList = [
];
const CalculationBlock = 'CalculationNode';
const ConstrainedSortBlock = 'SortLimitNode';
const CountCollectBlock = 'CountCollectNode';
const DistinctCollectBlock = 'DistinctCollectNode';
const EnumerateCollectionBlock = 'EnumerateCollectionNode';
@ -111,7 +112,7 @@ const IResearchViewBlock = 'IResearchViewNode';
const IResearchViewOrderedBlock = 'IResearchOrderedViewNode';
const blockTypesList = [
CalculationBlock, CountCollectBlock, DistinctCollectBlock,
CalculationBlock, ConstrainedSortBlock, CountCollectBlock, DistinctCollectBlock,
EnumerateCollectionBlock, EnumerateListBlock, FilterBlock,
HashedCollectBlock, IndexBlock, LimitBlock, NoResultsBlock, RemoteBlock,
ReturnBlock, ShortestPathBlock, SingletonBlock, SortBlock,
@ -141,6 +142,14 @@ let translateType = function(nodes, node) {
} else {
type = 'UnsortingGatherNode';
}
} else if (node.type === 'SortNode') {
if (node.strategy === 'standard') {
type = 'SortNode';
} else if (node.strategy === 'constrained-heap') {
type = 'SortLimitNode';
} else {
throw new Error('Unhandled sort strategy');
}
}
types[node.id] = type;
});
@ -236,7 +245,7 @@ function getStatsNodesWithId (profile) {
/// @brief assert structure of profile.stats
////////////////////////////////////////////////////////////////////////////////
function assertIsProfileStatsObject (stats, {level}) {
function assertIsProfileStatsObject (stats, {level, fullCount}) {
// internal argument check
expect(level)
.to.be.a('number')
@ -259,6 +268,10 @@ function assertIsProfileStatsObject (stats, {level}) {
statsKeys.push('nodes');
}
if (fullCount) {
statsKeys.push('fullCount');
}
expect(stats).to.have.all.keys(statsKeys);
// check types
@ -270,6 +283,9 @@ function assertIsProfileStatsObject (stats, {level}) {
expect(stats.httpRequests).to.be.a('number');
expect(stats.peakMemoryUsage).to.be.a('number');
expect(stats.executionTime).to.be.a('number');
if (fullCount) {
expect(stats.fullCount).to.be.a('number');
}
}
////////////////////////////////////////////////////////////////////////////////
@ -377,7 +393,7 @@ function assertIsProfilePlanObject (plan) {
/// @brief assert that the passed variable looks like a level 0 profile
////////////////////////////////////////////////////////////////////////////////
function assertIsLevel0Profile (profile) {
function assertIsLevel0Profile (profile, {fullCount} = {}) {
expect(profile)
.to.be.an('object')
.that.has.all.keys([
@ -385,7 +401,7 @@ function assertIsLevel0Profile (profile) {
'warnings',
]);
assertIsProfileStatsObject(profile.stats, {level: 0});
assertIsProfileStatsObject(profile.stats, {level: 0, fullCount});
assertIsProfileWarningsArray(profile.warnings);
}
@ -393,7 +409,7 @@ function assertIsLevel0Profile (profile) {
/// @brief assert that the passed variable looks like a level 1 profile
////////////////////////////////////////////////////////////////////////////////
function assertIsLevel1Profile (profile) {
function assertIsLevel1Profile (profile, {fullCount} = {}) {
expect(profile)
.to.be.an('object')
.that.has.all.keys([
@ -402,7 +418,7 @@ function assertIsLevel1Profile (profile) {
'profile',
]);
assertIsProfileStatsObject(profile.stats, {level: 1});
assertIsProfileStatsObject(profile.stats, {level: 1, fullCount});
assertIsProfileWarningsArray(profile.warnings);
assertIsProfileProfileObject(profile.profile);
}
@ -411,7 +427,7 @@ function assertIsLevel1Profile (profile) {
/// @brief assert that the passed variable looks like a level 2 profile
////////////////////////////////////////////////////////////////////////////////
function assertIsLevel2Profile (profile) {
function assertIsLevel2Profile (profile, {fullCount} = {}) {
expect(profile)
.to.be.an('object')
.that.has.all.keys([
@ -421,7 +437,7 @@ function assertIsLevel2Profile (profile) {
'plan',
]);
assertIsProfileStatsObject(profile.stats, {level: 2});
assertIsProfileStatsObject(profile.stats, {level: 2, fullCount});
assertIsProfileWarningsArray(profile.warnings);
assertIsProfileProfileObject(profile.profile);
assertIsProfilePlanObject(profile.plan);
@ -505,17 +521,19 @@ function runDefaultChecks (
prepare = () => {},
bind = rows => ({rows}),
options = {},
testRowCounts = defaultTestRowCounts,
additionalTestRowCounts = [],
}
) {
const testRowCounts = _.uniq(defaultTestRowCounts.concat(additionalTestRowCounts).sort());
const {fullCount} = options;
testRowCounts = _.uniq(testRowCounts.concat(additionalTestRowCounts).sort());
for (const rows of testRowCounts) {
prepare(rows);
const profile = db._query(query, bind(rows),
_.merge(options, {profile: 2, defaultBatchSize})
).getExtra();
assertIsLevel2Profile(profile);
assertIsLevel2Profile(profile, {fullCount});
assertStatsNodesMatchPlanNodes(profile);
const batches = Math.ceil(rows / defaultBatchSize);
@ -721,6 +739,7 @@ exports.UpdateNode = UpdateNode;
exports.UpsertNode = UpsertNode;
exports.nodeTypesList = nodeTypesList;
exports.CalculationBlock = CalculationBlock;
exports.ConstrainedSortBlock = ConstrainedSortBlock;
exports.CountCollectBlock = CountCollectBlock;
exports.DistinctCollectBlock = DistinctCollectBlock;
exports.EnumerateCollectionBlock = EnumerateCollectionBlock;

View File

@ -32,6 +32,7 @@ const profHelper = require("@arangodb/aql-profiler-test-helper");
const db = require('@arangodb').db;
const jsunity = require("jsunity");
const assert = jsunity.jsUnity.assertions;
const _ = require('lodash');
////////////////////////////////////////////////////////////////////////////////
@ -65,6 +66,7 @@ function ahuacatlProfilerTestSuite () {
// import some names from profHelper directly into our namespace:
const defaultBatchSize = profHelper.defaultBatchSize;
const defaultTestRowCounts = profHelper.defaultTestRowCounts;
const { CalculationNode, CollectNode, DistributeNode, EnumerateCollectionNode,
EnumerateListNode, EnumerateViewNode, FilterNode, GatherNode, IndexNode,
@ -72,7 +74,7 @@ function ahuacatlProfilerTestSuite () {
ReturnNode, ScatterNode, ShortestPathNode, SingletonNode, SortNode,
SubqueryNode, TraversalNode, UpdateNode, UpsertNode } = profHelper;
const { CalculationBlock, CountCollectBlock, DistinctCollectBlock,
const { CalculationBlock, ConstrainedSortBlock, CountCollectBlock, DistinctCollectBlock,
EnumerateCollectionBlock, EnumerateListBlock, FilterBlock,
HashedCollectBlock, IndexBlock, LimitBlock, NoResultsBlock, RemoteBlock,
ReturnBlock, ShortestPathBlock, SingletonBlock, SortBlock,
@ -81,40 +83,84 @@ function ahuacatlProfilerTestSuite () {
UpsertBlock, ScatterBlock, DistributeBlock, IResearchViewUnorderedBlock,
IResearchViewBlock, IResearchViewOrderedBlock } = profHelper;
// See the limit tests (e.g. testLimitBlock3) for limit() and skip().
// See the limit tests (e.g. testLimitBlock3) for limit() and offset().
const additionalLimitTestRowCounts = [
// limit() = 1000 ± 1:
1332, 1333, 1334,
// skip() = 1000 ± 1:
// offset() = 1000 ± 1:
3999, 4000, 4003, 4004,
// limit() = 2000 ± 1:
2665, 2666, 2667,
// skip() = 2000 ± 1:
// offset() = 2000 ± 1:
7999, 8000, 8003, 8004,
];
{
// These are copies from testLimitBlock3.
const skip = rows => Math.floor(rows/4);
const limit = rows => Math.ceil(3*rows/4);
const offset = rows => Math.floor(rows/4);
const limit = rows => Math.ceil(3*rows/4);
const offsetBatches = rows => Math.ceil(offset(rows) / defaultBatchSize);
const skipOffsetBatches = rows => Math.ceil(offset(rows) === 0 ? 0 : 1);
const limitBatches = rows => Math.ceil(limit(rows) / defaultBatchSize);
{
// This is more documentation than anything else:
assert.assertEqual(999, limit(1332));
assert.assertEqual(1000, limit(1333));
assert.assertEqual(1001, limit(1334));
assert.assertEqual(999, skip(3999));
assert.assertEqual(1000, skip(4000));
assert.assertEqual(1000, skip(4003));
assert.assertEqual(1001, skip(4004));
assert.assertEqual(999, offset(3999));
assert.assertEqual(1000, offset(4000));
assert.assertEqual(1000, offset(4003));
assert.assertEqual(1001, offset(4004));
assert.assertEqual(1999, limit(2665));
assert.assertEqual(2000, limit(2666));
assert.assertEqual(2001, limit(2667));
assert.assertEqual(1999, skip(7999));
assert.assertEqual(2000, skip(8000));
assert.assertEqual(2000, skip(8003));
assert.assertEqual(2001, skip(8004));
assert.assertEqual(1999, offset(7999));
assert.assertEqual(2000, offset(8000));
assert.assertEqual(2000, offset(8003));
assert.assertEqual(2001, offset(8004));
}
// This is the decision made by the sort-limit optimizer rule:
const usesHeapSort = rows => {
const n = rows;
const m = limit(rows);
return rows >= 100 && 0.25 * n * Math.log2(m) + m * Math.log2(m) < n * Math.log2(n);
};
// // Filter out row counts that would use the standard sort strategy
// const sortLimitTestRowCounts = _.uniq(defaultTestRowCounts.concat(additionalLimitTestRowCounts).sort())
// .filter(usesHeapSort);
const sortLimitTestRowCounts =
// defaults, minus those < 100:
[100, 999, 1000, 1001, 1500, 2000, 10500]
.concat([
// limit() - offset() = 1000 ± 1:
1995, 1997, 1998, 2000, 1999, 2001,
// limit() - offset() = 2000 ± 1:
3995, 3997, 3998, 4000, 3999, 4001
]);
const limitMinusSkip = rows => limit(rows) - offset(rows);
const limitMinusSkipBatches = rows => Math.ceil(limitMinusSkip(rows) / defaultBatchSize);
for (const rows of sortLimitTestRowCounts) {
assert.assertTrue(usesHeapSort(rows),
`Test row count would not trigger sort-limit rule: ${rows}`);
}
{
// Documentation of the expected proportions. These are a little wonky due to the rounding,
// but that's fine for the purpose.
assert.assertEqual(999, limitMinusSkip(1995));
assert.assertEqual(999, limitMinusSkip(1997));
assert.assertEqual(1000, limitMinusSkip(1998));
assert.assertEqual(1000, limitMinusSkip(2000));
assert.assertEqual(1001, limitMinusSkip(1999));
assert.assertEqual(1001, limitMinusSkip(2001));
assert.assertEqual(1999, limitMinusSkip(3995));
assert.assertEqual(1999, limitMinusSkip(3997));
assert.assertEqual(2000, limitMinusSkip(3998));
assert.assertEqual(2000, limitMinusSkip(4000));
assert.assertEqual(2001, limitMinusSkip(3999));
assert.assertEqual(2001, limitMinusSkip(4001));
}
return {
////////////////////////////////////////////////////////////////////////////////
@ -140,10 +186,12 @@ function ahuacatlProfilerTestSuite () {
const profileDefault = db._query(query, {}).getExtra();
const profile0 = db._query(query, {}, {profile: 0}).getExtra();
const profileFalse = db._query(query, {}, {profile: false}).getExtra();
const profile0WithFullCount = db._query(query, {}, {profile: 0, fullCount: true}).getExtra();
profHelper.assertIsLevel0Profile(profileDefault);
profHelper.assertIsLevel0Profile(profile0);
profHelper.assertIsLevel0Profile(profileFalse);
profHelper.assertIsLevel0Profile(profile0WithFullCount, {fullCount: true});
},
////////////////////////////////////////////////////////////////////////////////
@ -154,9 +202,11 @@ function ahuacatlProfilerTestSuite () {
const query = 'RETURN 1';
const profile1 = db._query(query, {}, {profile: 1}).getExtra();
const profileTrue = db._query(query, {}, {profile: true}).getExtra();
const profile1WithFullCount = db._query(query, {}, {profile: 1, fullCount: true}).getExtra();
profHelper.assertIsLevel1Profile(profile1);
profHelper.assertIsLevel1Profile(profileTrue);
profHelper.assertIsLevel1Profile(profile1WithFullCount, {fullCount: true});
},
////////////////////////////////////////////////////////////////////////////////
@ -166,9 +216,12 @@ function ahuacatlProfilerTestSuite () {
testProfile2Fields : function () {
const query = 'RETURN 1';
const profile2 = db._query(query, {}, {profile: 2}).getExtra();
const profile2WithFullCount = db._query(query, {}, {profile: 2, fullCount: true}).getExtra();
profHelper.assertIsLevel2Profile(profile2);
profHelper.assertStatsNodesMatchPlanNodes(profile2);
profHelper.assertIsLevel2Profile(profile2WithFullCount, {fullCount: true});
profHelper.assertStatsNodesMatchPlanNodes(profile2WithFullCount);
},
////////////////////////////////////////////////////////////////////////////////
@ -460,22 +513,18 @@ function ahuacatlProfilerTestSuite () {
////////////////////////////////////////////////////////////////////////////////
testLimitBlock3: function() {
const query = 'FOR i IN 1..@rows LIMIT @skip, @limit RETURN i';
const skip = rows => Math.floor(rows/4);
const skipBatches = rows => Math.ceil(skip(rows) / defaultBatchSize);
const limit = rows => Math.ceil(3*rows/4);
const limitBatches = rows => Math.ceil(limit(rows) / defaultBatchSize);
const query = 'FOR i IN 1..@rows LIMIT @offset, @limit RETURN i';
const genNodeList = (rows, batches) => [
{type: SingletonBlock, calls: 1, items: 1},
{type: CalculationBlock, calls: 1, items: 1},
{type: EnumerateListBlock, calls: limitBatches(rows) + skipBatches(rows), items: limit(rows) + skip(rows)},
{type: EnumerateListBlock, calls: limitBatches(rows) + offsetBatches(rows), items: limit(rows) + offset(rows)},
{type: LimitBlock, calls: limitBatches(rows), items: limit(rows)},
{type: ReturnBlock, calls: limitBatches(rows), items: limit(rows)},
];
const bind = (rows) => ({
rows,
skip: skip(rows),
offset: offset(rows),
limit: limit(rows),
});
const additionalTestRowCounts = additionalLimitTestRowCounts;
@ -585,6 +634,57 @@ function ahuacatlProfilerTestSuite () {
profHelper.runDefaultChecks({query, genNodeList, bind});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test SortLimitBlock
////////////////////////////////////////////////////////////////////////////////
testSortLimitBlock1 : function () {
const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i';
const genNodeList = (rows, batches) => [
{ type : SingletonBlock, calls : 1, items : 1 },
{ type : CalculationBlock, calls : 1, items : 1 },
{ type : EnumerateListBlock, calls : batches, items : rows },
{ type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows), items : limit(rows) },
{ type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) },
{ type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }
];
const bind = rows => ({
rows,
// ~1/4 of rows:
offset: offset(rows),
// ~1/2 of rows:
limit: limitMinusSkip(rows),
});
profHelper.runDefaultChecks({query, genNodeList, bind, testRowCounts: sortLimitTestRowCounts});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test SortLimitBlock
/// with fullCount
////////////////////////////////////////////////////////////////////////////////
testSortLimitBlock2 : function () {
const query = 'FOR i IN 1..@rows SORT i DESC LIMIT @offset, @limit RETURN i';
const remainder = rows => rows - limit(rows);
const remainderBatches = rows => remainder(rows) === 0 ? 0 : 1;
const genNodeList = (rows, batches) => [
{ type : SingletonBlock, calls : 1, items : 1 },
{ type : CalculationBlock, calls : 1, items : 1 },
{ type : EnumerateListBlock, calls : batches, items : rows },
{ type : ConstrainedSortBlock, calls : skipOffsetBatches(rows) + limitMinusSkipBatches(rows) + remainderBatches(rows), items : rows },
{ type : LimitBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) },
{ type : ReturnBlock, calls : limitMinusSkipBatches(rows), items : limitMinusSkip(rows) }
];
const bind = rows => ({
rows,
// ~1/4 of rows:
offset: offset(rows),
// ~1/2 of rows:
limit: limitMinusSkip(rows),
});
profHelper.runDefaultChecks({query, genNodeList, bind, testRowCounts: sortLimitTestRowCounts, options: {fullCount: true}});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test SortedCollectBlock
////////////////////////////////////////////////////////////////////////////////

View File

@ -308,8 +308,8 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
/// fullCount when 3.5 was released.
////////////////////////////////////////////////////////////////////////////////
testLimitFullCollectionSortWithFullCount : function () {
const query = "FOR c IN " + cn + " SORT c.value LIMIT 20, 10 RETURN c";
testLimitFullCollectionSortWithFullCount: function () {
const query = `FOR c IN ${cn} SORT c.value LIMIT 20, 10 RETURN c`;
const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
@ -331,6 +331,57 @@ function ahuacatlQueryOptimizerLimitTestSuite () {
assertEqual(sorts[0].strategy, "constrained-heap");
},
////////////////////////////////////////////////////////////////////////////////
/// @brief check limit optimization with sort and fullCount
/// Here, there are fewer rows to emit than the limit asks for.
////////////////////////////////////////////////////////////////////////////////
testLimitFullCollectionSortWithFullCountAndFewRows: function () {
const query = `FOR c IN ${cn} FILTER c.value < 30 SORT c.value LIMIT 20, 700 RETURN c`;
const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
const values = queryResult.json;
const fullCount = queryResult.stats.fullCount;
assertEqual(10, values.length);
assertEqual(20, values[0].value);
assertEqual(21, values[1].value);
assertEqual(22, values[2].value);
assertEqual(29, values[9].value);
assertEqual(fullCount, 30);
const sorts = getSorts(query);
assertEqual(sorts.length, 1);
assertEqual(sorts[0].limit, 720);
assertEqual(sorts[0].strategy, 'constrained-heap');
},
////////////////////////////////////////////////////////////////////////////////
/// @brief check limit optimization with sort and fullCount
/// Here, all rows are skipped during the limit block's offset.
////////////////////////////////////////////////////////////////////////////////
testLimitFullCollectionSortWithFullCountAndAllRowsSkipped: function () {
const query = `FOR c IN ${cn} FILTER c.value < 30 SORT c.value LIMIT 40, 700 RETURN c`;
const queryResult = AQL_EXECUTE(query, {}, {fullCount: true});
const values = queryResult.json;
const fullCount = queryResult.stats.fullCount;
assertEqual(0, values.length);
assertEqual(fullCount, 30);
const sorts = getSorts(query);
assertEqual(sorts.length, 1);
assertEqual(sorts[0].limit, 740);
assertEqual(sorts[0].strategy, 'constrained-heap');
},
};
}