From 17ed7fbef4ee0ec3e69393f2c3a042cbb49a5a39 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 13 Dec 2014 17:47:59 +0000 Subject: [PATCH] sorting works, tests updated --- arangod/Aql/ExecutionBlock.cpp | 82 ++++++++++++++++++- arangod/Aql/ExecutionBlock.h | 26 +++++- arangod/Aql/RangeInfo.cpp | 23 +++++- arangod/Aql/RangeInfo.h | 4 +- .../aql-queries-optimiser-in-noncluster.js | 54 ++++++------ 5 files changed, 155 insertions(+), 34 deletions(-) diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index b162078818..95e3ffc1c6 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -857,6 +857,7 @@ IndexRangeBlock::IndexRangeBlock (ExecutionEngine* engine, _skiplistIterator(nullptr), _condition(new IndexOrCondition()), _posInRanges(0), + _sortCoords(), _freeCondition(true) { for (size_t i = 0; i < en->_ranges.size(); i++) { @@ -1199,8 +1200,41 @@ bool IndexRangeBlock::initRanges () { if (en->_index->type == TRI_IDX_TYPE_SKIPLIST_INDEX) { if (! _condition->empty()) { + // sort the conditions! + + // TODO this should also be done for hash indexes when + // they are lazy too. + + // first sort by the prefix of the index + std::vector> prefix; + if (! _sortCoords.empty()) { + _sortCoords.clear(); + _sortCoords.reserve(_condition->size()); + } + for (size_t s = 0; s < _condition->size(); s++) { + _sortCoords.push_back(s); + std::vector next; + next.reserve(en->_index->fields.size()); + prefix.emplace_back(next); + // prefix[s][t] = position in _condition[s] corresponding to the th index + // field + for (size_t t = 0; t < en->_index->fields.size(); t++) { + for (size_t u = 0; u < _condition->at(s).size(); u++) { + auto ri = _condition->at(s)[u]; + if (en->_index->fields[t].compare(ri._attr) == 0) { + prefix.at(s).insert(prefix.at(s).begin()+t, u); + break; + } + } + } + } + + SortFunc sortFunc(prefix, _condition, en->_reverse); + + // then sort by the values of the bounds + std::sort(_sortCoords.begin(), _sortCoords.end(), sortFunc); _posInRanges = 0; - getSkiplistIterator(_condition->at(_posInRanges)); + getSkiplistIterator(_condition->at(_sortCoords[_posInRanges])); return (_skiplistIterator != nullptr); } else { return false; @@ -1214,6 +1248,50 @@ bool IndexRangeBlock::initRanges () { LEAVE_BLOCK; } +// is _condition[i] < _condition[j]? these are IndexAndConditions + +bool IndexRangeBlock::SortFunc::operator() (size_t const& i, size_t const& j) { + size_t l, r; + + if (! _reverse) { + l = i; + r = j; + } else { + l = j; + r = i; + } + + size_t shortest = std::min(_prefix.at(i).size(), _prefix.at(j).size()); + + for (size_t k = 0; k < shortest; k++) { + RangeInfo lhs = _condition->at(l).at(_prefix.at(l).at(k)); + RangeInfo rhs = _condition->at(r).at(_prefix.at(r).at(k)); + int cmp; + + if (lhs.is1ValueRangeInfo() && rhs.is1ValueRangeInfo()) { + cmp = TRI_CompareValuesJson(lhs._lowConst.bound().json(), + rhs._lowConst.bound().json()); + if (cmp != 0) { + return (cmp == -1); + } + } else { + // assuming lhs and rhs are disjoint!! + TRI_ASSERT_EXPENSIVE(areDisjointRangeInfos(lhs, rhs)); + if (lhs._highConst.isDefined() && rhs._lowConst.isDefined()) { + cmp = (TRI_CompareValuesJson(lhs._highConst.bound().json(), + rhs._lowConst.bound().json())); + return (cmp == 0 || cmp == -1); + } else { // lhs._lowConst.isDefined() && rhs._highConst.isDefined() + return false; + } + } + } + TRI_ASSERT(false); + // shouldn't get here since the IndexAndConditions in _condition should be + // disjoint! + return false; +} + // std::vector IndexRangeBlock::andCombineRangeInfoVecs ( @@ -1898,7 +1976,7 @@ void IndexRangeBlock::readSkiplistIndex (size_t atMost) { TRI_FreeSkiplistIterator(_skiplistIterator); _skiplistIterator = nullptr; if (++_posInRanges < _condition->size()) { - getSkiplistIterator(_condition->at(_posInRanges)); + getSkiplistIterator(_condition->at(_sortCoords[_posInRanges])); } } else { _documents.emplace_back(*(indexElement->_document)); diff --git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index 315e186854..c16d8d3b34 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -618,10 +618,29 @@ namespace triagens { void readHashIndex (IndexOrCondition const&); void orCombineIndexOrs(IndexOrCondition*, IndexOrCondition*); - std::vector andCombineRangeInfoVecs (std::vector& riv1, - std::vector& riv2); + std::vector andCombineRangeInfoVecs (std::vector&, + std::vector&); - IndexOrCondition* cartesian (std::vector> collector); + IndexOrCondition* cartesian (std::vector>); + + class SortFunc { + public: + SortFunc (std::vector> prefix, + IndexOrCondition* condition, + bool reverse) + : _prefix(prefix), + _condition(condition), + _reverse(reverse){ + } + + bool operator() (size_t const&, + size_t const&); + + private: + std::vector> _prefix; + IndexOrCondition* _condition; + bool _reverse; + }; // ----------------------------------------------------------------------------- @@ -703,6 +722,7 @@ namespace triagens { bool _flag; size_t _posInRanges; + std::vector _sortCoords; //////////////////////////////////////////////////////////////////////////////// /// @brief _freeCondition: whether or not the _condition is owned by the diff --git a/arangod/Aql/RangeInfo.cpp b/arangod/Aql/RangeInfo.cpp index a41b1c9ce1..983d9662e4 100644 --- a/arangod/Aql/RangeInfo.cpp +++ b/arangod/Aql/RangeInfo.cpp @@ -659,7 +659,10 @@ static int containmentRangeInfos (RangeInfo const& lhs, RangeInfo const& rhs) { // returns true if the constant parts of lhs and rhs are disjoint and false // otherwise -static bool areDisjointRangeInfos (RangeInfo const& lhs, RangeInfo const& rhs) { +bool triagens::aql::areDisjointRangeInfos (RangeInfo const& lhs, + RangeInfo const& rhs) { + TRI_ASSERT(lhs._var == rhs._var); + TRI_ASSERT(lhs._attr == rhs._attr); int HiLo; if (lhs._highConst.isDefined() && rhs._lowConst.isDefined()) { @@ -887,3 +890,21 @@ void triagens::aql::removeOverlapsIndexOr (IndexOrCondition& ioc) { } } +// 3 way comparison for sorting +/*int triagens::aql::compareRangeInfos (RangeInfo const& lhs, RangeInfo const& rhs) { + TRI_ASSERT(lhs._var == rhs._var); + TRI_ASSERT(lhs._attr == rhs._attr); + + if (lhs.is1ValueRangeInfo() && rhs.is1ValueRangeInfo()) { + return TRI_CompareValuesJson(lhs._lowConst.bound().json(), + rhs._lowConst.bound().json()); + } + + // assuming lhs and rhs are disjoint!! + TRI_ASSERT_EXPENSIVE(areDisjointRangeInfos(lhs, rhs)); + if (lhs._highConst.isDefined() && rhs._lowConst.isDefined()) { + return TRI_CompareValuesJson(lhs._highConst.bound().json(), + rhs._lowConst.bound().json()); + } + return 1; +}*/ diff --git a/arangod/Aql/RangeInfo.h b/arangod/Aql/RangeInfo.h index 7d6fa720c6..4a8b9d6e44 100644 --- a/arangod/Aql/RangeInfo.h +++ b/arangod/Aql/RangeInfo.h @@ -944,10 +944,12 @@ namespace triagens { void differenceRangeInfos (RangeInfo&, RangeInfo&); void differenceIndexOrRangeInfo (IndexOrCondition const*, RangeInfo&); - bool isContainedIndexAndConditions (IndexAndCondition& and1, IndexAndCondition& and2); + bool isContainedIndexAndConditions (IndexAndCondition&, IndexAndCondition&); void differenceIndexAnd (IndexAndCondition&, IndexAndCondition&); void removeOverlapsIndexOr (IndexOrCondition&); bool areDisjointIndexAndConditions (IndexAndCondition&, IndexAndCondition&); + bool areDisjointRangeInfos (RangeInfo const&, RangeInfo const&); + int compareRangeInfos (RangeInfo const&, RangeInfo const&); } } diff --git a/js/server/tests/aql-queries-optimiser-in-noncluster.js b/js/server/tests/aql-queries-optimiser-in-noncluster.js index 2032cf3a29..b99f1b0a56 100644 --- a/js/server/tests/aql-queries-optimiser-in-noncluster.js +++ b/js/server/tests/aql-queries-optimiser-in-noncluster.js @@ -711,14 +711,13 @@ function ahuacatlQueryOptimiserInTestSuite () { ruleIsUsed(query); }, - // FIXME SORT doesn't work here testOverlappingInListSkiplist3 : function () { for (var i = 1; i < 100; ++i) { c.save({ value: i }); } c.ensureSkiplist("value"); var query = "FOR x IN " + cn + " FILTER (x.value > 3 || x.value == 1) && x.value IN [1,3,35,90] SORT x.value RETURN x.value"; - var expected = [ 35, 90, 1 ]; + var expected = [ 1, 35, 90 ]; var actual = getQueryResults(query); assertEqual(expected, actual); ruleIsUsed(query); @@ -729,7 +728,7 @@ function ahuacatlQueryOptimiserInTestSuite () { c.save({ value: i }); } c.ensureHashIndex("value"); - var query = "FOR x IN " + cn + " FILTER (x.value IN [3,35,90] || x.value IN [3, 90]) RETURN x.value"; + var query = "FOR x IN " + cn + " FILTER (x.value IN [3,35,90] || x.value IN [3, 90]) SORT x.value RETURN x.value"; var expected = [ 3, 35, 90 ]; var actual = getQueryResults(query); assertEqual(expected, actual); @@ -844,15 +843,14 @@ function ahuacatlQueryOptimiserInTestSuite () { ruleIsUsed(query); }, - // FIXME this test fails if "DESC" is not included in the query testOverlappingRangesListSkiplist1 : function () { for (var i = 1; i < 100; ++i) { c.save({ value: i }); } c.ensureSkiplist("value"); - var query = "FOR x IN " + cn + " FILTER (x.value > 3 || x.value < 90) SORT x.value DESC RETURN x.value"; + var query = "FOR x IN " + cn + " FILTER (x.value > 3 || x.value < 90) SORT x.value RETURN x.value"; var expected = [ ]; - for (i = 99; i > 0; i--) { + for (i = 1; i < 100; i++) { expected.push(i); } var actual = getQueryResults(query); @@ -865,7 +863,7 @@ function ahuacatlQueryOptimiserInTestSuite () { c.save({ value: i }); } c.ensureHashIndex("value"); - var query = "FOR x IN " + cn + " FILTER (x.value > 3 || x.value < 90) SORT x.value DESC RETURN x.value"; + var query = "FOR x IN " + cn + " FILTER (x.value > 3 || x.value < 90) RETURN x.value"; ruleIsNotUsed(query); }, @@ -887,7 +885,7 @@ function ahuacatlQueryOptimiserInTestSuite () { c.save({ value: i }); } c.ensureHashIndex("value"); - var query = "FOR i IN " + cn + " FILTER i.value == 8 || i.value <= 7 SORT i.value DESC RETURN i.value"; + var query = "FOR i IN " + cn + " FILTER i.value == 8 || i.value <= 7 RETURN i.value"; ruleIsNotUsed(query); }, @@ -948,11 +946,11 @@ function ahuacatlQueryOptimiserInTestSuite () { c.ensureSkiplist("value1", "value2"); var query = "FOR x in " + cn + " FILTER (x.value1 in [4,5] && x.value2 <= 2) || (x.value1 in [1,6] && x.value2 == 9) RETURN x.value1"; - var expected = [ 4, 4, 5, 5, 1, 6 ]; + var expected = [ 1, 4, 4, 5, 5, 6 ]; var actual = getQueryResults(query); assertEqual(expected, actual); ruleIsUsed(query); - }, //TODO more test like this . . . + }, testSkiplistMoreThanOne2 : function () { for (var i = 1; i <= 100; i++) { @@ -963,7 +961,7 @@ function ahuacatlQueryOptimiserInTestSuite () { c.ensureSkiplist("value1", "value2"); var query = "FOR x in " + cn + " FILTER (x.value1 in [4,5] && x.value2 <= PASSTHRU(2)) || (x.value1 in [1,6] && x.value2 == 9) RETURN x.value1"; - var expected = [ 4, 4, 5, 5, 1, 6 ]; + var expected = [ 1, 4, 4, 5, 5, 6 ]; var actual = getQueryResults(query); assertEqual(expected, actual); ruleIsUsed(query); @@ -978,12 +976,12 @@ function ahuacatlQueryOptimiserInTestSuite () { c.ensureSkiplist("value1", "value2"); var query = "FOR x in " + cn + " FILTER (x.value1 in [4,5] && x.value2 <= PASSTHRU(2)) || (x.value1 in [PASSTHRU(1),6] && x.value2 == 9) RETURN x.value1"; - var expected = [ 4, 4, 5, 5, 1, 6 ]; + var expected = [ 1, 4, 4, 5, 5, 6 ]; var actual = getQueryResults(query); assertEqual(expected, actual); ruleIsUsed(query); }, - //TODO add SORT here + testSkiplistMoreThanOne4 : function () { for (var i = 1;i <= 100;i++) { for (var j = 1; j <= 100; j++) { @@ -995,15 +993,16 @@ function ahuacatlQueryOptimiserInTestSuite () { c.ensureSkiplist("value1", "value2", "value3", "value4"); var query = "FOR x IN " + cn + " FILTER (x.value1 IN [1, 2, 3] && x.value1 IN [2, 3, 4] && x.value2 == 10 && x.value3 <= 20) || (x.value1 == 1 && x.value2 == 2 && x.value3 >= 0 && x.value3 <= 6 && x.value4 in ['somethings2', 'somethings4'] ) RETURN [x.value1, x.value2, x.value3, x.value4]"; - var expected = [ [ 2, 10, 12, "somethings20" ], + var expected = [ + [ 1, 2, 3, "somethings4" ], + [ 2, 10, 12, "somethings20" ], [ 3, 10, 13, "somethings20" ], - [ 1, 2, 3, "somethings4" ] ]; + ]; var actual = getQueryResults(query); assertEqual(expected, actual); ruleIsUsed(query); }, - //TODO add SORT here testSkiplistMoreThanOne5 : function () { for (var i = 1;i <= 100;i++) { @@ -1019,17 +1018,17 @@ function ahuacatlQueryOptimiserInTestSuite () { var query = "FOR x IN " + cn + " FILTER (x.value1 IN [PASSTHRU(1), PASSTHRU(2), PASSTHRU(3)] && x.value1 IN [2, 3, 4] && x.value2 == PASSTHRU(10) && x.value3 <= 2) || (x.value1 == 1 && x.value2 == 2 && x.value3 >= 0 && x.value3 == PASSTHRU(6) && x.value4 in ['somethings2', PASSTHRU('somethings4')] ) RETURN [x.value1, x.value2, x.value3, x.value4]"; var expected = [ - [ 2, 10, 1, "somethings20" ], - [ 2, 10, 2, "somethings20" ], - [ 3, 10, 1, "somethings20" ], - [ 3, 10, 2, "somethings20" ], - [ 1, 2, 6, "somethings4" ] ]; + [ 1, 2, 6, "somethings4" ] , + [ 2, 10, 1, "somethings20" ], + [ 2, 10, 2, "somethings20" ], + [ 3, 10, 1, "somethings20" ], + [ 3, 10, 2, "somethings20" ] + ]; var actual = getQueryResults(query); assertEqual(expected, actual); ruleIsUsed(query); }, - //TODO add SORT here testSkiplistMoreThanOne6 : function () { for (var i = 1;i <= 100;i++) { @@ -1045,11 +1044,12 @@ function ahuacatlQueryOptimiserInTestSuite () { var query = "FOR x IN " + cn + " FILTER (x.value1 IN [PASSTHRU(1), PASSTHRU(2), PASSTHRU(3)] && x.value1 IN PASSTHRU([2, 3, 4]) && x.value2 == PASSTHRU(10) && x.value3 <= 2) || (x.value1 == 1 && x.value2 == 2 && x.value3 >= 0 && x.value3 == PASSTHRU(6) && x.value4 in ['somethings2', PASSTHRU('somethings4')] ) RETURN [x.value1, x.value2, x.value3, x.value4]"; var expected = [ - [ 2, 10, 1, "somethings20" ], - [ 2, 10, 2, "somethings20" ], - [ 3, 10, 1, "somethings20" ], - [ 3, 10, 2, "somethings20" ], - [ 1, 2, 6, "somethings4" ] ]; + [ 1, 2, 6, "somethings4" ] , + [ 2, 10, 1, "somethings20" ], + [ 2, 10, 2, "somethings20" ], + [ 3, 10, 1, "somethings20" ], + [ 3, 10, 2, "somethings20" ] + ]; var actual = getQueryResults(query); assertEqual(expected, actual); ruleIsUsed(query);