diff --git a/arangod/Aql/ExecutionBlock.cpp b/arangod/Aql/ExecutionBlock.cpp index ea712b1df7..02482007d1 100644 --- a/arangod/Aql/ExecutionBlock.cpp +++ b/arangod/Aql/ExecutionBlock.cpp @@ -846,23 +846,73 @@ IndexRangeBlock::~IndexRangeBlock () { bool IndexRangeBlock::readIndex () { - // TODO: adjust to make callable multiple times with proper cleanup - // TODO: in variable case, evaluate all expressions, work actual values - // into constant bound and finally use these + // This is either called from initialize if all bounds are constant, + // in this case it is never called again. If there is at least one + // variable bound, then readIndex is called once for every item coming + // in from our dependency. In that case, it is guaranteed that + // _buffer is not empty, in particular _buffer.front() is defined + // _pos points to a position in _buffer.front() + // Therefore, we can use the register values in _buffer.front() in row + // _pos to evaluate the variable bounds. if (_documents.empty()) { _documents.reserve(DefaultBatchSize); } + else { + _documents.clear(); + } - _documents.clear(); - auto en = static_cast(getPlanNode()); - + IndexOrCondition const* condition = &en->_ranges; + + // Find out about the actual values for the bounds in the variable bound case: + if (! _allBoundsConstant) { +#if 0 + auto newCondition = new IndexOrCondition(); + try { + newCondition->push_back(std::vector()); + size_t posInExpressions = 0; + for (auto r : en->_ranges.at(0)) { + // First create a new RangeInfo containing only the constant + // low and high bound of r: + RangeInfo actualRange(r._var, r._attr, r._lowConst, r._highConst, + r.equality); + // Now work the actual values of the variable lows and highs into + // this constant range: + Expression* e; + for (auto l : r._lows) { + e = &(_allVariableBoundExpressions[posInExpressions++]); + //AqlValue a = e.execute(...); + if (a._type == AqlValue::JSON) { + //RangeInfoBound b( + //actualRange.andCombineLowerBounds( + + } + else { + } + } + for (auto h : r._highs) { + e = &(_allVariableBoundExpressions[posInExpressions++]); + //... + } + + newCondition->at(0).push_back(actualRange); + } + + } + catch (...) { + delete newCondition; + throw; + } + condition = newCondition; +#endif + } + if (en->_index->_type == TRI_IDX_TYPE_SKIPLIST_INDEX) { - readSkiplistIndex(); + readSkiplistIndex(*condition); } else if (en->_index->_type == TRI_IDX_TYPE_HASH_INDEX) { - readHashIndex(); + readHashIndex(*condition); } else { TRI_ASSERT(false); @@ -879,8 +929,9 @@ int IndexRangeBlock::initialize () { } } - // TODO: if all constant, then do it here, otherwise not - readIndex(); + if (_allBoundsConstant) { + readIndex(); + } return res; } @@ -915,10 +966,16 @@ AqlItemBlock* IndexRangeBlock::getSome (size_t atLeast, return nullptr; } _pos = 0; // this is in the first block + + // This is a new item, so let's read the index if bounds are variable: + if (! _allBoundsConstant) { + readIndex(); + } + _posInDocs = 0; // position in _documents . . . } - // If we get here, we do have _buffer.front() + // If we get here, we do have _buffer.front() and _pos points into it AqlItemBlock* cur = _buffer.front(); size_t const curRegs = cur->getNrRegs(); @@ -956,17 +1013,22 @@ AqlItemBlock* IndexRangeBlock::getSome (size_t atLeast, // Advance read position: if (_posInDocs >= _documents.size()) { - // we have exhausted our local documents buffer - _posInDocs = 0; + // we have exhausted our local documents buffer, if (++_pos >= cur->size()) { _buffer.pop_front(); // does not throw delete cur; _pos = 0; } - - // TODO: if bounds are variable, then repeat readIndex here + // let's read the index if bounds are variable: + if (! _buffer.empty() && ! _allBoundsConstant) { + readIndex(); + } + _posInDocs = 0; + + // If _buffer is empty, then we will fetch a new block in the next call + // and then read the index. } // Clear out registers no longer needed later: @@ -991,31 +1053,47 @@ size_t IndexRangeBlock::skipSome (size_t atLeast, if (_buffer.empty()) { if (! ExecutionBlock::getBlock(DefaultBatchSize, DefaultBatchSize)) { _done = true; - return 0; + return skipped; } _pos = 0; // this is in the first block + + // This is a new item, so let's read the index if bounds are variable: + if (! _allBoundsConstant) { + readIndex(); + } + + _posInDocs = 0; // position in _documents . . . } - // If we get here, we do have _buffer.front() + // If we get here, we do have _buffer.front() and _pos points into it AqlItemBlock* cur = _buffer.front(); - _posInDocs += std::min(atMost, _documents.size() - _posInDocs); + size_t available = _documents.size() - _posInDocs; + size_t toSkip = std::min(atMost - skipped, available); + _posInDocs += toSkip; + skipped += toSkip; - if (atMost < _documents.size() - _posInDocs){ - // eat just enough of _documents . . . - _posInDocs += atMost; - skipped = atMost; - } - else { - // eat the whole of the current inVariable and proceed . . . - skipped += (_documents.size() - _posInDocs); - _posInDocs = 0; - delete cur; - _buffer.pop_front(); - _pos = 0; + // Advance read position: + if (_posInDocs >= _documents.size()) { + // we have exhausted our local documents buffer, + if (++_pos >= cur->size()) { + _buffer.pop_front(); // does not throw + delete cur; + _pos = 0; + } + + // let's read the index if bounds are variable: + if (! _buffer.empty() && ! _allBoundsConstant) { + readIndex(); + } + _posInDocs = 0; + + // If _buffer is empty, then we will fetch a new block in the next round + // and then read the index. } + } return skipped; @@ -1053,13 +1131,11 @@ size_t IndexRangeBlock::skipSome (size_t atLeast, // (i.e. the 1 in x.c >= 1) cannot be lists or arrays. // -void IndexRangeBlock::readSkiplistIndex () { +void IndexRangeBlock::readSkiplistIndex (IndexOrCondition const& ranges) { auto en = static_cast(getPlanNode()); TRI_index_t* idx = en->_index; TRI_ASSERT(idx != nullptr); - std::vector> const& ranges = en->_ranges; - TRI_shaper_t* shaper = _collection->documentCollection()->getShaper(); TRI_ASSERT(shaper != nullptr); @@ -1069,8 +1145,10 @@ void IndexRangeBlock::readSkiplistIndex () { size_t i = 0; for (;i < ranges.at(0).size(); i++) { // ranges.at(0) corresponds to a prefix of idx->_fields . . . - // TODO only doing 1 dim case at the moment . . . + // TODO only doing case with a single OR (i.e. ranges.size()==1 at the + // moment ... auto range = ranges.at(0).at(i); + TRI_ASSERT(range.isConstant()); if (range.is1ValueRangeInfo()) { // it's an equality . . . parameters(range._lowConst.bound().copy()); } @@ -1140,7 +1218,7 @@ void IndexRangeBlock::readSkiplistIndex () { TRI_FreeSkiplistIterator(skiplistIterator); } -void IndexRangeBlock::readHashIndex () { +void IndexRangeBlock::readHashIndex (IndexOrCondition const& ranges) { auto en = static_cast(getPlanNode()); TRI_index_t* idx = en->_index; TRI_ASSERT(idx != nullptr); @@ -1179,8 +1257,8 @@ void IndexRangeBlock::readHashIndex () { char const* name = TRI_AttributeNameShapePid(shaper, pid); - for (auto x: en->_ranges.at(0)) { - if (x._attr == std::string(name)){//found attribute + for (auto x: ranges.at(0)) { + if (x._attr == std::string(name)){ //found attribute auto shaped = TRI_ShapedJsonJson(shaper, x._lowConst.bound().json(), false); // here x->_low->_bound = x->_high->_bound searchValue._values[i] = *shaped; diff --git a/arangod/Aql/ExecutionBlock.h b/arangod/Aql/ExecutionBlock.h index b4487f80c0..c640fa471b 100644 --- a/arangod/Aql/ExecutionBlock.h +++ b/arangod/Aql/ExecutionBlock.h @@ -652,13 +652,13 @@ public: /// @brief read using a skiplist index //////////////////////////////////////////////////////////////////////////////// - void readSkiplistIndex (); + void readSkiplistIndex (IndexOrCondition const&); //////////////////////////////////////////////////////////////////////////////// /// @brief read using a hash index //////////////////////////////////////////////////////////////////////////////// - void readHashIndex (); + void readHashIndex (IndexOrCondition const&); // ----------------------------------------------------------------------------- // --SECTION-- private variables diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 1b4e1656f6..08e6afddf9 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -806,7 +806,7 @@ int triagens::aql::useIndexRange (Optimizer* opt, class SortAnalysis { using ECN = triagens::aql::EnumerateCollectionNode; - typedef std::pair Range_IndexPair; + typedef std::pair Range_IndexPair; struct sortNodeData { bool ASC; @@ -902,7 +902,7 @@ public: Range_IndexPair getAttrsForVariableName (std::string &variableName) { ECN::IndexMatchVec v; - RangeInfoVec rangeInfo; + IndexOrCondition rangeInfo; for (size_t j = 0; j < _sortNodeData.size(); j ++) { if (_sortNodeData[j]->variableName != variableName) { diff --git a/arangod/Aql/RangeInfo.h b/arangod/Aql/RangeInfo.h index 26a928dcd5..91a1638b74 100644 --- a/arangod/Aql/RangeInfo.h +++ b/arangod/Aql/RangeInfo.h @@ -590,15 +590,15 @@ namespace triagens { }; //////////////////////////////////////////////////////////////////////////////// -/// @brief RangeInfoVec, type for vector of vector of RangeInfo. The meaning +/// @brief IndexOrCondition, type for vector of vector of RangeInfo. The meaning /// is: the outer vector means an implicit "OR" between the entries. Each -/// entry is a vector which means an implicit "AND" between the individual -/// RangeInfo objects. This is actually the disjunctive normal form of -/// all conditions, therefore, every boolean expression of ANDs and ORs -/// can be expressed in this form. +/// entry is a vector whose entries correspond to the attributes of the +/// index. They are a RangeInfo specifying the condition for that attribute. +/// Note that in the variable range bound case one RangeInfo can contain +/// multiple conditions which are implicitly "AND"ed. //////////////////////////////////////////////////////////////////////////////// - typedef std::vector> RangeInfoVec; + typedef std::vector> IndexOrCondition; } }