mirror of https://gitee.com/bigwinds/arangodb
Speed up `db.<collection>.any()` for the RocksDB storage engine (#5934)
This commit is contained in:
parent
7c61ad92f5
commit
03933e6e02
15
CHANGELOG
15
CHANGELOG
|
@ -1,6 +1,21 @@
|
|||
v3.3.13 (XXXX-XX-XX)
|
||||
--------------------
|
||||
|
||||
* put an upper bound on the number of documents to be scanned when using
|
||||
`db.<collection>.any()` in the RocksDB storage engine
|
||||
|
||||
Previous versions of ArangoDB did a scan of a random number of documents in
|
||||
the collection, up to the total number of documents available. This produced
|
||||
a random selection of good quality, but needed to scan half the number
|
||||
of documents in the collection on average.
|
||||
|
||||
The new version will only scan up to 500 documents, so it produces a less
|
||||
random result, but will be a lot faster, especially for large collections.
|
||||
|
||||
The implementation of `any()` for the MMFiles engine remains unchanged. The
|
||||
MMFiles engine will pick a random document from the entire range of the
|
||||
in-memory primary index without performing scans.
|
||||
|
||||
* return an empty result set instead of an "out of memory" exception when
|
||||
querying the geo index with invalid (out of range) coordinates
|
||||
|
||||
|
|
|
@ -193,26 +193,52 @@ RocksDBAnyIndexIterator::RocksDBAnyIndexIterator(
|
|||
TRI_ASSERT(_iterator);
|
||||
|
||||
_total = col->numberDocuments(trx);
|
||||
uint64_t off = RandomGenerator::interval(_total - 1);
|
||||
_forward = RandomGenerator::interval(uint16_t(1)) ? true : false;
|
||||
|
||||
//initial seek
|
||||
if (_total > 0) {
|
||||
if (off <= _total / 2) {
|
||||
_iterator->Seek(_bounds.start());
|
||||
while (_iterator->Valid() && off-- > 0) {
|
||||
_iterator->Next();
|
||||
uint64_t steps = RandomGenerator::interval(_total - 1) % 500;
|
||||
auto initialKey = RocksDBKey();
|
||||
initialKey.constructDocument(
|
||||
static_cast<RocksDBCollection*>(col->getPhysical())->objectId(),
|
||||
LocalDocumentId(RandomGenerator::interval(UINT64_MAX))
|
||||
);
|
||||
_iterator->Seek(initialKey.string());
|
||||
|
||||
if (checkIter()) {
|
||||
if (_forward) {
|
||||
while (steps-- > 0) {
|
||||
_iterator->Next();
|
||||
if(!checkIter()) { break; }
|
||||
}
|
||||
} else {
|
||||
while (steps-- > 0) {
|
||||
_iterator->Prev();
|
||||
if(!checkIter()) { break; }
|
||||
}
|
||||
}
|
||||
} else {
|
||||
off = _total - (off + 1);
|
||||
_iterator->SeekForPrev(_bounds.end());
|
||||
while (_iterator->Valid() && off-- > 0) {
|
||||
_iterator->Prev();
|
||||
}
|
||||
}
|
||||
if (!_iterator->Valid() || outOfRange()) {
|
||||
_iterator->Seek(_bounds.start());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief checks whether the underlying iterator is still positioned inside
/// the bounds for the current scan direction, and wraps it around if not.
///
/// The iterator counts as unusable when it is not valid, when scanning
/// forward and its key compares greater than `_bounds.end()`, or when scanning
/// backward and its key compares less than `_bounds.start()`. In that case it
/// is repositioned: to `_bounds.start()` when moving forward, or to
/// `_bounds.end()` (via SeekForPrev) when moving backward.
///
/// @return false only if the iterator is still not valid after it has been
/// repositioned; true in every other case. Note that after repositioning only
/// `Valid()` is re-checked, the position is not compared against the bounds
/// again.
bool RocksDBAnyIndexIterator::checkIter(){
|
||||
if ( /* not valid */ !_iterator->Valid() ||
|
||||
/* out of range forward */ ( _forward && _cmp->Compare(_iterator->key(), _bounds.end()) > 0) ||
|
||||
/* out of range backward */ (!_forward && _cmp->Compare(_iterator->key(), _bounds.start()) < 0) ) {
|
||||
|
||||
// wrap around: restart from the end of the range that the current
// direction moves away from
if (_forward) {
|
||||
_iterator->Seek(_bounds.start());
|
||||
} else {
|
||||
_iterator->SeekForPrev(_bounds.end());
|
||||
}
|
||||
|
||||
// the seek itself found nothing to position on
if (!_iterator->Valid()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t limit) {
|
||||
TRI_ASSERT(_trx->state()->isRunning());
|
||||
|
||||
|
|
|
@ -87,14 +87,14 @@ class RocksDBAnyIndexIterator final : public IndexIterator {
|
|||
|
||||
private:
|
||||
bool outOfRange() const;
|
||||
static uint64_t newOffset(LogicalCollection* collection,
|
||||
transaction::Methods* trx);
|
||||
bool checkIter();
|
||||
|
||||
rocksdb::Comparator const* _cmp;
|
||||
std::unique_ptr<rocksdb::Iterator> _iterator;
|
||||
RocksDBKeyBounds const _bounds;
|
||||
uint64_t _total;
|
||||
uint64_t _returned;
|
||||
bool _forward;
|
||||
};
|
||||
|
||||
/// @brief iterates over the primary index and does lookups
|
||||
|
|
Loading…
Reference in New Issue