From 03933e6e02e863d151dbbbc0bd5c9548c43d18fa Mon Sep 17 00:00:00 2001 From: Jan Date: Tue, 24 Jul 2018 18:03:22 +0200 Subject: [PATCH] Speed up `db.<collection>.any()` for the RocksDB storage engine (#5934) --- CHANGELOG | 15 ++++++ arangod/RocksDBEngine/RocksDBIterators.cpp | 54 ++++++++++++++++------ arangod/RocksDBEngine/RocksDBIterators.h | 4 +- 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 1b110953b7..740a1cb070 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,21 @@ v3.3.13 (XXXX-XX-XX) -------------------- +* put an upper bound on the number of documents to be scanned when using + `db.<collection>.any()` in the RocksDB storage engine + + previous versions of ArangoDB did a scan of a random amount of documents in + the collection, up to the total number of documents available. this produced + a random selection with a good quality, but needed to scan half the number + of documents in the collection on average. + + The new version will only scan up to 500 documents, so it produces a less + random result, but will be a lot faster especially for large collections. + + The implementation of `any()` for the MMFiles engine remains unchanged. The + MMFiles engine will pick a random document from the entire range of the + in-memory primary index without performing scans. + * return an empty result set instead of an "out of memory" exception when querying the geo index with invalid (out of range) coordinates diff --git a/arangod/RocksDBEngine/RocksDBIterators.cpp b/arangod/RocksDBEngine/RocksDBIterators.cpp index 9b7797af37..4f30cbcdb5 100644 --- a/arangod/RocksDBEngine/RocksDBIterators.cpp +++ b/arangod/RocksDBEngine/RocksDBIterators.cpp @@ -193,26 +193,52 @@ RocksDBAnyIndexIterator::RocksDBAnyIndexIterator( TRI_ASSERT(_iterator); _total = col->numberDocuments(trx); - uint64_t off = RandomGenerator::interval(_total - 1); + _forward = RandomGenerator::interval(uint16_t(1)) ? 
true : false; + + //initial seek if (_total > 0) { - if (off <= _total / 2) { - _iterator->Seek(_bounds.start()); - while (_iterator->Valid() && off-- > 0) { - _iterator->Next(); + uint64_t steps = RandomGenerator::interval(_total - 1) % 500; + auto initialKey = RocksDBKey(); + initialKey.constructDocument( + static_cast(col->getPhysical())->objectId(), + LocalDocumentId(RandomGenerator::interval(UINT64_MAX)) + ); + _iterator->Seek(initialKey.string()); + + if (checkIter()) { + if (_forward) { + while (steps-- > 0) { + _iterator->Next(); + if(!checkIter()) { break; } + } + } else { + while (steps-- > 0) { + _iterator->Prev(); + if(!checkIter()) { break; } + } } - } else { - off = _total - (off + 1); - _iterator->SeekForPrev(_bounds.end()); - while (_iterator->Valid() && off-- > 0) { - _iterator->Prev(); - } - } - if (!_iterator->Valid() || outOfRange()) { - _iterator->Seek(_bounds.start()); } } } +bool RocksDBAnyIndexIterator::checkIter(){ + if ( /* not valid */ !_iterator->Valid() || + /* out of range forward */ ( _forward && _cmp->Compare(_iterator->key(), _bounds.end()) > 0) || + /* out of range backward */ (!_forward && _cmp->Compare(_iterator->key(), _bounds.start()) < 0) ) { + + if (_forward) { + _iterator->Seek(_bounds.start()); + } else { + _iterator->SeekForPrev(_bounds.end()); + } + + if (!_iterator->Valid()) { + return false; + } + } + return true; +} + bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t limit) { TRI_ASSERT(_trx->state()->isRunning()); diff --git a/arangod/RocksDBEngine/RocksDBIterators.h b/arangod/RocksDBEngine/RocksDBIterators.h index 6228e9956a..0668af38e6 100644 --- a/arangod/RocksDBEngine/RocksDBIterators.h +++ b/arangod/RocksDBEngine/RocksDBIterators.h @@ -87,14 +87,14 @@ class RocksDBAnyIndexIterator final : public IndexIterator { private: bool outOfRange() const; - static uint64_t newOffset(LogicalCollection* collection, - transaction::Methods* trx); + bool checkIter(); rocksdb::Comparator const* _cmp; 
std::unique_ptr _iterator; RocksDBKeyBounds const _bounds; uint64_t _total; uint64_t _returned; + bool _forward; }; /// @brief iterates over the primary index and does lookups