1
0
Fork 0

Speed up `db.<collection>.any()` for the RocksDB storage engine (#5934)

This commit is contained in:
Jan 2018-07-24 18:03:22 +02:00 committed by GitHub
parent 7c61ad92f5
commit 03933e6e02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 16 deletions

View File

@ -1,6 +1,21 @@
v3.3.13 (XXXX-XX-XX) v3.3.13 (XXXX-XX-XX)
-------------------- --------------------
* put an upper bound on the number of documents to be scanned when using
`db.<collection>.any()` in the RocksDB storage engine
Previous versions of ArangoDB scanned a random number of documents in
the collection, up to the total number of documents available. This produced
a random selection of good quality, but required scanning half of the
documents in the collection on average.
The new version will only scan up to 500 documents, so it produces a less
random result, but will be a lot faster, especially for large collections.
The implementation of `any()` for the MMFiles engine remains unchanged. The
MMFiles engine will pick a random document from the entire range of the
in-memory primary index without performing scans.
* return an empty result set instead of an "out of memory" exception when * return an empty result set instead of an "out of memory" exception when
querying the geo index with invalid (out of range) coordinates querying the geo index with invalid (out of range) coordinates

View File

@ -193,26 +193,52 @@ RocksDBAnyIndexIterator::RocksDBAnyIndexIterator(
TRI_ASSERT(_iterator); TRI_ASSERT(_iterator);
_total = col->numberDocuments(trx); _total = col->numberDocuments(trx);
uint64_t off = RandomGenerator::interval(_total - 1); _forward = RandomGenerator::interval(uint16_t(1)) ? true : false;
//initial seek
if (_total > 0) { if (_total > 0) {
if (off <= _total / 2) { uint64_t steps = RandomGenerator::interval(_total - 1) % 500;
_iterator->Seek(_bounds.start()); auto initialKey = RocksDBKey();
while (_iterator->Valid() && off-- > 0) { initialKey.constructDocument(
_iterator->Next(); static_cast<RocksDBCollection*>(col->getPhysical())->objectId(),
LocalDocumentId(RandomGenerator::interval(UINT64_MAX))
);
_iterator->Seek(initialKey.string());
if (checkIter()) {
if (_forward) {
while (steps-- > 0) {
_iterator->Next();
if(!checkIter()) { break; }
}
} else {
while (steps-- > 0) {
_iterator->Prev();
if(!checkIter()) { break; }
}
} }
} else {
off = _total - (off + 1);
_iterator->SeekForPrev(_bounds.end());
while (_iterator->Valid() && off-- > 0) {
_iterator->Prev();
}
}
if (!_iterator->Valid() || outOfRange()) {
_iterator->Seek(_bounds.start());
} }
} }
} }
bool RocksDBAnyIndexIterator::checkIter(){
if ( /* not valid */ !_iterator->Valid() ||
/* out of range forward */ ( _forward && _cmp->Compare(_iterator->key(), _bounds.end()) > 0) ||
/* out of range backward */ (!_forward && _cmp->Compare(_iterator->key(), _bounds.start()) < 0) ) {
if (_forward) {
_iterator->Seek(_bounds.start());
} else {
_iterator->SeekForPrev(_bounds.end());
}
if (!_iterator->Valid()) {
return false;
}
}
return true;
}
bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t limit) { bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t limit) {
TRI_ASSERT(_trx->state()->isRunning()); TRI_ASSERT(_trx->state()->isRunning());

View File

@ -87,14 +87,14 @@ class RocksDBAnyIndexIterator final : public IndexIterator {
private: private:
bool outOfRange() const; bool outOfRange() const;
static uint64_t newOffset(LogicalCollection* collection, bool checkIter();
transaction::Methods* trx);
rocksdb::Comparator const* _cmp; rocksdb::Comparator const* _cmp;
std::unique_ptr<rocksdb::Iterator> _iterator; std::unique_ptr<rocksdb::Iterator> _iterator;
RocksDBKeyBounds const _bounds; RocksDBKeyBounds const _bounds;
uint64_t _total; uint64_t _total;
uint64_t _returned; uint64_t _returned;
bool _forward;
}; };
/// @brief iterates over the primary index and does lookups /// @brief iterates over the primary index and does lookups