1
0
Fork 0

Speed up `db.<collection>.any()` for the RocksDB storage engine (#5934)

This commit is contained in:
Jan 2018-07-24 18:03:22 +02:00 committed by GitHub
parent 7c61ad92f5
commit 03933e6e02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 16 deletions

View File

@ -1,6 +1,21 @@
v3.3.13 (XXXX-XX-XX)
--------------------
* put an upper bound on the number of documents to be scanned when using
`db.<collection>.any()` in the RocksDB storage engine.
Previous versions of ArangoDB scanned a random number of documents in
the collection, up to the total number of documents available. This produced
a random selection of good quality, but required scanning half of the
documents in the collection on average.
The new version will only scan up to 500 documents, so it produces a less
random result, but will be a lot faster, especially for large collections.
The implementation of `any()` for the MMFiles engine remains unchanged. The
MMFiles engine will pick a random document from the entire range of the
in-memory primary index without performing scans.
* return an empty result set instead of an "out of memory" exception when
querying the geo index with invalid (out of range) coordinates

View File

@ -193,26 +193,52 @@ RocksDBAnyIndexIterator::RocksDBAnyIndexIterator(
TRI_ASSERT(_iterator);
_total = col->numberDocuments(trx);
uint64_t off = RandomGenerator::interval(_total - 1);
_forward = RandomGenerator::interval(uint16_t(1)) ? true : false;
//initial seek
if (_total > 0) {
if (off <= _total / 2) {
_iterator->Seek(_bounds.start());
while (_iterator->Valid() && off-- > 0) {
_iterator->Next();
uint64_t steps = RandomGenerator::interval(_total - 1) % 500;
auto initialKey = RocksDBKey();
initialKey.constructDocument(
static_cast<RocksDBCollection*>(col->getPhysical())->objectId(),
LocalDocumentId(RandomGenerator::interval(UINT64_MAX))
);
_iterator->Seek(initialKey.string());
if (checkIter()) {
if (_forward) {
while (steps-- > 0) {
_iterator->Next();
if(!checkIter()) { break; }
}
} else {
while (steps-- > 0) {
_iterator->Prev();
if(!checkIter()) { break; }
}
}
} else {
off = _total - (off + 1);
_iterator->SeekForPrev(_bounds.end());
while (_iterator->Valid() && off-- > 0) {
_iterator->Prev();
}
}
if (!_iterator->Valid() || outOfRange()) {
_iterator->Seek(_bounds.start());
}
}
}
/// @brief makes sure the underlying iterator is positioned on an entry,
/// wrapping around to the opposite end of the bounds when it has run off
/// the end it is travelling towards.
///
/// @return false only if the iterator is still invalid after the
/// wrap-around seek; true otherwise.
bool RocksDBAnyIndexIterator::checkIter() {
  // An invalid iterator always needs repositioning. A valid one only does
  // if it has moved past the bound in the direction of travel.
  bool mustWrap = !_iterator->Valid();
  if (!mustWrap) {
    if (_forward) {
      // moving forward: key sorts after the end of the bounds
      mustWrap = _cmp->Compare(_iterator->key(), _bounds.end()) > 0;
    } else {
      // moving backward: key sorts before the start of the bounds
      mustWrap = _cmp->Compare(_iterator->key(), _bounds.start()) < 0;
    }
  }

  if (!mustWrap) {
    return true;
  }

  // Wrap around: restart from the end opposite to the direction of travel.
  if (_forward) {
    _iterator->Seek(_bounds.start());
  } else {
    _iterator->SeekForPrev(_bounds.end());
  }

  // NOTE(review): after the wrap-around only Valid() is consulted; the new
  // position is not compared against the bounds again here - confirm that
  // callers cover that case.
  return _iterator->Valid();
}
bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t limit) {
TRI_ASSERT(_trx->state()->isRunning());

View File

@ -87,14 +87,14 @@ class RocksDBAnyIndexIterator final : public IndexIterator {
private:
bool outOfRange() const;
static uint64_t newOffset(LogicalCollection* collection,
transaction::Methods* trx);
bool checkIter();
rocksdb::Comparator const* _cmp;
std::unique_ptr<rocksdb::Iterator> _iterator;
RocksDBKeyBounds const _bounds;
uint64_t _total;
uint64_t _returned;
bool _forward;
};
/// @brief iterates over the primary index and does lookups