
handle incomplete reads (#9019)

Authored by Jan on 2019-05-17 11:17:51 +02:00, committed by GitHub
parent b5e3fcf06e
commit 4d86dd5fa8
12 changed files with 127 additions and 44 deletions

View File

@@ -1,6 +1,17 @@
v3.4.6 (XXXX-XX-XX)
-------------------
* added error code 1240 "incomplete read" for RocksDB-based reads that cannot retrieve
documents because the RocksDB block cache is size-restricted (with its size limit enforced)
and the uncompressed data blocks do not fit into the block cache.
The error can only occur for collection or index scans with the RocksDB storage engine
when the RocksDB block cache is used and set to a very small size, and its maximum size is
enforced by setting the `--rocksdb.enforce-block-cache-size-limit` option to `true`.
Previously these incomplete reads could be ignored silently, making collection or
index scans return fewer documents than were actually present.
* fixed internal issue #3918: added optional second parameter "withId" to AQL
function PREGEL_RESULT
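
The situation the 1240 changelog entry describes can be reproduced with plain RocksDB: when the block cache is very small and its capacity limit is strictly enforced, a data block that cannot be inserted into the cache makes the read fail with Status::Incomplete() instead of quietly proceeding. The following is a hypothetical, self-contained sketch of such a configuration, not ArangoDB code; the assumption that `--rocksdb.enforce-block-cache-size-limit` corresponds to the LRU cache's strict_capacity_limit flag is the editor's, not something stated in the diff.

// Sketch only (assumptions noted above): a tiny, strictly size-limited block cache,
// the setup under which RocksDB reads can come back as Status::Incomplete().
#include <rocksdb/cache.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>

rocksdb::Options makeOptionsWithTinyStrictBlockCache() {
  rocksdb::BlockBasedTableOptions tableOptions;
  // 1 MB cache, default shard count, strict capacity limit enabled: inserting a
  // block that does not fit fails instead of temporarily exceeding the limit.
  tableOptions.block_cache =
      rocksdb::NewLRUCache(1 << 20, /*num_shard_bits*/ -1, /*strict_capacity_limit*/ true);

  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(tableOptions));
  return options;
}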

View File

@@ -63,6 +63,14 @@ using namespace arangodb::basics;
namespace {
constexpr bool EdgeIndexFillBlockCache = false;
void checkIteratorStatus(rocksdb::Iterator const* iterator) {
auto s = iterator->status();
if (!s.ok()) {
THROW_ARANGO_EXCEPTION(arangodb::rocksutils::convertStatus(s));
}
}
}
RocksDBEdgeIndexWarmupTask::RocksDBEdgeIndexWarmupTask(
@@ -476,6 +484,9 @@ void RocksDBEdgeIndexIterator::lookupInRocksDB(StringRef fromTo) {
_iterator->Next();
}
_builder.close();
::checkIteratorStatus(_iterator.get());
if (cc != nullptr) {
// TODO Add cache retry on next call
// Now we have something in _inplaceMemory.
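
The ::checkIteratorStatus helper introduced in this file is the heart of the fix: a RocksDB iterator that stops being Valid() may have hit a read error (such as the incomplete read described in the changelog) rather than the end of its range, and only iterator->status() distinguishes the two cases. Below is a minimal standalone sketch of the same pattern; THROW_ARANGO_EXCEPTION and rocksutils::convertStatus are replaced by a plain std::runtime_error so the example compiles outside ArangoDB.

// Standalone sketch of the post-scan status check (not ArangoDB code).
#include <rocksdb/db.h>
#include <rocksdb/iterator.h>
#include <memory>
#include <stdexcept>
#include <string>

void checkIteratorStatus(rocksdb::Iterator const* iterator) {
  // !Valid() alone is ambiguous: it can mean "range exhausted" or "read failed".
  rocksdb::Status s = iterator->status();
  if (!s.ok()) {
    throw std::runtime_error("storage engine read failed: " + s.ToString());
  }
}

void scanAll(rocksdb::DB& db) {
  std::unique_ptr<rocksdb::Iterator> it(db.NewIterator(rocksdb::ReadOptions()));
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // ... consume it->key() / it->value() ...
  }
  // Without this check an incomplete read is indistinguishable from a clean end
  // of the scan, which is exactly the silent under-reporting this commit fixes.
  checkIteratorStatus(it.get());
}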

View File

@@ -46,6 +46,41 @@
using namespace arangodb;
/// El Cheapo index iterator
class RocksDBFulltextIndexIterator : public IndexIterator {
public:
RocksDBFulltextIndexIterator(LogicalCollection* collection, transaction::Methods* trx,
std::set<LocalDocumentId>&& docs)
: IndexIterator(collection, trx), _docs(std::move(docs)), _pos(_docs.begin()) {}
~RocksDBFulltextIndexIterator() {}
char const* typeName() const override { return "fulltext-index-iterator"; }
bool next(LocalDocumentIdCallback const& cb, size_t limit) override {
TRI_ASSERT(limit > 0);
while (_pos != _docs.end() && limit > 0) {
cb(*_pos);
++_pos;
limit--;
}
return _pos != _docs.end();
}
void reset() override { _pos = _docs.begin(); }
void skip(uint64_t count, uint64_t& skipped) override {
while (_pos != _docs.end() && skipped < count) {
++_pos;
skipped++;
}
}
private:
std::set<LocalDocumentId> const _docs;
std::set<LocalDocumentId>::iterator _pos;
};
RocksDBFulltextIndex::RocksDBFulltextIndex(TRI_idx_iid_t iid,
arangodb::LogicalCollection& collection,
arangodb::velocypack::Slice const& info)

View File

@@ -126,41 +126,6 @@ class RocksDBFulltextIndex final : public RocksDBIndex {
std::set<LocalDocumentId>& resultSet);
};
/// El Cheapo index iterator
class RocksDBFulltextIndexIterator : public IndexIterator {
public:
RocksDBFulltextIndexIterator(LogicalCollection* collection, transaction::Methods* trx,
std::set<LocalDocumentId>&& docs)
: IndexIterator(collection, trx), _docs(std::move(docs)), _pos(_docs.begin()) {}
~RocksDBFulltextIndexIterator() {}
char const* typeName() const override { return "fulltext-index-iterator"; }
bool next(LocalDocumentIdCallback const& cb, size_t limit) override {
TRI_ASSERT(limit > 0);
while (_pos != _docs.end() && limit > 0) {
cb(*_pos);
++_pos;
limit--;
}
return _pos != _docs.end();
}
void reset() override { _pos = _docs.begin(); }
void skip(uint64_t count, uint64_t& skipped) override {
while (_pos != _docs.end() && skipped < count) {
++_pos;
skipped++;
}
}
private:
std::set<LocalDocumentId> const _docs;
std::set<LocalDocumentId>::iterator _pos;
};
} // namespace arangodb
#endif

View File

@@ -42,6 +42,17 @@
using namespace arangodb;
namespace {
void checkIteratorStatus(rocksdb::Iterator const* iterator) {
auto s = iterator->status();
if (!s.ok()) {
THROW_ARANGO_EXCEPTION(arangodb::rocksutils::convertStatus(s));
}
}
}
template <typename CMP = geo_index::DocumentsAscending>
class RDBNearIterator final : public IndexIterator {
public:
@@ -202,6 +213,8 @@ class RDBNearIterator final : public IndexIterator {
_near.reportFound(documentId, RocksDBValue::centroid(_iter->value()));
_iter->Next();
}
::checkIteratorStatus(_iter.get());
}
_near.didScanIntervals(); // calculate next bounds
@@ -429,7 +442,7 @@ Result RocksDBGeoIndex::removeInternal(transaction::Methods* trx, RocksDBMethods
key->constructGeoIndexValue(_objectId, cell.id(), documentId);
res = mthd->Delete(RocksDBColumnFamily::geo(), key.ref());
if (res.fail()) {
return res;
break;
}
}
return res;

View File

@@ -35,6 +35,14 @@ using namespace arangodb;
namespace {
constexpr bool AllIteratorFillBlockCache = true;
constexpr bool AnyIteratorFillBlockCache = false;
void checkIteratorStatus(rocksdb::Iterator const* iterator) {
auto s = iterator->status();
if (!s.ok()) {
THROW_ARANGO_EXCEPTION(arangodb::rocksutils::convertStatus(s));
}
}
} // namespace
// ================ All Iterator ==================
@@ -81,6 +89,7 @@ bool RocksDBAllIndexIterator::next(LocalDocumentIdCallback const& cb, size_t lim
// No limit no data, or we are actually done. The last call should have
// returned false
TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken
::checkIteratorStatus(_iterator.get());
return false;
}
@@ -94,6 +103,7 @@ bool RocksDBAllIndexIterator::next(LocalDocumentIdCallback const& cb, size_t lim
_iterator->Next();
if (!_iterator->Valid() || outOfRange()) {
::checkIteratorStatus(_iterator.get());
return false;
}
}
@@ -109,15 +119,17 @@ bool RocksDBAllIndexIterator::nextDocument(IndexIterator::DocumentCallback const
// No limit no data, or we are actually done. The last call should have
// returned false
TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken
::checkIteratorStatus(_iterator.get());
return false;
}
while (limit > 0) {
cb(RocksDBKey::documentId(_iterator->key()), VPackSlice(_iterator->value().data()));
--limit;
_iterator->Next();
if (!_iterator->Valid() || outOfRange()) {
::checkIteratorStatus(_iterator.get());
return false;
}
}
@@ -134,6 +146,8 @@ void RocksDBAllIndexIterator::skip(uint64_t count, uint64_t& skipped) {
_iterator->Next();
}
::checkIteratorStatus(_iterator.get());
}
void RocksDBAllIndexIterator::reset() {
@@ -191,6 +205,7 @@ bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t lim
// No limit no data, or we are actually done. The last call should have
// returned false
TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken
::checkIteratorStatus(_iterator.get());
return false;
}
@@ -200,6 +215,7 @@ bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t lim
_returned++;
_iterator->Next();
if (!_iterator->Valid() || outOfRange()) {
::checkIteratorStatus(_iterator.get());
if (_returned < _total) {
_iterator->Seek(_bounds.start());
continue;
@@ -218,6 +234,7 @@ bool RocksDBAnyIndexIterator::nextDocument(IndexIterator::DocumentCallback const
// No limit no data, or we are actually done. The last call should have
// returned false
TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken
::checkIteratorStatus(_iterator.get());
return false;
}
@@ -227,6 +244,7 @@ bool RocksDBAnyIndexIterator::nextDocument(IndexIterator::DocumentCallback const
_returned++;
_iterator->Next();
if (!_iterator->Valid() || outOfRange()) {
::checkIteratorStatus(_iterator.get());
if (_returned < _total) {
_iterator->Seek(_bounds.start());
continue;
@@ -335,6 +353,9 @@ bool RocksDBGenericIterator::next(GenericCallback const& cb, size_t limit) {
if (limit == 0) {
// No limit no data, or we are actually done. The last call should have
// returned false
if (!_iterator->Valid()) {
::checkIteratorStatus(_iterator.get());
}
return false;
}
@@ -353,6 +374,10 @@ bool RocksDBGenericIterator::next(GenericCallback const& cb, size_t limit) {
} else {
_iterator->Next();
}
if (!_iterator->Valid()) {
::checkIteratorStatus(_iterator.get());
}
}
return hasMore();

View File

@@ -55,10 +55,19 @@
using namespace arangodb;
namespace {
/// @brief the _key attribute, which, when used in an index, will implicitly make
/// it unique
static std::vector<arangodb::basics::AttributeName> const KeyAttribute{
arangodb::basics::AttributeName("_key", false)};
std::vector<arangodb::basics::AttributeName> const KeyAttribute{arangodb::basics::AttributeName("_key", false)};
void checkIteratorStatus(rocksdb::Iterator const* iterator) {
auto s = iterator->status();
if (!s.ok()) {
THROW_ARANGO_EXCEPTION(arangodb::rocksutils::convertStatus(s));
}
}
}
// .............................................................................
// recall for all of the following comparison functions:
@@ -199,6 +208,7 @@ bool RocksDBVPackIndexIterator::next(LocalDocumentIdCallback const& cb, size_t l
// No limit no data, or we are actually done. The last call should have
// returned false
TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken
::checkIteratorStatus(_iterator.get());
return false;
}
@@ -216,6 +226,7 @@ bool RocksDBVPackIndexIterator::next(LocalDocumentIdCallback const& cb, size_t l
}
if (!_iterator->Valid() || outOfRange()) {
::checkIteratorStatus(_iterator.get());
return false;
}
}
@@ -230,6 +241,7 @@ bool RocksDBVPackIndexIterator::nextCovering(DocumentCallback const& cb, size_t
// No limit no data, or we are actually done. The last call should have
// returned false
TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken
::checkIteratorStatus(_iterator.get());
return false;
}
@@ -249,6 +261,7 @@ bool RocksDBVPackIndexIterator::nextCovering(DocumentCallback const& cb, size_t
}
if (!_iterator->Valid() || outOfRange()) {
::checkIteratorStatus(_iterator.get());
return false;
}
}
@@ -260,6 +273,7 @@ void RocksDBVPackIndexIterator::skip(uint64_t count, uint64_t& skipped) {
TRI_ASSERT(_trx->state()->isRunning());
if (!_iterator->Valid() || outOfRange()) {
::checkIteratorStatus(_iterator.get());
return;
}
@@ -275,6 +289,7 @@ void RocksDBVPackIndexIterator::skip(uint64_t count, uint64_t& skipped) {
}
if (!_iterator->Valid() || outOfRange()) {
::checkIteratorStatus(_iterator.get());
return;
}
}
@@ -352,7 +367,7 @@ bool RocksDBVPackIndex::implicitlyUnique() const {
for (auto const& it : _fields) {
// if _key is contained in the index fields definition, then the index is
// implicitly unique
if (it == KeyAttribute) {
if (it == ::KeyAttribute) {
return true;
}
}

View File

@@ -111,6 +111,7 @@
"ERROR_ARANGO_COLLECTION_TYPE_MISMATCH" : { "code" : 1237, "message" : "collection type mismatch" },
"ERROR_ARANGO_COLLECTION_NOT_LOADED" : { "code" : 1238, "message" : "collection not loaded" },
"ERROR_ARANGO_DOCUMENT_REV_BAD" : { "code" : 1239, "message" : "illegal document revision" },
"ERROR_ARANGO_INCOMPLETE_READ" : { "code" : 1240, "message" : "incomplete read" },
"ERROR_ARANGO_DATAFILE_FULL" : { "code" : 1300, "message" : "datafile full" },
"ERROR_ARANGO_EMPTY_DATADIR" : { "code" : 1301, "message" : "server database directory is empty" },
"ERROR_ARANGO_TRY_AGAIN" : { "code" : 1302, "message" : "operation should be tried again" },

View File

@@ -116,8 +116,8 @@ arangodb::Result convertStatus(rocksdb::Status const& status, StatusHint hint,
case rocksdb::Status::Code::kMergeInProgress:
return {TRI_ERROR_ARANGO_MERGE_IN_PROGRESS, std::move(message)};
case rocksdb::Status::Code::kIncomplete:
return {TRI_ERROR_INTERNAL,
prefix + "'incomplete' error in storage engine" + postfix};
return {TRI_ERROR_ARANGO_INCOMPLETE_READ,
prefix + "'incomplete' error in storage engine " + postfix};
case rocksdb::Status::Code::kShutdownInProgress:
return {TRI_ERROR_SHUTTING_DOWN, std::move(message)};
case rocksdb::Status::Code::kTimedOut:
@@ -126,7 +126,7 @@ arangodb::Result convertStatus(rocksdb::Status const& status, StatusHint hint,
}
if (status.subcode() == rocksdb::Status::SubCode::kLockTimeout) {
return {TRI_ERROR_ARANGO_CONFLICT,
prefix + "timeout waiting to lock key" + postfix};
prefix + "timeout waiting to lock key " + postfix};
}
return {TRI_ERROR_LOCK_TIMEOUT, std::move(message)};
case rocksdb::Status::Code::kAborted:

View File

@@ -133,6 +133,7 @@ ERROR_ARANGO_WRITE_THROTTLE_TIMEOUT,1236,"write-throttling timeout","Will be rai
ERROR_ARANGO_COLLECTION_TYPE_MISMATCH,1237,"collection type mismatch","Will be raised when a collection has a different type from what has been expected."
ERROR_ARANGO_COLLECTION_NOT_LOADED,1238,"collection not loaded","Will be raised when a collection is accessed that is not yet loaded."
ERROR_ARANGO_DOCUMENT_REV_BAD,1239,"illegal document revision","Will be raised when a document revision is corrupt or is missing where needed."
ERROR_ARANGO_INCOMPLETE_READ,1240,"incomplete read","Will be raised by the storage engine when a read cannot be completed."
################################################################################
## Checked ArangoDB storage errors

View File

@@ -110,6 +110,7 @@ void TRI_InitializeErrorMessages() {
REG_ERROR(ERROR_ARANGO_COLLECTION_TYPE_MISMATCH, "collection type mismatch");
REG_ERROR(ERROR_ARANGO_COLLECTION_NOT_LOADED, "collection not loaded");
REG_ERROR(ERROR_ARANGO_DOCUMENT_REV_BAD, "illegal document revision");
REG_ERROR(ERROR_ARANGO_INCOMPLETE_READ, "incomplete read");
REG_ERROR(ERROR_ARANGO_DATAFILE_FULL, "datafile full");
REG_ERROR(ERROR_ARANGO_EMPTY_DATADIR, "server database directory is empty");
REG_ERROR(ERROR_ARANGO_TRY_AGAIN, "operation should be tried again");

View File

@@ -547,6 +547,11 @@ constexpr int TRI_ERROR_ARANGO_COLLECTION_NOT_LOADED
/// needed.
constexpr int TRI_ERROR_ARANGO_DOCUMENT_REV_BAD = 1239;
/// 1240: ERROR_ARANGO_INCOMPLETE_READ
/// "incomplete read"
/// Will be raised by the storage engine when a read cannot be completed.
constexpr int TRI_ERROR_ARANGO_INCOMPLETE_READ = 1240;
/// 1300: ERROR_ARANGO_DATAFILE_FULL
/// "datafile full"
/// Will be raised when the datafile reaches its limit.