diff --git a/CHANGELOG b/CHANGELOG index c09ac8c556..bbbd1667a6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,17 @@ v3.4.6 (XXXX-XX-XX) ------------------- +* added error code 1240 "incomplete read" for RocksDB-based reads which cannot retrieve + documents due to the RocksDB block cache being size-restricted (with size limit enforced) + and uncompressed data blocks not fitting into the block cache + + The error can only occur for collection or index scans with the RocksDB storage engine + when the RocksDB block cache is used and set to a very small size, plus its maximum size is + enforced by setting the `--rocksdb.enforce-block-cache-size-limit` option to `true`. + + Previously these incomplete reads could have been ignored silently, making collection or + index scans return less documents than there were actually present. + * fixed internal issue #3918: added optional second parameter "withId" to AQL function PREGEL_RESULT diff --git a/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp b/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp index 87d373cc0e..d67b3c8cd4 100644 --- a/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBEdgeIndex.cpp @@ -63,6 +63,14 @@ using namespace arangodb::basics; namespace { constexpr bool EdgeIndexFillBlockCache = false; + +void checkIteratorStatus(rocksdb::Iterator const* iterator) { + auto s = iterator->status(); + if (!s.ok()) { + THROW_ARANGO_EXCEPTION(arangodb::rocksutils::convertStatus(s)); + } +} + } RocksDBEdgeIndexWarmupTask::RocksDBEdgeIndexWarmupTask( @@ -476,6 +484,9 @@ void RocksDBEdgeIndexIterator::lookupInRocksDB(StringRef fromTo) { _iterator->Next(); } _builder.close(); + + ::checkIteratorStatus(_iterator.get()); + if (cc != nullptr) { // TODO Add cache retry on next call // Now we have something in _inplaceMemory. diff --git a/arangod/RocksDBEngine/RocksDBFulltextIndex.cpp b/arangod/RocksDBEngine/RocksDBFulltextIndex.cpp index ee288f38e9..24cba2d869 100644 --- a/arangod/RocksDBEngine/RocksDBFulltextIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBFulltextIndex.cpp @@ -46,6 +46,41 @@ using namespace arangodb; +/// El Cheapo index iterator +class RocksDBFulltextIndexIterator : public IndexIterator { + public: + RocksDBFulltextIndexIterator(LogicalCollection* collection, transaction::Methods* trx, + std::set&& docs) + : IndexIterator(collection, trx), _docs(std::move(docs)), _pos(_docs.begin()) {} + + ~RocksDBFulltextIndexIterator() {} + + char const* typeName() const override { return "fulltext-index-iterator"; } + + bool next(LocalDocumentIdCallback const& cb, size_t limit) override { + TRI_ASSERT(limit > 0); + while (_pos != _docs.end() && limit > 0) { + cb(*_pos); + ++_pos; + limit--; + } + return _pos != _docs.end(); + } + + void reset() override { _pos = _docs.begin(); } + + void skip(uint64_t count, uint64_t& skipped) override { + while (_pos != _docs.end() && skipped < count) { + ++_pos; + skipped++; + } + } + + private: + std::set const _docs; + std::set::iterator _pos; +}; + RocksDBFulltextIndex::RocksDBFulltextIndex(TRI_idx_iid_t iid, arangodb::LogicalCollection& collection, arangodb::velocypack::Slice const& info) diff --git a/arangod/RocksDBEngine/RocksDBFulltextIndex.h b/arangod/RocksDBEngine/RocksDBFulltextIndex.h index 3f0738eee9..b7eb1001dc 100644 --- a/arangod/RocksDBEngine/RocksDBFulltextIndex.h +++ b/arangod/RocksDBEngine/RocksDBFulltextIndex.h @@ -126,41 +126,6 @@ class RocksDBFulltextIndex final : public RocksDBIndex { std::set& resultSet); }; -/// El Cheapo index iterator -class RocksDBFulltextIndexIterator : public IndexIterator { - public: - RocksDBFulltextIndexIterator(LogicalCollection* collection, transaction::Methods* trx, - std::set&& docs) - : IndexIterator(collection, trx), _docs(std::move(docs)), _pos(_docs.begin()) {} - - ~RocksDBFulltextIndexIterator() {} - - char const* typeName() const override { return "fulltext-index-iterator"; } - - bool next(LocalDocumentIdCallback const& cb, size_t limit) override { - TRI_ASSERT(limit > 0); - while (_pos != _docs.end() && limit > 0) { - cb(*_pos); - ++_pos; - limit--; - } - return _pos != _docs.end(); - } - - void reset() override { _pos = _docs.begin(); } - - void skip(uint64_t count, uint64_t& skipped) override { - while (_pos != _docs.end() && skipped < count) { - ++_pos; - skipped++; - } - } - - private: - std::set const _docs; - std::set::iterator _pos; -}; - } // namespace arangodb #endif diff --git a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp index 4a6cae86bb..ce7f628eb1 100644 --- a/arangod/RocksDBEngine/RocksDBGeoIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBGeoIndex.cpp @@ -42,6 +42,17 @@ using namespace arangodb; +namespace { + +void checkIteratorStatus(rocksdb::Iterator const* iterator) { + auto s = iterator->status(); + if (!s.ok()) { + THROW_ARANGO_EXCEPTION(arangodb::rocksutils::convertStatus(s)); + } +} + +} + template class RDBNearIterator final : public IndexIterator { public: @@ -202,6 +213,8 @@ class RDBNearIterator final : public IndexIterator { _near.reportFound(documentId, RocksDBValue::centroid(_iter->value())); _iter->Next(); } + + ::checkIteratorStatus(_iter.get()); } _near.didScanIntervals(); // calculate next bounds @@ -429,7 +442,7 @@ Result RocksDBGeoIndex::removeInternal(transaction::Methods* trx, RocksDBMethods key->constructGeoIndexValue(_objectId, cell.id(), documentId); res = mthd->Delete(RocksDBColumnFamily::geo(), key.ref()); if (res.fail()) { - return res; + break; } } return res; diff --git a/arangod/RocksDBEngine/RocksDBIterators.cpp b/arangod/RocksDBEngine/RocksDBIterators.cpp index 677826dc87..f76f1f8007 100644 --- a/arangod/RocksDBEngine/RocksDBIterators.cpp +++ b/arangod/RocksDBEngine/RocksDBIterators.cpp @@ -35,6 +35,14 @@ using namespace arangodb; namespace { constexpr bool AllIteratorFillBlockCache = true; constexpr bool AnyIteratorFillBlockCache = false; + +void checkIteratorStatus(rocksdb::Iterator const* iterator) { + auto s = iterator->status(); + if (!s.ok()) { + THROW_ARANGO_EXCEPTION(arangodb::rocksutils::convertStatus(s)); + } +} + } // namespace // ================ All Iterator ================== @@ -81,6 +89,7 @@ bool RocksDBAllIndexIterator::next(LocalDocumentIdCallback const& cb, size_t lim // No limit no data, or we are actually done. The last call should have // returned false TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken + ::checkIteratorStatus(_iterator.get()); return false; } @@ -94,6 +103,7 @@ bool RocksDBAllIndexIterator::next(LocalDocumentIdCallback const& cb, size_t lim _iterator->Next(); if (!_iterator->Valid() || outOfRange()) { + ::checkIteratorStatus(_iterator.get()); return false; } } @@ -109,15 +119,17 @@ bool RocksDBAllIndexIterator::nextDocument(IndexIterator::DocumentCallback const // No limit no data, or we are actually done. The last call should have // returned false TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken + ::checkIteratorStatus(_iterator.get()); return false; } - + while (limit > 0) { cb(RocksDBKey::documentId(_iterator->key()), VPackSlice(_iterator->value().data())); --limit; _iterator->Next(); - + if (!_iterator->Valid() || outOfRange()) { + ::checkIteratorStatus(_iterator.get()); return false; } } @@ -134,6 +146,8 @@ void RocksDBAllIndexIterator::skip(uint64_t count, uint64_t& skipped) { _iterator->Next(); } + + ::checkIteratorStatus(_iterator.get()); } void RocksDBAllIndexIterator::reset() { @@ -191,6 +205,7 @@ bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t lim // No limit no data, or we are actually done. The last call should have // returned false TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken + ::checkIteratorStatus(_iterator.get()); return false; } @@ -200,6 +215,7 @@ bool RocksDBAnyIndexIterator::next(LocalDocumentIdCallback const& cb, size_t lim _returned++; _iterator->Next(); if (!_iterator->Valid() || outOfRange()) { + ::checkIteratorStatus(_iterator.get()); if (_returned < _total) { _iterator->Seek(_bounds.start()); continue; @@ -218,6 +234,7 @@ bool RocksDBAnyIndexIterator::nextDocument(IndexIterator::DocumentCallback const // No limit no data, or we are actually done. The last call should have // returned false TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken + ::checkIteratorStatus(_iterator.get()); return false; } @@ -227,6 +244,7 @@ bool RocksDBAnyIndexIterator::nextDocument(IndexIterator::DocumentCallback const _returned++; _iterator->Next(); if (!_iterator->Valid() || outOfRange()) { + ::checkIteratorStatus(_iterator.get()); if (_returned < _total) { _iterator->Seek(_bounds.start()); continue; @@ -335,6 +353,9 @@ bool RocksDBGenericIterator::next(GenericCallback const& cb, size_t limit) { if (limit == 0) { // No limit no data, or we are actually done. The last call should have // returned false + if (!_iterator->Valid()) { + ::checkIteratorStatus(_iterator.get()); + } return false; } @@ -353,6 +374,10 @@ bool RocksDBGenericIterator::next(GenericCallback const& cb, size_t limit) { } else { _iterator->Next(); } + + if (!_iterator->Valid()) { + ::checkIteratorStatus(_iterator.get()); + } } return hasMore(); diff --git a/arangod/RocksDBEngine/RocksDBVPackIndex.cpp b/arangod/RocksDBEngine/RocksDBVPackIndex.cpp index 3c0998f2e7..326dd43506 100644 --- a/arangod/RocksDBEngine/RocksDBVPackIndex.cpp +++ b/arangod/RocksDBEngine/RocksDBVPackIndex.cpp @@ -55,10 +55,19 @@ using namespace arangodb; +namespace { /// @brief the _key attribute, which, when used in an index, will implictly make /// it unique -static std::vector const KeyAttribute{ - arangodb::basics::AttributeName("_key", false)}; +std::vector const KeyAttribute{arangodb::basics::AttributeName("_key", false)}; + +void checkIteratorStatus(rocksdb::Iterator const* iterator) { + auto s = iterator->status(); + if (!s.ok()) { + THROW_ARANGO_EXCEPTION(arangodb::rocksutils::convertStatus(s)); + } +} + +} // ............................................................................. // recall for all of the following comparison functions: @@ -199,6 +208,7 @@ bool RocksDBVPackIndexIterator::next(LocalDocumentIdCallback const& cb, size_t l // No limit no data, or we are actually done. The last call should have // returned false TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken + ::checkIteratorStatus(_iterator.get()); return false; } @@ -216,6 +226,7 @@ bool RocksDBVPackIndexIterator::next(LocalDocumentIdCallback const& cb, size_t l } if (!_iterator->Valid() || outOfRange()) { + ::checkIteratorStatus(_iterator.get()); return false; } } @@ -230,6 +241,7 @@ bool RocksDBVPackIndexIterator::nextCovering(DocumentCallback const& cb, size_t // No limit no data, or we are actually done. The last call should have // returned false TRI_ASSERT(limit > 0); // Someone called with limit == 0. Api broken + ::checkIteratorStatus(_iterator.get()); return false; } @@ -249,6 +261,7 @@ bool RocksDBVPackIndexIterator::nextCovering(DocumentCallback const& cb, size_t } if (!_iterator->Valid() || outOfRange()) { + ::checkIteratorStatus(_iterator.get()); return false; } } @@ -260,6 +273,7 @@ void RocksDBVPackIndexIterator::skip(uint64_t count, uint64_t& skipped) { TRI_ASSERT(_trx->state()->isRunning()); if (!_iterator->Valid() || outOfRange()) { + ::checkIteratorStatus(_iterator.get()); return; } @@ -275,6 +289,7 @@ void RocksDBVPackIndexIterator::skip(uint64_t count, uint64_t& skipped) { } if (!_iterator->Valid() || outOfRange()) { + ::checkIteratorStatus(_iterator.get()); return; } } @@ -352,7 +367,7 @@ bool RocksDBVPackIndex::implicitlyUnique() const { for (auto const& it : _fields) { // if _key is contained in the index fields definition, then the index is // implicitly unique - if (it == KeyAttribute) { + if (it == ::KeyAttribute) { return true; } } diff --git a/js/common/bootstrap/errors.js b/js/common/bootstrap/errors.js index 0bc6683da9..dbb588dadc 100644 --- a/js/common/bootstrap/errors.js +++ b/js/common/bootstrap/errors.js @@ -111,6 +111,7 @@ "ERROR_ARANGO_COLLECTION_TYPE_MISMATCH" : { "code" : 1237, "message" : "collection type mismatch" }, "ERROR_ARANGO_COLLECTION_NOT_LOADED" : { "code" : 1238, "message" : "collection not loaded" }, "ERROR_ARANGO_DOCUMENT_REV_BAD" : { "code" : 1239, "message" : "illegal document revision" }, + "ERROR_ARANGO_INCOMPLETE_READ" : { "code" : 1240, "message" : "incomplete read" }, "ERROR_ARANGO_DATAFILE_FULL" : { "code" : 1300, "message" : "datafile full" }, "ERROR_ARANGO_EMPTY_DATADIR" : { "code" : 1301, "message" : "server database directory is empty" }, "ERROR_ARANGO_TRY_AGAIN" : { "code" : 1302, "message" : "operation should be tried again" }, diff --git a/lib/Basics/RocksDBUtils.cpp b/lib/Basics/RocksDBUtils.cpp index 75839b340d..c39c0db4e2 100644 --- a/lib/Basics/RocksDBUtils.cpp +++ b/lib/Basics/RocksDBUtils.cpp @@ -116,8 +116,8 @@ arangodb::Result convertStatus(rocksdb::Status const& status, StatusHint hint, case rocksdb::Status::Code::kMergeInProgress: return {TRI_ERROR_ARANGO_MERGE_IN_PROGRESS, std::move(message)}; case rocksdb::Status::Code::kIncomplete: - return {TRI_ERROR_INTERNAL, - prefix + "'incomplete' error in storage engine" + postfix}; + return {TRI_ERROR_ARANGO_INCOMPLETE_READ, + prefix + "'incomplete' error in storage engine " + postfix}; case rocksdb::Status::Code::kShutdownInProgress: return {TRI_ERROR_SHUTTING_DOWN, std::move(message)}; case rocksdb::Status::Code::kTimedOut: @@ -126,7 +126,7 @@ arangodb::Result convertStatus(rocksdb::Status const& status, StatusHint hint, } if (status.subcode() == rocksdb::Status::SubCode::kLockTimeout) { return {TRI_ERROR_ARANGO_CONFLICT, - prefix + "timeout waiting to lock key" + postfix}; + prefix + "timeout waiting to lock key " + postfix}; } return {TRI_ERROR_LOCK_TIMEOUT, std::move(message)}; case rocksdb::Status::Code::kAborted: diff --git a/lib/Basics/errors.dat b/lib/Basics/errors.dat index d3f91c296f..76b00d5d41 100755 --- a/lib/Basics/errors.dat +++ b/lib/Basics/errors.dat @@ -133,6 +133,7 @@ ERROR_ARANGO_WRITE_THROTTLE_TIMEOUT,1236,"write-throttling timeout","Will be rai ERROR_ARANGO_COLLECTION_TYPE_MISMATCH,1237,"collection type mismatch","Will be raised when a collection has a different type from what has been expected." ERROR_ARANGO_COLLECTION_NOT_LOADED,1238,"collection not loaded","Will be raised when a collection is accessed that is not yet loaded." ERROR_ARANGO_DOCUMENT_REV_BAD,1239,"illegal document revision","Will be raised when a document revision is corrupt or is missing where needed." +ERROR_ARANGO_INCOMPLETE_READ,1240,"incomplete read","Will be raised by the storage engine when a read cannot be completed." ################################################################################ ## Checked ArangoDB storage errors diff --git a/lib/Basics/voc-errors.cpp b/lib/Basics/voc-errors.cpp index 834773e8d6..3ddef0e240 100644 --- a/lib/Basics/voc-errors.cpp +++ b/lib/Basics/voc-errors.cpp @@ -110,6 +110,7 @@ void TRI_InitializeErrorMessages() { REG_ERROR(ERROR_ARANGO_COLLECTION_TYPE_MISMATCH, "collection type mismatch"); REG_ERROR(ERROR_ARANGO_COLLECTION_NOT_LOADED, "collection not loaded"); REG_ERROR(ERROR_ARANGO_DOCUMENT_REV_BAD, "illegal document revision"); + REG_ERROR(ERROR_ARANGO_INCOMPLETE_READ, "incomplete read"); REG_ERROR(ERROR_ARANGO_DATAFILE_FULL, "datafile full"); REG_ERROR(ERROR_ARANGO_EMPTY_DATADIR, "server database directory is empty"); REG_ERROR(ERROR_ARANGO_TRY_AGAIN, "operation should be tried again"); diff --git a/lib/Basics/voc-errors.h b/lib/Basics/voc-errors.h index 21ad260c1c..18d11c518a 100644 --- a/lib/Basics/voc-errors.h +++ b/lib/Basics/voc-errors.h @@ -547,6 +547,11 @@ constexpr int TRI_ERROR_ARANGO_COLLECTION_NOT_LOADED /// needed. constexpr int TRI_ERROR_ARANGO_DOCUMENT_REV_BAD = 1239; +/// 1240: ERROR_ARANGO_INCOMPLETE_READ +/// "incomplete read" +/// Will be raised by the storage engine when a read cannot be completed. +constexpr int TRI_ERROR_ARANGO_INCOMPLETE_READ = 1240; + /// 1300: ERROR_ARANGO_DATAFILE_FULL /// "datafile full" /// Will be raised when the datafile reaches its limit.