From 5f65a9ed4fb38ce644039946de9de5e3680a7e97 Mon Sep 17 00:00:00 2001 From: jsteemann Date: Fri, 4 Nov 2016 23:17:01 +0100 Subject: [PATCH] allow more control over handling of pre-3.1 _rev values this changes the server startup option `--database.check-30-revisions` from a boolean (true/false) parameter to a string parameter with the following possible values: - "fail": will validate _rev values of 3.0 collections on collection loading and throw an exception when invalid _rev values are found. in this case collections with invalid _rev values are marked as corrupted and cannot be used in the ArangoDB 3.1 instance. the fix procedure for such collections is to export the collections from 3.0 database with arangodump and restore them in 3.1 with arangorestore. collections that do not contain invalid _rev values are marked as ok and will not be re-checked on following loads. collections that contain invalid _rev values will be re-checked on following loads. - "true": will validate _rev values of 3.0 collections on collection loading and print a warning when invalid _rev values are found. in this case collections with invalid _rev values can be used in the ArangoDB 3.1 instance. however, subsequent operations on documents with invalid _rev values may silently fail or fail with explicit errors. the fix procedure for such collections is to export the collections from 3.0 database with arangodump and restore them in 3.1 with arangorestore. collections that do not contain invalid _rev values are marked as ok and will not be re-checked on following loads. collections that contain invalid _rev values will be re-checked on following loads. - "false": will not validate _rev values on collection loading and not print warnings. no hint is given when invalid _rev values are found. subsequent operations on documents with invalid _rev values may silently fail or fail with explicit errors. this setting does not affect whether collections are re-checked later. collections will be re-checked on following loads if `--database.check-30-revisions` is later set to either `true` or `fail`. The change also suppresses warnings that were printed when collections were restored using arangorestore, and the restore data contained invalid _rev values. Now these warnings are suppressed, and new HLC _rev values are generated for these documents as before. --- arangod/Cluster/ClusterMethods.cpp | 2 +- .../RestHandler/RestReplicationHandler.cpp | 13 +++++------ .../RestHandler/RestVocbaseBaseHandler.cpp | 4 ++-- arangod/RestServer/DatabaseFeature.cpp | 5 +++-- arangod/RestServer/DatabaseFeature.h | 5 +++-- arangod/StorageEngine/MMFilesCollection.cpp | 8 +++++-- arangod/Utils/Transaction.cpp | 8 +++---- arangod/V8Server/v8-util.cpp | 2 +- arangod/VocBase/LogicalCollection.cpp | 20 +++++++++-------- arangod/VocBase/LogicalCollection.h | 3 +++ arangod/VocBase/transaction.cpp | 4 +--- arangod/VocBase/voc-types.h | 10 ++++----- arangod/VocBase/vocbase.cpp | 22 +++++++++---------- arangosh/Restore/RestoreFeature.cpp | 2 +- 14 files changed, 57 insertions(+), 51 deletions(-) diff --git a/arangod/Cluster/ClusterMethods.cpp b/arangod/Cluster/ClusterMethods.cpp index 8dd36c84ad..e5970d8a09 100644 --- a/arangod/Cluster/ClusterMethods.cpp +++ b/arangod/Cluster/ClusterMethods.cpp @@ -601,7 +601,7 @@ int revisionOnCoordinator(std::string const& dbname, if (r.isString()) { VPackValueLength len; char const* p = r.getString(len); - TRI_voc_rid_t cmp = TRI_StringToRid(p, len); + TRI_voc_rid_t cmp = TRI_StringToRid(p, len, false); if (cmp > rid) { // get the maximum value diff --git a/arangod/RestHandler/RestReplicationHandler.cpp b/arangod/RestHandler/RestReplicationHandler.cpp index 0c7fc11581..7e2e16199d 100644 --- a/arangod/RestHandler/RestReplicationHandler.cpp +++ b/arangod/RestHandler/RestReplicationHandler.cpp @@ -2031,7 +2031,7 @@ int RestReplicationHandler::applyCollectionDumpMarker( static int restoreDataParser(char const* ptr, char const* pos, std::string const& invalidMsg, bool useRevision, std::string& errorMsg, std::string& key, - std::string& rev, VPackBuilder& builder, + VPackBuilder& builder, VPackSlice& doc, TRI_replication_operation_e& type) { builder.clear(); @@ -2090,8 +2090,6 @@ static int restoreDataParser(char const* ptr, char const* pos, if (doc.hasKey(StaticStrings::KeyString)) { key = doc.get(StaticStrings::KeyString).copyString(); - } else if (useRevision && doc.hasKey(StaticStrings::RevString)) { - rev = doc.get(StaticStrings::RevString).copyString(); } } } @@ -2150,6 +2148,7 @@ int RestReplicationHandler::processRestoreDataBatch( { VPackArrayBuilder guard(&allMarkers); + std::string key; while (ptr < end) { char const* pos = strchr(ptr, '\n'); @@ -2161,13 +2160,12 @@ int RestReplicationHandler::processRestoreDataBatch( if (pos - ptr > 1) { // found something - std::string key; - std::string rev; + key.clear(); VPackSlice doc; TRI_replication_operation_e type = REPLICATION_INVALID; int res = restoreDataParser(ptr, pos, invalidMsg, useRevision, errorMsg, - key, rev, builder, doc, type); + key, builder, doc, type); if (res != TRI_ERROR_NO_ERROR) { return res; } @@ -2492,11 +2490,10 @@ void RestReplicationHandler::handleCommandRestoreDataCoordinator() { // found something // std::string key; - std::string rev; VPackSlice doc; TRI_replication_operation_e type = REPLICATION_INVALID; - res = restoreDataParser(ptr, pos, invalidMsg, false, errorMsg, key, rev, + res = restoreDataParser(ptr, pos, invalidMsg, false, errorMsg, key, builder, doc, type); if (res != TRI_ERROR_NO_ERROR) { // We might need to clean up buffers diff --git a/arangod/RestHandler/RestVocbaseBaseHandler.cpp b/arangod/RestHandler/RestVocbaseBaseHandler.cpp index 566cfb9c2e..cc0b850741 100644 --- a/arangod/RestHandler/RestVocbaseBaseHandler.cpp +++ b/arangod/RestHandler/RestVocbaseBaseHandler.cpp @@ -597,7 +597,7 @@ TRI_voc_rid_t RestVocbaseBaseHandler::extractRevision(char const* header, TRI_voc_rid_t rid = 0; bool isOld; - rid = TRI_StringToRidWithCheck(s, e - s, isOld); + rid = TRI_StringToRidWithCheck(s, e - s, isOld, false); isValid = (rid != 0); return rid; @@ -610,7 +610,7 @@ TRI_voc_rid_t RestVocbaseBaseHandler::extractRevision(char const* header, TRI_voc_rid_t rid = 0; bool isOld; - rid = TRI_StringToRidWithCheck(etag2, isOld); + rid = TRI_StringToRidWithCheck(etag2, isOld, false); isValid = (rid != 0); return rid; diff --git a/arangod/RestServer/DatabaseFeature.cpp b/arangod/RestServer/DatabaseFeature.cpp index c422f35c12..968be26d58 100644 --- a/arangod/RestServer/DatabaseFeature.cpp +++ b/arangod/RestServer/DatabaseFeature.cpp @@ -217,7 +217,7 @@ DatabaseFeature::DatabaseFeature(ApplicationServer* server) _defaultWaitForSync(false), _forceSyncProperties(true), _ignoreDatafileErrors(false), - _check30Revisions(true), + _check30Revisions("true"), _throwCollectionNotLoadedError(false), _vocbase(nullptr), _databasesLists(new DatabasesLists()), @@ -281,7 +281,8 @@ void DatabaseFeature::collectOptions(std::shared_ptr options) { options->addHiddenOption("--database.check-30-revisions", "check _rev values in collections created before 3.1", - new BooleanParameter(&_check30Revisions)); + new DiscreteValuesParameter(&_check30Revisions, + std::unordered_set{ "true", "false", "fail" })); } void DatabaseFeature::validateOptions(std::shared_ptr options) { diff --git a/arangod/RestServer/DatabaseFeature.h b/arangod/RestServer/DatabaseFeature.h index 15e86e9c49..001672d4ff 100644 --- a/arangod/RestServer/DatabaseFeature.h +++ b/arangod/RestServer/DatabaseFeature.h @@ -118,7 +118,8 @@ class DatabaseFeature final : public application_features::ApplicationFeature { void enableUpgrade() { _upgrade = true; } bool throwCollectionNotLoadedError() const { return _throwCollectionNotLoadedError.load(std::memory_order_relaxed); } void throwCollectionNotLoadedError(bool value) { _throwCollectionNotLoadedError.store(value); } - bool check30Revisions() const { return _check30Revisions; } + bool check30Revisions() const { return _check30Revisions == "true" || _check30Revisions == "fail"; } + bool fail30Revisions() const { return _check30Revisions == "fail"; } void isInitiallyEmpty(bool value) { _isInitiallyEmpty = value; } private: @@ -156,7 +157,7 @@ class DatabaseFeature final : public application_features::ApplicationFeature { bool _defaultWaitForSync; bool _forceSyncProperties; bool _ignoreDatafileErrors; - bool _check30Revisions; + std::string _check30Revisions; std::atomic _throwCollectionNotLoadedError; TRI_vocbase_t* _vocbase; diff --git a/arangod/StorageEngine/MMFilesCollection.cpp b/arangod/StorageEngine/MMFilesCollection.cpp index 875c53b8df..68909e179e 100644 --- a/arangod/StorageEngine/MMFilesCollection.cpp +++ b/arangod/StorageEngine/MMFilesCollection.cpp @@ -1089,9 +1089,13 @@ int MMFilesCollection::iterateMarkersOnLoad(arangodb::Transaction* trx) { _lastRevision >= static_cast(2016 - 1970) * 1000 * 60 * 60 * 24 * 365 && application_features::ApplicationServer::server->getFeature("Database")->check30Revisions()) { // a collection from 3.0 or earlier with a _rev value that is higher than we can handle safely - LOG(WARN) << "collection '" << _logicalCollection->name() << "' contains _rev values that are higher than expected for an ArangoDB 3.0 database. If this collection was created or used with a pre-release or development version of ArangoDB 3.1, please restart the server with option '--database.check-30-revisions false' to suppress this warning."; - } + _logicalCollection->setRevisionError(); + LOG(WARN) << "collection '" << _logicalCollection->name() << "' contains _rev values that are higher than expected for an ArangoDB 3.1 database. If this collection was created or used with a pre-release or development version of ArangoDB 3.1, please restart the server with option '--database.check-30-revisions false' to suppress this warning. If this collection was created with an ArangoDB 3.0, please dump the 3.0 database with arangodump and restore it in 3.1 with arangorestore."; + if (application_features::ApplicationServer::server->getFeature("Database")->fail30Revisions()) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE, std::string("collection '") + _logicalCollection->name() + "' contains _rev values from 3.0 and needs to be migrated using dump/restore"); + } + } // update the real statistics for the collection try { diff --git a/arangod/Utils/Transaction.cpp b/arangod/Utils/Transaction.cpp index a99740ffa0..ffe4686816 100644 --- a/arangod/Utils/Transaction.cpp +++ b/arangod/Utils/Transaction.cpp @@ -996,7 +996,7 @@ void Transaction::extractKeyAndRevFromDocument(VPackSlice slice, if (revSlice.isString()) { VPackValueLength l; char const* p = revSlice.getString(l); - revisionId = TRI_StringToRid(p, l); + revisionId = TRI_StringToRid(p, l, false); } else if (revSlice.isNumber()) { revisionId = revSlice.getNumericValue(); } @@ -1016,7 +1016,7 @@ void Transaction::extractKeyAndRevFromDocument(VPackSlice slice, keySlice = slice.get(StaticStrings::KeyString); VPackValueLength l; char const* p = slice.get(StaticStrings::RevString).getString(l); - revisionId = TRI_StringToRid(p, l); + revisionId = TRI_StringToRid(p, l, false); } } @@ -1037,7 +1037,7 @@ TRI_voc_rid_t Transaction::extractRevFromDocument(VPackSlice slice) { if (revSlice.isString()) { VPackValueLength l; char const* p = revSlice.getString(l); - return TRI_StringToRid(p, l); + return TRI_StringToRid(p, l, false); } else if (revSlice.isNumber()) { return revSlice.getNumericValue(); } @@ -1054,7 +1054,7 @@ TRI_voc_rid_t Transaction::extractRevFromDocument(VPackSlice slice) { { VPackValueLength l; char const* p = slice.get(StaticStrings::RevString).getString(l); - return TRI_StringToRid(p, l); + return TRI_StringToRid(p, l, false); } } diff --git a/arangod/V8Server/v8-util.cpp b/arangod/V8Server/v8-util.cpp index 298c56171f..63814d40a5 100644 --- a/arangod/V8Server/v8-util.cpp +++ b/arangod/V8Server/v8-util.cpp @@ -172,7 +172,7 @@ bool ExtractDocumentHandle(v8::Isolate* isolate, } v8::String::Utf8Value str(revObj); bool isOld; - uint64_t rid = TRI_StringToRidWithCheck(*str, str.length(), isOld); + uint64_t rid = TRI_StringToRidWithCheck(*str, str.length(), isOld, false); if (rid == 0) { return false; diff --git a/arangod/VocBase/LogicalCollection.cpp b/arangod/VocBase/LogicalCollection.cpp index 82ae82d271..5db5a3eb57 100644 --- a/arangod/VocBase/LogicalCollection.cpp +++ b/arangod/VocBase/LogicalCollection.cpp @@ -331,10 +331,9 @@ LogicalCollection::LogicalCollection( _nextCompactionStartIndex(0), _lastCompactionStatus(nullptr), _lastCompactionStamp(0.0), - _uncollectedLogfileEntries(0) { + _uncollectedLogfileEntries(0), + _revisionError(false) { _keyGenerator.reset(KeyGenerator::factory(other.keyOptions())); - - // TODO Only DBServer? Is this correct? if (ServerState::instance()->isDBServer()) { @@ -397,7 +396,8 @@ LogicalCollection::LogicalCollection(TRI_vocbase_t* vocbase, _nextCompactionStartIndex(0), _lastCompactionStatus(nullptr), _lastCompactionStamp(0.0), - _uncollectedLogfileEntries(0) { + _uncollectedLogfileEntries(0), + _revisionError(false) { if (!IsAllowedName(info)) { THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_NAME); @@ -1317,7 +1317,9 @@ void LogicalCollection::open(bool ignoreErrors) { << _name << " }"; // successfully opened collection. now adjust version number - if (_version != VERSION_31) { + if (_version != VERSION_31 && + !_revisionError && + application_features::ApplicationServer::server->getFeature("Database")->check30Revisions()) { _version = VERSION_31; bool const doSync = application_features::ApplicationServer::getFeature( @@ -2010,7 +2012,7 @@ int LogicalCollection::update(Transaction* trx, VPackSlice const newSlice, bool isOld; VPackValueLength l; char const* p = oldRev.getString(l); - revisionId = TRI_StringToRid(p, l, isOld); + revisionId = TRI_StringToRid(p, l, isOld, false); if (isOld) { // Do not tolerate old revision IDs revisionId = TRI_HybridLogicalClock(); @@ -2170,7 +2172,7 @@ int LogicalCollection::replace(Transaction* trx, VPackSlice const newSlice, bool isOld; VPackValueLength l; char const* p = oldRev.getString(l); - revisionId = TRI_StringToRid(p, l, isOld); + revisionId = TRI_StringToRid(p, l, isOld, false); if (isOld) { // Do not tolerate old revision ticks: revisionId = TRI_HybridLogicalClock(); @@ -2298,7 +2300,7 @@ int LogicalCollection::remove(arangodb::Transaction* trx, bool isOld; VPackValueLength l; char const* p = oldRev.getString(l); - revisionId = TRI_StringToRid(p, l, isOld); + revisionId = TRI_StringToRid(p, l, isOld, false); if (isOld) { // Do not tolerate old revisions revisionId = TRI_HybridLogicalClock(); @@ -3296,7 +3298,7 @@ int LogicalCollection::newObjectForInsert( bool isOld; VPackValueLength l; char const* p = oldRev.getString(l); - TRI_voc_rid_t oldRevision = TRI_StringToRid(p, l, isOld); + TRI_voc_rid_t oldRevision = TRI_StringToRid(p, l, isOld, false); if (isOld) { oldRevision = TRI_HybridLogicalClock(); } diff --git a/arangod/VocBase/LogicalCollection.h b/arangod/VocBase/LogicalCollection.h index f80496de0f..e831c6a9e4 100644 --- a/arangod/VocBase/LogicalCollection.h +++ b/arangod/VocBase/LogicalCollection.h @@ -126,6 +126,8 @@ class LogicalCollection { void setCompactionStatus(char const*); double lastCompactionStamp() const { return _lastCompactionStamp; } void lastCompactionStamp(double value) { _lastCompactionStamp = value; } + + void setRevisionError() { _revisionError = true; } // SECTION: Meta Information @@ -573,6 +575,7 @@ class LogicalCollection { double _lastCompactionStamp; std::atomic _uncollectedLogfileEntries; + bool _revisionError; }; } // namespace arangodb diff --git a/arangod/VocBase/transaction.cpp b/arangod/VocBase/transaction.cpp index a31e2eeb8f..3b50fca3bd 100644 --- a/arangod/VocBase/transaction.cpp +++ b/arangod/VocBase/transaction.cpp @@ -1072,9 +1072,7 @@ int TRI_AddOperationTransaction(TRI_transaction_t* trx, { VPackSlice s(static_cast(marker->vpack())); - VPackValueLength l; - char const* p = s.get(StaticStrings::RevString).getString(l); - TRI_voc_rid_t rid = TRI_StringToRid(p, l); + TRI_voc_rid_t rid = Transaction::extractRevFromDocument(s); collection->setRevision(rid, false); } diff --git a/arangod/VocBase/voc-types.h b/arangod/VocBase/voc-types.h index 85c986f21c..851b1ab3e2 100644 --- a/arangod/VocBase/voc-types.h +++ b/arangod/VocBase/voc-types.h @@ -63,19 +63,19 @@ typedef uint64_t TRI_server_id_t; std::string TRI_RidToString(TRI_voc_rid_t rid); /// @brief Convert a string into a revision ID, no check variant -TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld); +TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld, bool warn); /// @brief Convert a string into a revision ID, no check variant -TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len); +TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool warn); /// @brief Convert a string into a revision ID, no check variant -TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld); +TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld, bool warn); /// @brief Convert a string into a revision ID, returns 0 if format invalid -TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld); +TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld, bool warn); /// @brief Convert a string into a revision ID, returns 0 if format invalid -TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld); +TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld, bool warn); /// @brief enum for write operations enum TRI_voc_document_operation_e : uint8_t { diff --git a/arangod/VocBase/vocbase.cpp b/arangod/VocBase/vocbase.cpp index 6f1b24a2a2..6ba1e9964d 100644 --- a/arangod/VocBase/vocbase.cpp +++ b/arangod/VocBase/vocbase.cpp @@ -1246,7 +1246,7 @@ TRI_voc_rid_t TRI_ExtractRevisionId(VPackSlice slice) { if (r.isString()) { VPackValueLength l; char const* p = r.getString(l); - return TRI_StringToRid(p, l); + return TRI_StringToRid(p, l, false); } if (r.isInteger()) { return r.getNumber(); @@ -1312,18 +1312,18 @@ std::string TRI_RidToString(TRI_voc_rid_t rid) { } /// @brief Convert a string into a revision ID, no check variant -TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld) { - return TRI_StringToRid(ridStr.c_str(), ridStr.size(), isOld); +TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld, bool warn) { + return TRI_StringToRid(ridStr.c_str(), ridStr.size(), isOld, warn); } /// @brief Convert a string into a revision ID, no check variant -TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len) { +TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool warn) { bool isOld; - return TRI_StringToRid(p, len, isOld); + return TRI_StringToRid(p, len, isOld, warn); } /// @brief Convert a string into a revision ID, no check variant -TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) { +TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld, bool warn) { if (len > 0 && *p >= '1' && *p <= '9') { // Remove this case before the year 3887 AD because then it will // start to clash with encoded timestamps: @@ -1336,7 +1336,7 @@ TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) { } r *= 10; } while (true); - if (r > tickLimit) { + if (warn && r > tickLimit) { // An old tick value that could be confused with a time stamp LOG(WARN) << "Saw old _rev value that could be confused with a time stamp!"; @@ -1349,17 +1349,17 @@ TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) { } /// @brief Convert a string into a revision ID, returns 0 if format invalid -TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld) { - return TRI_StringToRidWithCheck(ridStr.c_str(), ridStr.size(), isOld); +TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld, bool warn) { + return TRI_StringToRidWithCheck(ridStr.c_str(), ridStr.size(), isOld, warn); } /// @brief Convert a string into a revision ID, returns 0 if format invalid -TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld) { +TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld, bool warn) { if (len > 0 && *p >= '1' && *p <= '9') { // Remove this case before the year 3887 AD because then it will // start to clash with encoded timestamps: TRI_voc_rid_t r = arangodb::basics::StringUtils::uint64_check(p, len); - if (r > tickLimit) { + if (warn && r > tickLimit) { // An old tick value that could be confused with a time stamp LOG(WARN) << "Saw old _rev value that could be confused with a time stamp!"; diff --git a/arangosh/Restore/RestoreFeature.cpp b/arangosh/Restore/RestoreFeature.cpp index bd189fc05e..6f7b43d2d4 100644 --- a/arangosh/Restore/RestoreFeature.cpp +++ b/arangosh/Restore/RestoreFeature.cpp @@ -113,7 +113,7 @@ void RestoreFeature::collectOptions( new BooleanParameter(&_overwrite)); options->addOption("--recycle-ids", - "recycle collection and revision ids from dump", + "recycle collection ids from dump", new BooleanParameter(&_recycleIds)); options->addOption("--default-number-of-shards",