mirror of https://gitee.com/bigwinds/arangodb
allow more control over handling of pre-3.1 _rev values
this changes the server startup option `--database.check-30-revisions` from a boolean (true/false) parameter to a string parameter with the following possible values: - "fail": will validate _rev values of 3.0 collections on collection loading and throw an exception when invalid _rev values are found. in this case collections with invalid _rev values are marked as corrupted and cannot be used in the ArangoDB 3.1 instance. the fix procedure for such collections is to export the collections from 3.0 database with arangodump and restore them in 3.1 with arangorestore. collections that do not contain invalid _rev values are marked as ok and will not be re-checked on following loads. collections that contain invalid _rev values will be re-checked on following loads. - "true": will validate _rev values of 3.0 collections on collection loading and print a warning when invalid _rev values are found. in this case collections with invalid _rev values can be used in the ArangoDB 3.1 instance. however, subsequent operations on documents with invalid _rev values may silently fail or fail with explicit errors. the fix procedure for such collections is to export the collections from 3.0 database with arangodump and restore them in 3.1 with arangorestore. collections that do not contain invalid _rev values are marked as ok and will not be re-checked on following loads. collections that contain invalid _rev values will be re-checked on following loads. - "false": will not validate _rev values on collection loading and not print warnings. no hint is given when invalid _rev values are found. subsequent operations on documents with invalid _rev values may silently fail or fail with explicit errors. this setting does not affect whether collections are re-checked later. collections will be re-checked on following loads if `--database.check-30-revisions` is later set to either `true` or `fail`. The change also suppresses warnings that were printed when collections were restored using arangorestore, and the restore data contained invalid _rev values. Now these warnings are suppressed, and new HLC _rev values are generated for these documents as before.
This commit is contained in:
parent
ccb251e1f5
commit
5f65a9ed4f
|
@ -601,7 +601,7 @@ int revisionOnCoordinator(std::string const& dbname,
|
|||
if (r.isString()) {
|
||||
VPackValueLength len;
|
||||
char const* p = r.getString(len);
|
||||
TRI_voc_rid_t cmp = TRI_StringToRid(p, len);
|
||||
TRI_voc_rid_t cmp = TRI_StringToRid(p, len, false);
|
||||
|
||||
if (cmp > rid) {
|
||||
// get the maximum value
|
||||
|
|
|
@ -2031,7 +2031,7 @@ int RestReplicationHandler::applyCollectionDumpMarker(
|
|||
static int restoreDataParser(char const* ptr, char const* pos,
|
||||
std::string const& invalidMsg, bool useRevision,
|
||||
std::string& errorMsg, std::string& key,
|
||||
std::string& rev, VPackBuilder& builder,
|
||||
VPackBuilder& builder,
|
||||
VPackSlice& doc,
|
||||
TRI_replication_operation_e& type) {
|
||||
builder.clear();
|
||||
|
@ -2090,8 +2090,6 @@ static int restoreDataParser(char const* ptr, char const* pos,
|
|||
|
||||
if (doc.hasKey(StaticStrings::KeyString)) {
|
||||
key = doc.get(StaticStrings::KeyString).copyString();
|
||||
} else if (useRevision && doc.hasKey(StaticStrings::RevString)) {
|
||||
rev = doc.get(StaticStrings::RevString).copyString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2150,6 +2148,7 @@ int RestReplicationHandler::processRestoreDataBatch(
|
|||
|
||||
{
|
||||
VPackArrayBuilder guard(&allMarkers);
|
||||
std::string key;
|
||||
while (ptr < end) {
|
||||
char const* pos = strchr(ptr, '\n');
|
||||
|
||||
|
@ -2161,13 +2160,12 @@ int RestReplicationHandler::processRestoreDataBatch(
|
|||
|
||||
if (pos - ptr > 1) {
|
||||
// found something
|
||||
std::string key;
|
||||
std::string rev;
|
||||
key.clear();
|
||||
VPackSlice doc;
|
||||
TRI_replication_operation_e type = REPLICATION_INVALID;
|
||||
|
||||
int res = restoreDataParser(ptr, pos, invalidMsg, useRevision, errorMsg,
|
||||
key, rev, builder, doc, type);
|
||||
key, builder, doc, type);
|
||||
if (res != TRI_ERROR_NO_ERROR) {
|
||||
return res;
|
||||
}
|
||||
|
@ -2492,11 +2490,10 @@ void RestReplicationHandler::handleCommandRestoreDataCoordinator() {
|
|||
// found something
|
||||
//
|
||||
std::string key;
|
||||
std::string rev;
|
||||
VPackSlice doc;
|
||||
TRI_replication_operation_e type = REPLICATION_INVALID;
|
||||
|
||||
res = restoreDataParser(ptr, pos, invalidMsg, false, errorMsg, key, rev,
|
||||
res = restoreDataParser(ptr, pos, invalidMsg, false, errorMsg, key,
|
||||
builder, doc, type);
|
||||
if (res != TRI_ERROR_NO_ERROR) {
|
||||
// We might need to clean up buffers
|
||||
|
|
|
@ -597,7 +597,7 @@ TRI_voc_rid_t RestVocbaseBaseHandler::extractRevision(char const* header,
|
|||
TRI_voc_rid_t rid = 0;
|
||||
|
||||
bool isOld;
|
||||
rid = TRI_StringToRidWithCheck(s, e - s, isOld);
|
||||
rid = TRI_StringToRidWithCheck(s, e - s, isOld, false);
|
||||
isValid = (rid != 0);
|
||||
|
||||
return rid;
|
||||
|
@ -610,7 +610,7 @@ TRI_voc_rid_t RestVocbaseBaseHandler::extractRevision(char const* header,
|
|||
TRI_voc_rid_t rid = 0;
|
||||
|
||||
bool isOld;
|
||||
rid = TRI_StringToRidWithCheck(etag2, isOld);
|
||||
rid = TRI_StringToRidWithCheck(etag2, isOld, false);
|
||||
isValid = (rid != 0);
|
||||
|
||||
return rid;
|
||||
|
|
|
@ -217,7 +217,7 @@ DatabaseFeature::DatabaseFeature(ApplicationServer* server)
|
|||
_defaultWaitForSync(false),
|
||||
_forceSyncProperties(true),
|
||||
_ignoreDatafileErrors(false),
|
||||
_check30Revisions(true),
|
||||
_check30Revisions("true"),
|
||||
_throwCollectionNotLoadedError(false),
|
||||
_vocbase(nullptr),
|
||||
_databasesLists(new DatabasesLists()),
|
||||
|
@ -281,7 +281,8 @@ void DatabaseFeature::collectOptions(std::shared_ptr<ProgramOptions> options) {
|
|||
|
||||
options->addHiddenOption("--database.check-30-revisions",
|
||||
"check _rev values in collections created before 3.1",
|
||||
new BooleanParameter(&_check30Revisions));
|
||||
new DiscreteValuesParameter<StringParameter>(&_check30Revisions,
|
||||
std::unordered_set<std::string>{ "true", "false", "fail" }));
|
||||
}
|
||||
|
||||
void DatabaseFeature::validateOptions(std::shared_ptr<ProgramOptions> options) {
|
||||
|
|
|
@ -118,7 +118,8 @@ class DatabaseFeature final : public application_features::ApplicationFeature {
|
|||
void enableUpgrade() { _upgrade = true; }
|
||||
bool throwCollectionNotLoadedError() const { return _throwCollectionNotLoadedError.load(std::memory_order_relaxed); }
|
||||
void throwCollectionNotLoadedError(bool value) { _throwCollectionNotLoadedError.store(value); }
|
||||
bool check30Revisions() const { return _check30Revisions; }
|
||||
bool check30Revisions() const { return _check30Revisions == "true" || _check30Revisions == "fail"; }
|
||||
bool fail30Revisions() const { return _check30Revisions == "fail"; }
|
||||
void isInitiallyEmpty(bool value) { _isInitiallyEmpty = value; }
|
||||
|
||||
private:
|
||||
|
@ -156,7 +157,7 @@ class DatabaseFeature final : public application_features::ApplicationFeature {
|
|||
bool _defaultWaitForSync;
|
||||
bool _forceSyncProperties;
|
||||
bool _ignoreDatafileErrors;
|
||||
bool _check30Revisions;
|
||||
std::string _check30Revisions;
|
||||
std::atomic<bool> _throwCollectionNotLoadedError;
|
||||
|
||||
TRI_vocbase_t* _vocbase;
|
||||
|
|
|
@ -1089,9 +1089,13 @@ int MMFilesCollection::iterateMarkersOnLoad(arangodb::Transaction* trx) {
|
|||
_lastRevision >= static_cast<TRI_voc_rid_t>(2016 - 1970) * 1000 * 60 * 60 * 24 * 365 &&
|
||||
application_features::ApplicationServer::server->getFeature<DatabaseFeature>("Database")->check30Revisions()) {
|
||||
// a collection from 3.0 or earlier with a _rev value that is higher than we can handle safely
|
||||
LOG(WARN) << "collection '" << _logicalCollection->name() << "' contains _rev values that are higher than expected for an ArangoDB 3.0 database. If this collection was created or used with a pre-release or development version of ArangoDB 3.1, please restart the server with option '--database.check-30-revisions false' to suppress this warning.";
|
||||
}
|
||||
_logicalCollection->setRevisionError();
|
||||
|
||||
LOG(WARN) << "collection '" << _logicalCollection->name() << "' contains _rev values that are higher than expected for an ArangoDB 3.1 database. If this collection was created or used with a pre-release or development version of ArangoDB 3.1, please restart the server with option '--database.check-30-revisions false' to suppress this warning. If this collection was created with an ArangoDB 3.0, please dump the 3.0 database with arangodump and restore it in 3.1 with arangorestore.";
|
||||
if (application_features::ApplicationServer::server->getFeature<DatabaseFeature>("Database")->fail30Revisions()) {
|
||||
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE, std::string("collection '") + _logicalCollection->name() + "' contains _rev values from 3.0 and needs to be migrated using dump/restore");
|
||||
}
|
||||
}
|
||||
|
||||
// update the real statistics for the collection
|
||||
try {
|
||||
|
|
|
@ -996,7 +996,7 @@ void Transaction::extractKeyAndRevFromDocument(VPackSlice slice,
|
|||
if (revSlice.isString()) {
|
||||
VPackValueLength l;
|
||||
char const* p = revSlice.getString(l);
|
||||
revisionId = TRI_StringToRid(p, l);
|
||||
revisionId = TRI_StringToRid(p, l, false);
|
||||
} else if (revSlice.isNumber()) {
|
||||
revisionId = revSlice.getNumericValue<TRI_voc_rid_t>();
|
||||
}
|
||||
|
@ -1016,7 +1016,7 @@ void Transaction::extractKeyAndRevFromDocument(VPackSlice slice,
|
|||
keySlice = slice.get(StaticStrings::KeyString);
|
||||
VPackValueLength l;
|
||||
char const* p = slice.get(StaticStrings::RevString).getString(l);
|
||||
revisionId = TRI_StringToRid(p, l);
|
||||
revisionId = TRI_StringToRid(p, l, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1037,7 +1037,7 @@ TRI_voc_rid_t Transaction::extractRevFromDocument(VPackSlice slice) {
|
|||
if (revSlice.isString()) {
|
||||
VPackValueLength l;
|
||||
char const* p = revSlice.getString(l);
|
||||
return TRI_StringToRid(p, l);
|
||||
return TRI_StringToRid(p, l, false);
|
||||
} else if (revSlice.isNumber()) {
|
||||
return revSlice.getNumericValue<TRI_voc_rid_t>();
|
||||
}
|
||||
|
@ -1054,7 +1054,7 @@ TRI_voc_rid_t Transaction::extractRevFromDocument(VPackSlice slice) {
|
|||
{
|
||||
VPackValueLength l;
|
||||
char const* p = slice.get(StaticStrings::RevString).getString(l);
|
||||
return TRI_StringToRid(p, l);
|
||||
return TRI_StringToRid(p, l, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -172,7 +172,7 @@ bool ExtractDocumentHandle(v8::Isolate* isolate,
|
|||
}
|
||||
v8::String::Utf8Value str(revObj);
|
||||
bool isOld;
|
||||
uint64_t rid = TRI_StringToRidWithCheck(*str, str.length(), isOld);
|
||||
uint64_t rid = TRI_StringToRidWithCheck(*str, str.length(), isOld, false);
|
||||
|
||||
if (rid == 0) {
|
||||
return false;
|
||||
|
|
|
@ -331,10 +331,9 @@ LogicalCollection::LogicalCollection(
|
|||
_nextCompactionStartIndex(0),
|
||||
_lastCompactionStatus(nullptr),
|
||||
_lastCompactionStamp(0.0),
|
||||
_uncollectedLogfileEntries(0) {
|
||||
_uncollectedLogfileEntries(0),
|
||||
_revisionError(false) {
|
||||
_keyGenerator.reset(KeyGenerator::factory(other.keyOptions()));
|
||||
|
||||
|
||||
|
||||
// TODO Only DBServer? Is this correct?
|
||||
if (ServerState::instance()->isDBServer()) {
|
||||
|
@ -397,7 +396,8 @@ LogicalCollection::LogicalCollection(TRI_vocbase_t* vocbase,
|
|||
_nextCompactionStartIndex(0),
|
||||
_lastCompactionStatus(nullptr),
|
||||
_lastCompactionStamp(0.0),
|
||||
_uncollectedLogfileEntries(0) {
|
||||
_uncollectedLogfileEntries(0),
|
||||
_revisionError(false) {
|
||||
|
||||
if (!IsAllowedName(info)) {
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_NAME);
|
||||
|
@ -1317,7 +1317,9 @@ void LogicalCollection::open(bool ignoreErrors) {
|
|||
<< _name << " }";
|
||||
|
||||
// successfully opened collection. now adjust version number
|
||||
if (_version != VERSION_31) {
|
||||
if (_version != VERSION_31 &&
|
||||
!_revisionError &&
|
||||
application_features::ApplicationServer::server->getFeature<DatabaseFeature>("Database")->check30Revisions()) {
|
||||
_version = VERSION_31;
|
||||
bool const doSync =
|
||||
application_features::ApplicationServer::getFeature<DatabaseFeature>(
|
||||
|
@ -2010,7 +2012,7 @@ int LogicalCollection::update(Transaction* trx, VPackSlice const newSlice,
|
|||
bool isOld;
|
||||
VPackValueLength l;
|
||||
char const* p = oldRev.getString(l);
|
||||
revisionId = TRI_StringToRid(p, l, isOld);
|
||||
revisionId = TRI_StringToRid(p, l, isOld, false);
|
||||
if (isOld) {
|
||||
// Do not tolerate old revision IDs
|
||||
revisionId = TRI_HybridLogicalClock();
|
||||
|
@ -2170,7 +2172,7 @@ int LogicalCollection::replace(Transaction* trx, VPackSlice const newSlice,
|
|||
bool isOld;
|
||||
VPackValueLength l;
|
||||
char const* p = oldRev.getString(l);
|
||||
revisionId = TRI_StringToRid(p, l, isOld);
|
||||
revisionId = TRI_StringToRid(p, l, isOld, false);
|
||||
if (isOld) {
|
||||
// Do not tolerate old revision ticks:
|
||||
revisionId = TRI_HybridLogicalClock();
|
||||
|
@ -2298,7 +2300,7 @@ int LogicalCollection::remove(arangodb::Transaction* trx,
|
|||
bool isOld;
|
||||
VPackValueLength l;
|
||||
char const* p = oldRev.getString(l);
|
||||
revisionId = TRI_StringToRid(p, l, isOld);
|
||||
revisionId = TRI_StringToRid(p, l, isOld, false);
|
||||
if (isOld) {
|
||||
// Do not tolerate old revisions
|
||||
revisionId = TRI_HybridLogicalClock();
|
||||
|
@ -3296,7 +3298,7 @@ int LogicalCollection::newObjectForInsert(
|
|||
bool isOld;
|
||||
VPackValueLength l;
|
||||
char const* p = oldRev.getString(l);
|
||||
TRI_voc_rid_t oldRevision = TRI_StringToRid(p, l, isOld);
|
||||
TRI_voc_rid_t oldRevision = TRI_StringToRid(p, l, isOld, false);
|
||||
if (isOld) {
|
||||
oldRevision = TRI_HybridLogicalClock();
|
||||
}
|
||||
|
|
|
@ -126,6 +126,8 @@ class LogicalCollection {
|
|||
void setCompactionStatus(char const*);
|
||||
double lastCompactionStamp() const { return _lastCompactionStamp; }
|
||||
void lastCompactionStamp(double value) { _lastCompactionStamp = value; }
|
||||
|
||||
void setRevisionError() { _revisionError = true; }
|
||||
|
||||
|
||||
// SECTION: Meta Information
|
||||
|
@ -573,6 +575,7 @@ class LogicalCollection {
|
|||
double _lastCompactionStamp;
|
||||
|
||||
std::atomic<int64_t> _uncollectedLogfileEntries;
|
||||
bool _revisionError;
|
||||
};
|
||||
|
||||
} // namespace arangodb
|
||||
|
|
|
@ -1072,9 +1072,7 @@ int TRI_AddOperationTransaction(TRI_transaction_t* trx,
|
|||
|
||||
{
|
||||
VPackSlice s(static_cast<uint8_t*>(marker->vpack()));
|
||||
VPackValueLength l;
|
||||
char const* p = s.get(StaticStrings::RevString).getString(l);
|
||||
TRI_voc_rid_t rid = TRI_StringToRid(p, l);
|
||||
TRI_voc_rid_t rid = Transaction::extractRevFromDocument(s);
|
||||
collection->setRevision(rid, false);
|
||||
}
|
||||
|
||||
|
|
|
@ -63,19 +63,19 @@ typedef uint64_t TRI_server_id_t;
|
|||
std::string TRI_RidToString(TRI_voc_rid_t rid);
|
||||
|
||||
/// @brief Convert a string into a revision ID, no check variant
|
||||
TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld);
|
||||
TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld, bool warn);
|
||||
|
||||
/// @brief Convert a string into a revision ID, no check variant
|
||||
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len);
|
||||
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool warn);
|
||||
|
||||
/// @brief Convert a string into a revision ID, no check variant
|
||||
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld);
|
||||
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld, bool warn);
|
||||
|
||||
/// @brief Convert a string into a revision ID, returns 0 if format invalid
|
||||
TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld);
|
||||
TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld, bool warn);
|
||||
|
||||
/// @brief Convert a string into a revision ID, returns 0 if format invalid
|
||||
TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld);
|
||||
TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld, bool warn);
|
||||
|
||||
/// @brief enum for write operations
|
||||
enum TRI_voc_document_operation_e : uint8_t {
|
||||
|
|
|
@ -1246,7 +1246,7 @@ TRI_voc_rid_t TRI_ExtractRevisionId(VPackSlice slice) {
|
|||
if (r.isString()) {
|
||||
VPackValueLength l;
|
||||
char const* p = r.getString(l);
|
||||
return TRI_StringToRid(p, l);
|
||||
return TRI_StringToRid(p, l, false);
|
||||
}
|
||||
if (r.isInteger()) {
|
||||
return r.getNumber<TRI_voc_rid_t>();
|
||||
|
@ -1312,18 +1312,18 @@ std::string TRI_RidToString(TRI_voc_rid_t rid) {
|
|||
}
|
||||
|
||||
/// @brief Convert a string into a revision ID, no check variant
|
||||
TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld) {
|
||||
return TRI_StringToRid(ridStr.c_str(), ridStr.size(), isOld);
|
||||
TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld, bool warn) {
|
||||
return TRI_StringToRid(ridStr.c_str(), ridStr.size(), isOld, warn);
|
||||
}
|
||||
|
||||
/// @brief Convert a string into a revision ID, no check variant
|
||||
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len) {
|
||||
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool warn) {
|
||||
bool isOld;
|
||||
return TRI_StringToRid(p, len, isOld);
|
||||
return TRI_StringToRid(p, len, isOld, warn);
|
||||
}
|
||||
|
||||
/// @brief Convert a string into a revision ID, no check variant
|
||||
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) {
|
||||
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld, bool warn) {
|
||||
if (len > 0 && *p >= '1' && *p <= '9') {
|
||||
// Remove this case before the year 3887 AD because then it will
|
||||
// start to clash with encoded timestamps:
|
||||
|
@ -1336,7 +1336,7 @@ TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) {
|
|||
}
|
||||
r *= 10;
|
||||
} while (true);
|
||||
if (r > tickLimit) {
|
||||
if (warn && r > tickLimit) {
|
||||
// An old tick value that could be confused with a time stamp
|
||||
LOG(WARN)
|
||||
<< "Saw old _rev value that could be confused with a time stamp!";
|
||||
|
@ -1349,17 +1349,17 @@ TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) {
|
|||
}
|
||||
|
||||
/// @brief Convert a string into a revision ID, returns 0 if format invalid
|
||||
TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld) {
|
||||
return TRI_StringToRidWithCheck(ridStr.c_str(), ridStr.size(), isOld);
|
||||
TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld, bool warn) {
|
||||
return TRI_StringToRidWithCheck(ridStr.c_str(), ridStr.size(), isOld, warn);
|
||||
}
|
||||
|
||||
/// @brief Convert a string into a revision ID, returns 0 if format invalid
|
||||
TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld) {
|
||||
TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld, bool warn) {
|
||||
if (len > 0 && *p >= '1' && *p <= '9') {
|
||||
// Remove this case before the year 3887 AD because then it will
|
||||
// start to clash with encoded timestamps:
|
||||
TRI_voc_rid_t r = arangodb::basics::StringUtils::uint64_check(p, len);
|
||||
if (r > tickLimit) {
|
||||
if (warn && r > tickLimit) {
|
||||
// An old tick value that could be confused with a time stamp
|
||||
LOG(WARN)
|
||||
<< "Saw old _rev value that could be confused with a time stamp!";
|
||||
|
|
|
@ -113,7 +113,7 @@ void RestoreFeature::collectOptions(
|
|||
new BooleanParameter(&_overwrite));
|
||||
|
||||
options->addOption("--recycle-ids",
|
||||
"recycle collection and revision ids from dump",
|
||||
"recycle collection ids from dump",
|
||||
new BooleanParameter(&_recycleIds));
|
||||
|
||||
options->addOption("--default-number-of-shards",
|
||||
|
|
Loading…
Reference in New Issue