1
0
Fork 0

allow more control over handling of pre-3.1 _rev values

this changes the server startup option `--database.check-30-revisions` from a boolean (true/false)
parameter to a string parameter with the following possible values:

- "fail":
  will validate _rev values of 3.0 collections on collection loading and throw an exception when invalid _rev values are found.
  in this case collections with invalid _rev values are marked as corrupted and cannot be used in the ArangoDB 3.1 instance.
  the fix procedure for such collections is to export the collections from 3.0 database with arangodump and restore them in 3.1 with arangorestore.
  collections that do not contain invalid _rev values are marked as ok and will not be re-checked on following loads.
  collections that contain invalid _rev values will be re-checked on following loads.

- "true":
  will validate _rev values of 3.0 collections on collection loading and print a warning when invalid _rev values are found.
  in this case collections with invalid _rev values can be used in the ArangoDB 3.1 instance.
  however, subsequent operations on documents with invalid _rev values may silently fail or fail with explicit errors.
  the fix procedure for such collections is to export the collections from 3.0 database with arangodump and restore them in 3.1 with arangorestore.
  collections that do not contain invalid _rev values are marked as ok and will not be re-checked on following loads.
  collections that contain invalid _rev values will be re-checked on following loads.

- "false":
  will not validate _rev values on collection loading and not print warnings.
  no hint is given when invalid _rev values are found.
  subsequent operations on documents with invalid _rev values may silently fail or fail with explicit errors.
  this setting does not affect whether collections are re-checked later.
  collections will be re-checked on following loads if `--database.check-30-revisions` is later set to either `true` or `fail`.

The change also suppresses warnings that were printed when collections were restored using arangorestore, and the restore
data contained invalid _rev values. Now these warnings are suppressed, and new HLC _rev values are generated for these documents
as before.
This commit is contained in:
jsteemann 2016-11-04 23:17:01 +01:00
parent ccb251e1f5
commit 5f65a9ed4f
14 changed files with 57 additions and 51 deletions

View File

@ -601,7 +601,7 @@ int revisionOnCoordinator(std::string const& dbname,
if (r.isString()) {
VPackValueLength len;
char const* p = r.getString(len);
TRI_voc_rid_t cmp = TRI_StringToRid(p, len);
TRI_voc_rid_t cmp = TRI_StringToRid(p, len, false);
if (cmp > rid) {
// get the maximum value

View File

@ -2031,7 +2031,7 @@ int RestReplicationHandler::applyCollectionDumpMarker(
static int restoreDataParser(char const* ptr, char const* pos,
std::string const& invalidMsg, bool useRevision,
std::string& errorMsg, std::string& key,
std::string& rev, VPackBuilder& builder,
VPackBuilder& builder,
VPackSlice& doc,
TRI_replication_operation_e& type) {
builder.clear();
@ -2090,8 +2090,6 @@ static int restoreDataParser(char const* ptr, char const* pos,
if (doc.hasKey(StaticStrings::KeyString)) {
key = doc.get(StaticStrings::KeyString).copyString();
} else if (useRevision && doc.hasKey(StaticStrings::RevString)) {
rev = doc.get(StaticStrings::RevString).copyString();
}
}
}
@ -2150,6 +2148,7 @@ int RestReplicationHandler::processRestoreDataBatch(
{
VPackArrayBuilder guard(&allMarkers);
std::string key;
while (ptr < end) {
char const* pos = strchr(ptr, '\n');
@ -2161,13 +2160,12 @@ int RestReplicationHandler::processRestoreDataBatch(
if (pos - ptr > 1) {
// found something
std::string key;
std::string rev;
key.clear();
VPackSlice doc;
TRI_replication_operation_e type = REPLICATION_INVALID;
int res = restoreDataParser(ptr, pos, invalidMsg, useRevision, errorMsg,
key, rev, builder, doc, type);
key, builder, doc, type);
if (res != TRI_ERROR_NO_ERROR) {
return res;
}
@ -2492,11 +2490,10 @@ void RestReplicationHandler::handleCommandRestoreDataCoordinator() {
// found something
//
std::string key;
std::string rev;
VPackSlice doc;
TRI_replication_operation_e type = REPLICATION_INVALID;
res = restoreDataParser(ptr, pos, invalidMsg, false, errorMsg, key, rev,
res = restoreDataParser(ptr, pos, invalidMsg, false, errorMsg, key,
builder, doc, type);
if (res != TRI_ERROR_NO_ERROR) {
// We might need to clean up buffers

View File

@ -597,7 +597,7 @@ TRI_voc_rid_t RestVocbaseBaseHandler::extractRevision(char const* header,
TRI_voc_rid_t rid = 0;
bool isOld;
rid = TRI_StringToRidWithCheck(s, e - s, isOld);
rid = TRI_StringToRidWithCheck(s, e - s, isOld, false);
isValid = (rid != 0);
return rid;
@ -610,7 +610,7 @@ TRI_voc_rid_t RestVocbaseBaseHandler::extractRevision(char const* header,
TRI_voc_rid_t rid = 0;
bool isOld;
rid = TRI_StringToRidWithCheck(etag2, isOld);
rid = TRI_StringToRidWithCheck(etag2, isOld, false);
isValid = (rid != 0);
return rid;

View File

@ -217,7 +217,7 @@ DatabaseFeature::DatabaseFeature(ApplicationServer* server)
_defaultWaitForSync(false),
_forceSyncProperties(true),
_ignoreDatafileErrors(false),
_check30Revisions(true),
_check30Revisions("true"),
_throwCollectionNotLoadedError(false),
_vocbase(nullptr),
_databasesLists(new DatabasesLists()),
@ -281,7 +281,8 @@ void DatabaseFeature::collectOptions(std::shared_ptr<ProgramOptions> options) {
options->addHiddenOption("--database.check-30-revisions",
"check _rev values in collections created before 3.1",
new BooleanParameter(&_check30Revisions));
new DiscreteValuesParameter<StringParameter>(&_check30Revisions,
std::unordered_set<std::string>{ "true", "false", "fail" }));
}
void DatabaseFeature::validateOptions(std::shared_ptr<ProgramOptions> options) {

View File

@ -118,7 +118,8 @@ class DatabaseFeature final : public application_features::ApplicationFeature {
void enableUpgrade() { _upgrade = true; }
bool throwCollectionNotLoadedError() const { return _throwCollectionNotLoadedError.load(std::memory_order_relaxed); }
void throwCollectionNotLoadedError(bool value) { _throwCollectionNotLoadedError.store(value); }
bool check30Revisions() const { return _check30Revisions; }
bool check30Revisions() const { return _check30Revisions == "true" || _check30Revisions == "fail"; }
bool fail30Revisions() const { return _check30Revisions == "fail"; }
void isInitiallyEmpty(bool value) { _isInitiallyEmpty = value; }
private:
@ -156,7 +157,7 @@ class DatabaseFeature final : public application_features::ApplicationFeature {
bool _defaultWaitForSync;
bool _forceSyncProperties;
bool _ignoreDatafileErrors;
bool _check30Revisions;
std::string _check30Revisions;
std::atomic<bool> _throwCollectionNotLoadedError;
TRI_vocbase_t* _vocbase;

View File

@ -1089,9 +1089,13 @@ int MMFilesCollection::iterateMarkersOnLoad(arangodb::Transaction* trx) {
_lastRevision >= static_cast<TRI_voc_rid_t>(2016 - 1970) * 1000 * 60 * 60 * 24 * 365 &&
application_features::ApplicationServer::server->getFeature<DatabaseFeature>("Database")->check30Revisions()) {
// a collection from 3.0 or earlier with a _rev value that is higher than we can handle safely
LOG(WARN) << "collection '" << _logicalCollection->name() << "' contains _rev values that are higher than expected for an ArangoDB 3.0 database. If this collection was created or used with a pre-release or development version of ArangoDB 3.1, please restart the server with option '--database.check-30-revisions false' to suppress this warning.";
}
_logicalCollection->setRevisionError();
LOG(WARN) << "collection '" << _logicalCollection->name() << "' contains _rev values that are higher than expected for an ArangoDB 3.1 database. If this collection was created or used with a pre-release or development version of ArangoDB 3.1, please restart the server with option '--database.check-30-revisions false' to suppress this warning. If this collection was created with an ArangoDB 3.0, please dump the 3.0 database with arangodump and restore it in 3.1 with arangorestore.";
if (application_features::ApplicationServer::server->getFeature<DatabaseFeature>("Database")->fail30Revisions()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_CORRUPTED_DATAFILE, std::string("collection '") + _logicalCollection->name() + "' contains _rev values from 3.0 and needs to be migrated using dump/restore");
}
}
// update the real statistics for the collection
try {

View File

@ -996,7 +996,7 @@ void Transaction::extractKeyAndRevFromDocument(VPackSlice slice,
if (revSlice.isString()) {
VPackValueLength l;
char const* p = revSlice.getString(l);
revisionId = TRI_StringToRid(p, l);
revisionId = TRI_StringToRid(p, l, false);
} else if (revSlice.isNumber()) {
revisionId = revSlice.getNumericValue<TRI_voc_rid_t>();
}
@ -1016,7 +1016,7 @@ void Transaction::extractKeyAndRevFromDocument(VPackSlice slice,
keySlice = slice.get(StaticStrings::KeyString);
VPackValueLength l;
char const* p = slice.get(StaticStrings::RevString).getString(l);
revisionId = TRI_StringToRid(p, l);
revisionId = TRI_StringToRid(p, l, false);
}
}
@ -1037,7 +1037,7 @@ TRI_voc_rid_t Transaction::extractRevFromDocument(VPackSlice slice) {
if (revSlice.isString()) {
VPackValueLength l;
char const* p = revSlice.getString(l);
return TRI_StringToRid(p, l);
return TRI_StringToRid(p, l, false);
} else if (revSlice.isNumber()) {
return revSlice.getNumericValue<TRI_voc_rid_t>();
}
@ -1054,7 +1054,7 @@ TRI_voc_rid_t Transaction::extractRevFromDocument(VPackSlice slice) {
{
VPackValueLength l;
char const* p = slice.get(StaticStrings::RevString).getString(l);
return TRI_StringToRid(p, l);
return TRI_StringToRid(p, l, false);
}
}

View File

@ -172,7 +172,7 @@ bool ExtractDocumentHandle(v8::Isolate* isolate,
}
v8::String::Utf8Value str(revObj);
bool isOld;
uint64_t rid = TRI_StringToRidWithCheck(*str, str.length(), isOld);
uint64_t rid = TRI_StringToRidWithCheck(*str, str.length(), isOld, false);
if (rid == 0) {
return false;

View File

@ -331,10 +331,9 @@ LogicalCollection::LogicalCollection(
_nextCompactionStartIndex(0),
_lastCompactionStatus(nullptr),
_lastCompactionStamp(0.0),
_uncollectedLogfileEntries(0) {
_uncollectedLogfileEntries(0),
_revisionError(false) {
_keyGenerator.reset(KeyGenerator::factory(other.keyOptions()));
// TODO Only DBServer? Is this correct?
if (ServerState::instance()->isDBServer()) {
@ -397,7 +396,8 @@ LogicalCollection::LogicalCollection(TRI_vocbase_t* vocbase,
_nextCompactionStartIndex(0),
_lastCompactionStatus(nullptr),
_lastCompactionStamp(0.0),
_uncollectedLogfileEntries(0) {
_uncollectedLogfileEntries(0),
_revisionError(false) {
if (!IsAllowedName(info)) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_ARANGO_ILLEGAL_NAME);
@ -1317,7 +1317,9 @@ void LogicalCollection::open(bool ignoreErrors) {
<< _name << " }";
// successfully opened collection. now adjust version number
if (_version != VERSION_31) {
if (_version != VERSION_31 &&
!_revisionError &&
application_features::ApplicationServer::server->getFeature<DatabaseFeature>("Database")->check30Revisions()) {
_version = VERSION_31;
bool const doSync =
application_features::ApplicationServer::getFeature<DatabaseFeature>(
@ -2010,7 +2012,7 @@ int LogicalCollection::update(Transaction* trx, VPackSlice const newSlice,
bool isOld;
VPackValueLength l;
char const* p = oldRev.getString(l);
revisionId = TRI_StringToRid(p, l, isOld);
revisionId = TRI_StringToRid(p, l, isOld, false);
if (isOld) {
// Do not tolerate old revision IDs
revisionId = TRI_HybridLogicalClock();
@ -2170,7 +2172,7 @@ int LogicalCollection::replace(Transaction* trx, VPackSlice const newSlice,
bool isOld;
VPackValueLength l;
char const* p = oldRev.getString(l);
revisionId = TRI_StringToRid(p, l, isOld);
revisionId = TRI_StringToRid(p, l, isOld, false);
if (isOld) {
// Do not tolerate old revision ticks:
revisionId = TRI_HybridLogicalClock();
@ -2298,7 +2300,7 @@ int LogicalCollection::remove(arangodb::Transaction* trx,
bool isOld;
VPackValueLength l;
char const* p = oldRev.getString(l);
revisionId = TRI_StringToRid(p, l, isOld);
revisionId = TRI_StringToRid(p, l, isOld, false);
if (isOld) {
// Do not tolerate old revisions
revisionId = TRI_HybridLogicalClock();
@ -3296,7 +3298,7 @@ int LogicalCollection::newObjectForInsert(
bool isOld;
VPackValueLength l;
char const* p = oldRev.getString(l);
TRI_voc_rid_t oldRevision = TRI_StringToRid(p, l, isOld);
TRI_voc_rid_t oldRevision = TRI_StringToRid(p, l, isOld, false);
if (isOld) {
oldRevision = TRI_HybridLogicalClock();
}

View File

@ -126,6 +126,8 @@ class LogicalCollection {
void setCompactionStatus(char const*);
double lastCompactionStamp() const { return _lastCompactionStamp; }
void lastCompactionStamp(double value) { _lastCompactionStamp = value; }
void setRevisionError() { _revisionError = true; }
// SECTION: Meta Information
@ -573,6 +575,7 @@ class LogicalCollection {
double _lastCompactionStamp;
std::atomic<int64_t> _uncollectedLogfileEntries;
bool _revisionError;
};
} // namespace arangodb

View File

@ -1072,9 +1072,7 @@ int TRI_AddOperationTransaction(TRI_transaction_t* trx,
{
VPackSlice s(static_cast<uint8_t*>(marker->vpack()));
VPackValueLength l;
char const* p = s.get(StaticStrings::RevString).getString(l);
TRI_voc_rid_t rid = TRI_StringToRid(p, l);
TRI_voc_rid_t rid = Transaction::extractRevFromDocument(s);
collection->setRevision(rid, false);
}

View File

@ -63,19 +63,19 @@ typedef uint64_t TRI_server_id_t;
std::string TRI_RidToString(TRI_voc_rid_t rid);
/// @brief Convert a string into a revision ID, no check variant
TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld);
TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld, bool warn);
/// @brief Convert a string into a revision ID, no check variant
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len);
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool warn);
/// @brief Convert a string into a revision ID, no check variant
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld);
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld, bool warn);
/// @brief Convert a string into a revision ID, returns 0 if format invalid
TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld);
TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld, bool warn);
/// @brief Convert a string into a revision ID, returns 0 if format invalid
TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld);
TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld, bool warn);
/// @brief enum for write operations
enum TRI_voc_document_operation_e : uint8_t {

View File

@ -1246,7 +1246,7 @@ TRI_voc_rid_t TRI_ExtractRevisionId(VPackSlice slice) {
if (r.isString()) {
VPackValueLength l;
char const* p = r.getString(l);
return TRI_StringToRid(p, l);
return TRI_StringToRid(p, l, false);
}
if (r.isInteger()) {
return r.getNumber<TRI_voc_rid_t>();
@ -1312,18 +1312,18 @@ std::string TRI_RidToString(TRI_voc_rid_t rid) {
}
/// @brief Convert a string into a revision ID, no check variant
TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld) {
return TRI_StringToRid(ridStr.c_str(), ridStr.size(), isOld);
TRI_voc_rid_t TRI_StringToRid(std::string const& ridStr, bool& isOld, bool warn) {
return TRI_StringToRid(ridStr.c_str(), ridStr.size(), isOld, warn);
}
/// @brief Convert a string into a revision ID, no check variant
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len) {
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool warn) {
bool isOld;
return TRI_StringToRid(p, len, isOld);
return TRI_StringToRid(p, len, isOld, warn);
}
/// @brief Convert a string into a revision ID, no check variant
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) {
TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld, bool warn) {
if (len > 0 && *p >= '1' && *p <= '9') {
// Remove this case before the year 3887 AD because then it will
// start to clash with encoded timestamps:
@ -1336,7 +1336,7 @@ TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) {
}
r *= 10;
} while (true);
if (r > tickLimit) {
if (warn && r > tickLimit) {
// An old tick value that could be confused with a time stamp
LOG(WARN)
<< "Saw old _rev value that could be confused with a time stamp!";
@ -1349,17 +1349,17 @@ TRI_voc_rid_t TRI_StringToRid(char const* p, size_t len, bool& isOld) {
}
/// @brief Convert a string into a revision ID, returns 0 if format invalid
TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld) {
return TRI_StringToRidWithCheck(ridStr.c_str(), ridStr.size(), isOld);
TRI_voc_rid_t TRI_StringToRidWithCheck(std::string const& ridStr, bool& isOld, bool warn) {
return TRI_StringToRidWithCheck(ridStr.c_str(), ridStr.size(), isOld, warn);
}
/// @brief Convert a string into a revision ID, returns 0 if format invalid
TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld) {
TRI_voc_rid_t TRI_StringToRidWithCheck(char const* p, size_t len, bool& isOld, bool warn) {
if (len > 0 && *p >= '1' && *p <= '9') {
// Remove this case before the year 3887 AD because then it will
// start to clash with encoded timestamps:
TRI_voc_rid_t r = arangodb::basics::StringUtils::uint64_check(p, len);
if (r > tickLimit) {
if (warn && r > tickLimit) {
// An old tick value that could be confused with a time stamp
LOG(WARN)
<< "Saw old _rev value that could be confused with a time stamp!";

View File

@ -113,7 +113,7 @@ void RestoreFeature::collectOptions(
new BooleanParameter(&_overwrite));
options->addOption("--recycle-ids",
"recycle collection and revision ids from dump",
"recycle collection ids from dump",
new BooleanParameter(&_recycleIds));
options->addOption("--default-number-of-shards",