mirror of https://gitee.com/bigwinds/arangodb
more robust recovery
This commit is contained in:
parent
307766cf13
commit
1016debd2e
|
@ -208,6 +208,8 @@ unittests-recovery:
|
||||||
@echo "================================================================================"
|
@echo "================================================================================"
|
||||||
@echo
|
@echo
|
||||||
|
|
||||||
|
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="leftover-database-directory"
|
||||||
|
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="leftover-collection-directory"
|
||||||
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="collection-unload"
|
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="collection-unload"
|
||||||
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="resume-recovery-multi-flush"
|
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="resume-recovery-multi-flush"
|
||||||
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="resume-recovery-simple"
|
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="resume-recovery-simple"
|
||||||
|
|
|
@ -417,6 +417,8 @@ static bool DropCollectionCallback (TRI_collection_t* col,
|
||||||
// perform the rename
|
// perform the rename
|
||||||
res = TRI_RenameFile(collection->_path, newFilename);
|
res = TRI_RenameFile(collection->_path, newFilename);
|
||||||
|
|
||||||
|
LOG_TRACE("renaming collection directory from '%s' to '%s'", collection->_path, newFilename);
|
||||||
|
|
||||||
if (res != TRI_ERROR_NO_ERROR) {
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
LOG_ERROR("cannot rename dropped collection '%s' from '%s' to '%s': %s",
|
LOG_ERROR("cannot rename dropped collection '%s' from '%s' to '%s': %s",
|
||||||
collection->_name,
|
collection->_name,
|
||||||
|
@ -2048,7 +2050,11 @@ int TRI_DropCollectionVocBase (TRI_vocbase_t* vocbase,
|
||||||
if (! info._deleted) {
|
if (! info._deleted) {
|
||||||
info._deleted = true;
|
info._deleted = true;
|
||||||
|
|
||||||
res = TRI_SaveCollectionInfo(collection->_path, &info, vocbase->_settings.forceSyncProperties);
|
// we don't need to fsync if we are in the recovery phase
|
||||||
|
bool doSync = (vocbase->_settings.forceSyncProperties &&
|
||||||
|
! triagens::wal::LogfileManager::instance()->isInRecovery());
|
||||||
|
|
||||||
|
res = TRI_SaveCollectionInfo(collection->_path, &info, doSync);
|
||||||
TRI_FreeCollectionInfoOptions(&info);
|
TRI_FreeCollectionInfoOptions(&info);
|
||||||
|
|
||||||
if (res != TRI_ERROR_NO_ERROR) {
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
|
|
|
@ -92,13 +92,26 @@ static std::string GetCollectionDirectory (TRI_vocbase_t* vocbase,
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static int WaitForDeletion (TRI_server_t* server,
|
static int WaitForDeletion (TRI_server_t* server,
|
||||||
TRI_voc_tick_t databaseId) {
|
TRI_voc_tick_t databaseId,
|
||||||
|
int statusCode) {
|
||||||
std::string const result = GetDatabaseDirectory(server, databaseId);
|
std::string const result = GetDatabaseDirectory(server, databaseId);
|
||||||
|
|
||||||
int iterations = 0;
|
int iterations = 0;
|
||||||
// wait for at most 30 seconds for the directory to be removed
|
// wait for at most 30 seconds for the directory to be removed
|
||||||
while (TRI_IsDirectory(result.c_str())) {
|
while (TRI_IsDirectory(result.c_str())) {
|
||||||
if (++iterations >= 30 * 10) {
|
if (iterations == 0) {
|
||||||
|
LOG_TRACE("waiting for deletion of database directory '%s', called with status code %d",
|
||||||
|
result.c_str(),
|
||||||
|
statusCode);
|
||||||
|
|
||||||
|
if (statusCode != TRI_ERROR_FORBIDDEN &&
|
||||||
|
(statusCode == TRI_ERROR_ARANGO_DATABASE_NOT_FOUND ||
|
||||||
|
statusCode != TRI_ERROR_NO_ERROR)) {
|
||||||
|
LOG_WARNING("forcefully deleting database directory '%s'", result.c_str());
|
||||||
|
TRI_RemoveDirectory(result.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (++iterations >= 30 * 10) {
|
||||||
LOG_WARNING("unable to remove database directory '%s'", result.c_str());
|
LOG_WARNING("unable to remove database directory '%s'", result.c_str());
|
||||||
return TRI_ERROR_INTERNAL;
|
return TRI_ERROR_INTERNAL;
|
||||||
}
|
}
|
||||||
|
@ -114,13 +127,26 @@ static int WaitForDeletion (TRI_server_t* server,
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static int WaitForDeletion (TRI_vocbase_t* vocbase,
|
static int WaitForDeletion (TRI_vocbase_t* vocbase,
|
||||||
TRI_voc_cid_t collectionId) {
|
TRI_voc_cid_t collectionId,
|
||||||
|
int statusCode) {
|
||||||
std::string const result = GetCollectionDirectory(vocbase, collectionId);
|
std::string const result = GetCollectionDirectory(vocbase, collectionId);
|
||||||
|
|
||||||
int iterations = 0;
|
int iterations = 0;
|
||||||
// wait for at most 30 seconds for the directory to be removed
|
// wait for at most 30 seconds for the directory to be removed
|
||||||
while (TRI_IsDirectory(result.c_str())) {
|
while (TRI_IsDirectory(result.c_str())) {
|
||||||
if (++iterations >= 30 * 10) {
|
if (iterations == 0) {
|
||||||
|
LOG_TRACE("waiting for deletion of collection directory '%s', called with status code %d",
|
||||||
|
result.c_str(),
|
||||||
|
statusCode);
|
||||||
|
|
||||||
|
if (statusCode != TRI_ERROR_FORBIDDEN &&
|
||||||
|
(statusCode == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND ||
|
||||||
|
statusCode != TRI_ERROR_NO_ERROR)) {
|
||||||
|
LOG_WARNING("forcefully deleting collection directory '%s'", result.c_str());
|
||||||
|
TRI_RemoveDirectory(result.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (++iterations >= 30 * 10) {
|
||||||
LOG_WARNING("unable to remove collection directory '%s'", result.c_str());
|
LOG_WARNING("unable to remove collection directory '%s'", result.c_str());
|
||||||
return TRI_ERROR_INTERNAL;
|
return TRI_ERROR_INTERNAL;
|
||||||
}
|
}
|
||||||
|
@ -1025,8 +1051,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
||||||
if (other != nullptr) {
|
if (other != nullptr) {
|
||||||
TRI_voc_cid_t otherCid = other->_cid;
|
TRI_voc_cid_t otherCid = other->_cid;
|
||||||
state->releaseCollection(otherCid);
|
state->releaseCollection(otherCid);
|
||||||
TRI_DropCollectionVocBase(vocbase, other, false);
|
int statusCode = TRI_DropCollectionVocBase(vocbase, other, false);
|
||||||
WaitForDeletion(vocbase, otherCid);
|
WaitForDeletion(vocbase, otherCid, statusCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
int res = TRI_RenameCollectionVocBase(vocbase, collection, name, true, false);
|
int res = TRI_RenameCollectionVocBase(vocbase, collection, name, true, false);
|
||||||
|
@ -1233,8 +1259,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
||||||
|
|
||||||
if (collection != nullptr) {
|
if (collection != nullptr) {
|
||||||
// drop an existing collection
|
// drop an existing collection
|
||||||
TRI_DropCollectionVocBase(vocbase, collection, false);
|
int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||||
WaitForDeletion(vocbase, collectionId);
|
WaitForDeletion(vocbase, collectionId, statusCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
char const* properties = reinterpret_cast<char const*>(m) + sizeof(collection_create_marker_t);
|
char const* properties = reinterpret_cast<char const*>(m) + sizeof(collection_create_marker_t);
|
||||||
|
@ -1261,8 +1287,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
||||||
TRI_voc_cid_t otherCid = collection->_cid;
|
TRI_voc_cid_t otherCid = collection->_cid;
|
||||||
|
|
||||||
state->releaseCollection(otherCid);
|
state->releaseCollection(otherCid);
|
||||||
TRI_DropCollectionVocBase(vocbase, collection, false);
|
int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||||
WaitForDeletion(vocbase, otherCid);
|
WaitForDeletion(vocbase, otherCid, statusCode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1276,7 +1302,7 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
||||||
// fake transaction to satisfy assertions
|
// fake transaction to satisfy assertions
|
||||||
triagens::arango::TransactionBase trx(true);
|
triagens::arango::TransactionBase trx(true);
|
||||||
|
|
||||||
WaitForDeletion(vocbase, collectionId);
|
WaitForDeletion(vocbase, collectionId, TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND);
|
||||||
collection = TRI_CreateCollectionVocBase(vocbase, &info, collectionId, false);
|
collection = TRI_CreateCollectionVocBase(vocbase, &info, collectionId, false);
|
||||||
|
|
||||||
TRI_FreeCollectionInfoOptions(&info);
|
TRI_FreeCollectionInfoOptions(&info);
|
||||||
|
@ -1301,9 +1327,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
||||||
|
|
||||||
if (vocbase != nullptr) {
|
if (vocbase != nullptr) {
|
||||||
// remove already existing database
|
// remove already existing database
|
||||||
TRI_DropByIdDatabaseServer(state->server, databaseId, false, false);
|
int statusCode = TRI_DropByIdDatabaseServer(state->server, databaseId, false, false);
|
||||||
|
WaitForDeletion(state->server, databaseId, statusCode);
|
||||||
WaitForDeletion(state->server, databaseId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char const* properties = reinterpret_cast<char const*>(m) + sizeof(database_create_marker_t);
|
char const* properties = reinterpret_cast<char const*>(m) + sizeof(database_create_marker_t);
|
||||||
|
@ -1335,8 +1360,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
||||||
TRI_voc_tick_t otherId = vocbase->_id;
|
TRI_voc_tick_t otherId = vocbase->_id;
|
||||||
|
|
||||||
state->releaseDatabase(otherId);
|
state->releaseDatabase(otherId);
|
||||||
TRI_DropDatabaseServer(state->server, nameString.c_str(), false, false);
|
int statusCode = TRI_DropDatabaseServer(state->server, nameString.c_str(), false, false);
|
||||||
WaitForDeletion(state->server, otherId);
|
WaitForDeletion(state->server, otherId, statusCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
TRI_vocbase_defaults_t defaults;
|
TRI_vocbase_defaults_t defaults;
|
||||||
|
@ -1346,6 +1371,7 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
||||||
triagens::arango::TransactionBase trx(true);
|
triagens::arango::TransactionBase trx(true);
|
||||||
|
|
||||||
vocbase = nullptr;
|
vocbase = nullptr;
|
||||||
|
WaitForDeletion(state->server, databaseId, TRI_ERROR_ARANGO_DATABASE_NOT_FOUND);
|
||||||
int res = TRI_CreateDatabaseServer(state->server, databaseId, nameString.c_str(), &defaults, &vocbase, false);
|
int res = TRI_CreateDatabaseServer(state->server, databaseId, nameString.c_str(), &defaults, &vocbase, false);
|
||||||
|
|
||||||
if (res != TRI_ERROR_NO_ERROR) {
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
|
@ -1430,8 +1456,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
||||||
// fake transaction to satisfy assertions
|
// fake transaction to satisfy assertions
|
||||||
triagens::arango::TransactionBase trx(true);
|
triagens::arango::TransactionBase trx(true);
|
||||||
|
|
||||||
TRI_DropCollectionVocBase(vocbase, collection, false);
|
int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||||
WaitForDeletion(vocbase, collectionId);
|
WaitForDeletion(vocbase, collectionId, statusCode);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
|
||||||
|
var db = require("org/arangodb").db;
|
||||||
|
var internal = require("internal");
|
||||||
|
var fs = require("fs");
|
||||||
|
var jsunity = require("jsunity");
|
||||||
|
|
||||||
|
|
||||||
|
function runSetup () {
|
||||||
|
internal.debugClearFailAt();
|
||||||
|
|
||||||
|
var i, c, ids = [ ];
|
||||||
|
for (i = 0; i < 10; ++i) {
|
||||||
|
db._drop("UnitTestsRecovery" + i);
|
||||||
|
c = db._create("UnitTestsRecovery" + i);
|
||||||
|
ids[i] = c._id;
|
||||||
|
c.drop();
|
||||||
|
}
|
||||||
|
db._drop("test");
|
||||||
|
c = db._create("test");
|
||||||
|
c.save({ _key: "crashme" }, true);
|
||||||
|
|
||||||
|
internal.wait(3, true);
|
||||||
|
for (i = 0; i < 10; ++i) {
|
||||||
|
var path = fs.join(db._path(), "collection-" + ids[i]);
|
||||||
|
fs.makeDirectory(path);
|
||||||
|
|
||||||
|
if (i < 5) {
|
||||||
|
// create a leftover parameter.json.tmp file
|
||||||
|
fs.write(fs.join(path, "parameter.json.tmp"), "");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// create an empty parameter.json file
|
||||||
|
fs.write(fs.join(path, "parameter.json"), "");
|
||||||
|
// create some other file
|
||||||
|
fs.write(fs.join(path, "foobar"), "foobar");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.debugSegfault("crashing server");
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief test suite
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
function recoverySuite () {
|
||||||
|
jsunity.jsUnity.attachAssertions();
|
||||||
|
|
||||||
|
return {
|
||||||
|
setUp: function () {
|
||||||
|
},
|
||||||
|
tearDown: function () {
|
||||||
|
},
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief test whether we can recover the collections even with the
|
||||||
|
/// leftover directories present
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
testLeftoverCollectionDirectory : function () {
|
||||||
|
var i;
|
||||||
|
for (i = 0; i < 5; ++i) {
|
||||||
|
assertEqual(null, db._collection("UnitTestsRecovery" + i));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief executes the test suite
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
function main (argv) {
|
||||||
|
if (argv[1] === "setup") {
|
||||||
|
runSetup();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
jsunity.run(recoverySuite);
|
||||||
|
return jsunity.done() ? 0 : 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
|
||||||
|
var db = require("org/arangodb").db;
|
||||||
|
var internal = require("internal");
|
||||||
|
var fs = require("fs");
|
||||||
|
var jsunity = require("jsunity");
|
||||||
|
|
||||||
|
|
||||||
|
function runSetup () {
|
||||||
|
internal.debugClearFailAt();
|
||||||
|
|
||||||
|
var i, paths = [ ];
|
||||||
|
for (i = 0; i < 3; ++i) {
|
||||||
|
db._useDatabase("_system");
|
||||||
|
try {
|
||||||
|
db._dropDatabase("UnitTestsRecovery" + i);
|
||||||
|
}
|
||||||
|
catch (err) {
|
||||||
|
}
|
||||||
|
|
||||||
|
db._createDatabase("UnitTestsRecovery" + i);
|
||||||
|
db._useDatabase("UnitTestsRecovery" + i);
|
||||||
|
paths[i] = db._path();
|
||||||
|
|
||||||
|
db._useDatabase("_system");
|
||||||
|
db._dropDatabase("UnitTestsRecovery" + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
db._drop("test");
|
||||||
|
c = db._create("test");
|
||||||
|
c.save({ _key: "crashme" }, true);
|
||||||
|
|
||||||
|
internal.wait(3, true);
|
||||||
|
for (i = 0; i < 3; ++i) {
|
||||||
|
// re-create database directory
|
||||||
|
try {
|
||||||
|
fs.makeDirectory(paths[i]);
|
||||||
|
}
|
||||||
|
catch (err2) {
|
||||||
|
}
|
||||||
|
|
||||||
|
fs.write(fs.join(paths[i], "parameter.json"), "");
|
||||||
|
fs.write(fs.join(paths[i], "parameter.json.tmp"), "");
|
||||||
|
|
||||||
|
// create some collection directory
|
||||||
|
fs.makeDirectory(fs.join(paths[i], "collection-123"));
|
||||||
|
if (i < 2) {
|
||||||
|
// create a leftover parameter.json.tmp file
|
||||||
|
fs.write(fs.join(paths[i], "collection-123", "parameter.json.tmp"), "");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// create an empty parameter.json file
|
||||||
|
fs.write(fs.join(paths[i], "collection-123", "parameter.json"), "");
|
||||||
|
// create some other file
|
||||||
|
fs.write(fs.join(paths[i], "collection-123", "foobar"), "foobar");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
internal.debugSegfault("crashing server");
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief test suite
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
function recoverySuite () {
|
||||||
|
jsunity.jsUnity.attachAssertions();
|
||||||
|
|
||||||
|
return {
|
||||||
|
setUp: function () {
|
||||||
|
},
|
||||||
|
tearDown: function () {
|
||||||
|
},
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief test whether we can recover the databases even with the
|
||||||
|
/// leftover directories present
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
testLeftoverDatabaseDirectory : function () {
|
||||||
|
assertEqual([ "_system" ], db._listDatabases());
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief executes the test suite
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
function main (argv) {
|
||||||
|
if (argv[1] === "setup") {
|
||||||
|
runSetup();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
jsunity.run(recoverySuite);
|
||||||
|
return jsunity.done() ? 0 : 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -1079,7 +1079,9 @@ bool TRI_SaveJson (char const* filename,
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove a potentially existing temporary file
|
// remove a potentially existing temporary file
|
||||||
TRI_UnlinkFile(tmp);
|
if (TRI_ExistsFile(tmp)) {
|
||||||
|
TRI_UnlinkFile(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
fd = TRI_CREATE(tmp, O_CREAT | O_TRUNC | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
|
fd = TRI_CREATE(tmp, O_CREAT | O_TRUNC | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
|
||||||
|
|
||||||
|
@ -1098,8 +1100,10 @@ bool TRI_SaveJson (char const* filename,
|
||||||
TRI_FreeString(TRI_CORE_MEM_ZONE, tmp);
|
TRI_FreeString(TRI_CORE_MEM_ZONE, tmp);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (syncFile) {
|
if (syncFile) {
|
||||||
|
LOG_TRACE("syncing tmp file '%s'", tmp);
|
||||||
|
|
||||||
if (! TRI_fsync(fd)) {
|
if (! TRI_fsync(fd)) {
|
||||||
TRI_CLOSE(fd);
|
TRI_CLOSE(fd);
|
||||||
TRI_set_errno(TRI_ERROR_SYS_ERROR);
|
TRI_set_errno(TRI_ERROR_SYS_ERROR);
|
||||||
|
|
Loading…
Reference in New Issue