From 1016debd2e88fdbbcc46ac05034b40456a5dca5b Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Thu, 18 Sep 2014 17:47:37 +0200 Subject: [PATCH] more robust recovery --- UnitTests/Makefile.unittests | 2 + arangod/VocBase/vocbase.cpp | 8 +- arangod/Wal/RecoverState.cpp | 62 +++++++---- .../recovery/leftover-collection-directory.js | 85 +++++++++++++++ .../recovery/leftover-database-directory.js | 100 ++++++++++++++++++ lib/Basics/json.cpp | 8 +- 6 files changed, 244 insertions(+), 21 deletions(-) create mode 100644 js/server/tests/recovery/leftover-collection-directory.js create mode 100644 js/server/tests/recovery/leftover-database-directory.js diff --git a/UnitTests/Makefile.unittests b/UnitTests/Makefile.unittests index b4558597f8..9bd0524293 100755 --- a/UnitTests/Makefile.unittests +++ b/UnitTests/Makefile.unittests @@ -208,6 +208,8 @@ unittests-recovery: @echo "================================================================================" @echo + $(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="leftover-database-directory" + $(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="leftover-collection-directory" $(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="collection-unload" $(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="resume-recovery-multi-flush" $(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="resume-recovery-simple" diff --git a/arangod/VocBase/vocbase.cpp b/arangod/VocBase/vocbase.cpp index 2095d47662..920008726a 100644 --- a/arangod/VocBase/vocbase.cpp +++ b/arangod/VocBase/vocbase.cpp @@ -417,6 +417,8 @@ static bool DropCollectionCallback (TRI_collection_t* col, // perform the rename res = TRI_RenameFile(collection->_path, newFilename); + LOG_TRACE("renaming collection directory from '%s' to '%s'", collection->_path, newFilename); + if (res != TRI_ERROR_NO_ERROR) { LOG_ERROR("cannot rename dropped collection '%s' from '%s' to '%s': %s", collection->_name, @@ -2048,7 +2050,11 @@ int TRI_DropCollectionVocBase (TRI_vocbase_t* vocbase, if (! info._deleted) { info._deleted = true; - res = TRI_SaveCollectionInfo(collection->_path, &info, vocbase->_settings.forceSyncProperties); + // we don't need to fsync if we are in the recovery phase + bool doSync = (vocbase->_settings.forceSyncProperties && + ! triagens::wal::LogfileManager::instance()->isInRecovery()); + + res = TRI_SaveCollectionInfo(collection->_path, &info, doSync); TRI_FreeCollectionInfoOptions(&info); if (res != TRI_ERROR_NO_ERROR) { diff --git a/arangod/Wal/RecoverState.cpp b/arangod/Wal/RecoverState.cpp index f468a3fb8d..838f3b12db 100644 --- a/arangod/Wal/RecoverState.cpp +++ b/arangod/Wal/RecoverState.cpp @@ -92,13 +92,26 @@ static std::string GetCollectionDirectory (TRI_vocbase_t* vocbase, //////////////////////////////////////////////////////////////////////////////// static int WaitForDeletion (TRI_server_t* server, - TRI_voc_tick_t databaseId) { + TRI_voc_tick_t databaseId, + int statusCode) { std::string const result = GetDatabaseDirectory(server, databaseId); int iterations = 0; // wait for at most 30 seconds for the directory to be removed while (TRI_IsDirectory(result.c_str())) { - if (++iterations >= 30 * 10) { + if (iterations == 0) { + LOG_TRACE("waiting for deletion of database directory '%s', called with status code %d", + result.c_str(), + statusCode); + + if (statusCode != TRI_ERROR_FORBIDDEN && + (statusCode == TRI_ERROR_ARANGO_DATABASE_NOT_FOUND || + statusCode != TRI_ERROR_NO_ERROR)) { + LOG_WARNING("forcefully deleting database directory '%s'", result.c_str()); + TRI_RemoveDirectory(result.c_str()); + } + } + else if (++iterations >= 30 * 10) { LOG_WARNING("unable to remove database directory '%s'", result.c_str()); return TRI_ERROR_INTERNAL; } @@ -114,13 +127,26 @@ static int WaitForDeletion (TRI_server_t* server, //////////////////////////////////////////////////////////////////////////////// static int WaitForDeletion (TRI_vocbase_t* vocbase, - TRI_voc_cid_t collectionId) { + TRI_voc_cid_t collectionId, + int statusCode) { std::string const result = GetCollectionDirectory(vocbase, collectionId); int iterations = 0; // wait for at most 30 seconds for the directory to be removed while (TRI_IsDirectory(result.c_str())) { - if (++iterations >= 30 * 10) { + if (iterations == 0) { + LOG_TRACE("waiting for deletion of collection directory '%s', called with status code %d", + result.c_str(), + statusCode); + + if (statusCode != TRI_ERROR_FORBIDDEN && + (statusCode == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND || + statusCode != TRI_ERROR_NO_ERROR)) { + LOG_WARNING("forcefully deleting collection directory '%s'", result.c_str()); + TRI_RemoveDirectory(result.c_str()); + } + } + else if (++iterations >= 30 * 10) { LOG_WARNING("unable to remove collection directory '%s'", result.c_str()); return TRI_ERROR_INTERNAL; } @@ -1025,8 +1051,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker, if (other != nullptr) { TRI_voc_cid_t otherCid = other->_cid; state->releaseCollection(otherCid); - TRI_DropCollectionVocBase(vocbase, other, false); - WaitForDeletion(vocbase, otherCid); + int statusCode = TRI_DropCollectionVocBase(vocbase, other, false); + WaitForDeletion(vocbase, otherCid, statusCode); } int res = TRI_RenameCollectionVocBase(vocbase, collection, name, true, false); @@ -1233,8 +1259,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker, if (collection != nullptr) { // drop an existing collection - TRI_DropCollectionVocBase(vocbase, collection, false); - WaitForDeletion(vocbase, collectionId); + int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false); + WaitForDeletion(vocbase, collectionId, statusCode); } char const* properties = reinterpret_cast(m) + sizeof(collection_create_marker_t); @@ -1261,8 +1287,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker, TRI_voc_cid_t otherCid = collection->_cid; state->releaseCollection(otherCid); - TRI_DropCollectionVocBase(vocbase, collection, false); - WaitForDeletion(vocbase, otherCid); + int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false); + WaitForDeletion(vocbase, otherCid, statusCode); } } @@ -1276,7 +1302,7 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker, // fake transaction to satisfy assertions triagens::arango::TransactionBase trx(true); - WaitForDeletion(vocbase, collectionId); + WaitForDeletion(vocbase, collectionId, TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND); collection = TRI_CreateCollectionVocBase(vocbase, &info, collectionId, false); TRI_FreeCollectionInfoOptions(&info); @@ -1301,9 +1327,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker, if (vocbase != nullptr) { // remove already existing database - TRI_DropByIdDatabaseServer(state->server, databaseId, false, false); - - WaitForDeletion(state->server, databaseId); + int statusCode = TRI_DropByIdDatabaseServer(state->server, databaseId, false, false); + WaitForDeletion(state->server, databaseId, statusCode); } char const* properties = reinterpret_cast(m) + sizeof(database_create_marker_t); @@ -1335,8 +1360,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker, TRI_voc_tick_t otherId = vocbase->_id; state->releaseDatabase(otherId); - TRI_DropDatabaseServer(state->server, nameString.c_str(), false, false); - WaitForDeletion(state->server, otherId); + int statusCode = TRI_DropDatabaseServer(state->server, nameString.c_str(), false, false); + WaitForDeletion(state->server, otherId, statusCode); } TRI_vocbase_defaults_t defaults; @@ -1346,6 +1371,7 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker, triagens::arango::TransactionBase trx(true); vocbase = nullptr; + WaitForDeletion(state->server, databaseId, TRI_ERROR_ARANGO_DATABASE_NOT_FOUND); int res = TRI_CreateDatabaseServer(state->server, databaseId, nameString.c_str(), &defaults, &vocbase, false); if (res != TRI_ERROR_NO_ERROR) { @@ -1430,8 +1456,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker, // fake transaction to satisfy assertions triagens::arango::TransactionBase trx(true); - TRI_DropCollectionVocBase(vocbase, collection, false); - WaitForDeletion(vocbase, collectionId); + int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false); + WaitForDeletion(vocbase, collectionId, statusCode); } break; } diff --git a/js/server/tests/recovery/leftover-collection-directory.js b/js/server/tests/recovery/leftover-collection-directory.js new file mode 100644 index 0000000000..9b639d11f1 --- /dev/null +++ b/js/server/tests/recovery/leftover-collection-directory.js @@ -0,0 +1,85 @@ + +var db = require("org/arangodb").db; +var internal = require("internal"); +var fs = require("fs"); +var jsunity = require("jsunity"); + + +function runSetup () { + internal.debugClearFailAt(); + + var i, c, ids = [ ]; + for (i = 0; i < 10; ++i) { + db._drop("UnitTestsRecovery" + i); + c = db._create("UnitTestsRecovery" + i); + ids[i] = c._id; + c.drop(); + } + db._drop("test"); + c = db._create("test"); + c.save({ _key: "crashme" }, true); + + internal.wait(3, true); + for (i = 0; i < 10; ++i) { + var path = fs.join(db._path(), "collection-" + ids[i]); + fs.makeDirectory(path); + + if (i < 5) { + // create a leftover parameter.json.tmp file + fs.write(fs.join(path, "parameter.json.tmp"), ""); + } + else { + // create an empty parameter.json file + fs.write(fs.join(path, "parameter.json"), ""); + // create some other file + fs.write(fs.join(path, "foobar"), "foobar"); + } + } + + internal.debugSegfault("crashing server"); +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test suite +//////////////////////////////////////////////////////////////////////////////// + +function recoverySuite () { + jsunity.jsUnity.attachAssertions(); + + return { + setUp: function () { + }, + tearDown: function () { + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test whether we can recover the collections even with the +/// leftover directories present +//////////////////////////////////////////////////////////////////////////////// + + testLeftoverCollectionDirectory : function () { + var i; + for (i = 0; i < 5; ++i) { + assertEqual(null, db._collection("UnitTestsRecovery" + i)); + } + + } + + }; +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief executes the test suite +//////////////////////////////////////////////////////////////////////////////// + +function main (argv) { + if (argv[1] === "setup") { + runSetup(); + return 0; + } + else { + jsunity.run(recoverySuite); + return jsunity.done() ? 0 : 1; + } +} + diff --git a/js/server/tests/recovery/leftover-database-directory.js b/js/server/tests/recovery/leftover-database-directory.js new file mode 100644 index 0000000000..c33326de09 --- /dev/null +++ b/js/server/tests/recovery/leftover-database-directory.js @@ -0,0 +1,100 @@ + +var db = require("org/arangodb").db; +var internal = require("internal"); +var fs = require("fs"); +var jsunity = require("jsunity"); + + +function runSetup () { + internal.debugClearFailAt(); + + var i, paths = [ ]; + for (i = 0; i < 3; ++i) { + db._useDatabase("_system"); + try { + db._dropDatabase("UnitTestsRecovery" + i); + } + catch (err) { + } + + db._createDatabase("UnitTestsRecovery" + i); + db._useDatabase("UnitTestsRecovery" + i); + paths[i] = db._path(); + + db._useDatabase("_system"); + db._dropDatabase("UnitTestsRecovery" + i); + } + + db._drop("test"); + c = db._create("test"); + c.save({ _key: "crashme" }, true); + + internal.wait(3, true); + for (i = 0; i < 3; ++i) { + // re-create database directory + try { + fs.makeDirectory(paths[i]); + } + catch (err2) { + } + + fs.write(fs.join(paths[i], "parameter.json"), ""); + fs.write(fs.join(paths[i], "parameter.json.tmp"), ""); + + // create some collection directory + fs.makeDirectory(fs.join(paths[i], "collection-123")); + if (i < 2) { + // create a leftover parameter.json.tmp file + fs.write(fs.join(paths[i], "collection-123", "parameter.json.tmp"), ""); + } + else { + // create an empty parameter.json file + fs.write(fs.join(paths[i], "collection-123", "parameter.json"), ""); + // create some other file + fs.write(fs.join(paths[i], "collection-123", "foobar"), "foobar"); + } + } + + internal.debugSegfault("crashing server"); +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test suite +//////////////////////////////////////////////////////////////////////////////// + +function recoverySuite () { + jsunity.jsUnity.attachAssertions(); + + return { + setUp: function () { + }, + tearDown: function () { + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test whether we can recover the databases even with the +/// leftover directories present +//////////////////////////////////////////////////////////////////////////////// + + testLeftoverDatabaseDirectory : function () { + assertEqual([ "_system" ], db._listDatabases()); + } + + }; +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief executes the test suite +//////////////////////////////////////////////////////////////////////////////// + +function main (argv) { + if (argv[1] === "setup") { + runSetup(); + return 0; + } + else { + jsunity.run(recoverySuite); + return jsunity.done() ? 0 : 1; + } +} + diff --git a/lib/Basics/json.cpp b/lib/Basics/json.cpp index 4e742a7c56..faf3a33882 100644 --- a/lib/Basics/json.cpp +++ b/lib/Basics/json.cpp @@ -1079,7 +1079,9 @@ bool TRI_SaveJson (char const* filename, } // remove a potentially existing temporary file - TRI_UnlinkFile(tmp); + if (TRI_ExistsFile(tmp)) { + TRI_UnlinkFile(tmp); + } fd = TRI_CREATE(tmp, O_CREAT | O_TRUNC | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR); @@ -1098,8 +1100,10 @@ bool TRI_SaveJson (char const* filename, TRI_FreeString(TRI_CORE_MEM_ZONE, tmp); return false; } - + if (syncFile) { + LOG_TRACE("syncing tmp file '%s'", tmp); + if (! TRI_fsync(fd)) { TRI_CLOSE(fd); TRI_set_errno(TRI_ERROR_SYS_ERROR);