mirror of https://gitee.com/bigwinds/arangodb
more robust recovery
This commit is contained in:
parent
307766cf13
commit
1016debd2e
|
@ -208,6 +208,8 @@ unittests-recovery:
|
|||
@echo "================================================================================"
|
||||
@echo
|
||||
|
||||
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="leftover-database-directory"
|
||||
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="leftover-collection-directory"
|
||||
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="collection-unload"
|
||||
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="resume-recovery-multi-flush"
|
||||
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="resume-recovery-simple"
|
||||
|
|
|
@ -417,6 +417,8 @@ static bool DropCollectionCallback (TRI_collection_t* col,
|
|||
// perform the rename
|
||||
res = TRI_RenameFile(collection->_path, newFilename);
|
||||
|
||||
LOG_TRACE("renaming collection directory from '%s' to '%s'", collection->_path, newFilename);
|
||||
|
||||
if (res != TRI_ERROR_NO_ERROR) {
|
||||
LOG_ERROR("cannot rename dropped collection '%s' from '%s' to '%s': %s",
|
||||
collection->_name,
|
||||
|
@ -2048,7 +2050,11 @@ int TRI_DropCollectionVocBase (TRI_vocbase_t* vocbase,
|
|||
if (! info._deleted) {
|
||||
info._deleted = true;
|
||||
|
||||
res = TRI_SaveCollectionInfo(collection->_path, &info, vocbase->_settings.forceSyncProperties);
|
||||
// we don't need to fsync if we are in the recovery phase
|
||||
bool doSync = (vocbase->_settings.forceSyncProperties &&
|
||||
! triagens::wal::LogfileManager::instance()->isInRecovery());
|
||||
|
||||
res = TRI_SaveCollectionInfo(collection->_path, &info, doSync);
|
||||
TRI_FreeCollectionInfoOptions(&info);
|
||||
|
||||
if (res != TRI_ERROR_NO_ERROR) {
|
||||
|
|
|
@ -92,13 +92,26 @@ static std::string GetCollectionDirectory (TRI_vocbase_t* vocbase,
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static int WaitForDeletion (TRI_server_t* server,
|
||||
TRI_voc_tick_t databaseId) {
|
||||
TRI_voc_tick_t databaseId,
|
||||
int statusCode) {
|
||||
std::string const result = GetDatabaseDirectory(server, databaseId);
|
||||
|
||||
int iterations = 0;
|
||||
// wait for at most 30 seconds for the directory to be removed
|
||||
while (TRI_IsDirectory(result.c_str())) {
|
||||
if (++iterations >= 30 * 10) {
|
||||
if (iterations == 0) {
|
||||
LOG_TRACE("waiting for deletion of database directory '%s', called with status code %d",
|
||||
result.c_str(),
|
||||
statusCode);
|
||||
|
||||
if (statusCode != TRI_ERROR_FORBIDDEN &&
|
||||
(statusCode == TRI_ERROR_ARANGO_DATABASE_NOT_FOUND ||
|
||||
statusCode != TRI_ERROR_NO_ERROR)) {
|
||||
LOG_WARNING("forcefully deleting database directory '%s'", result.c_str());
|
||||
TRI_RemoveDirectory(result.c_str());
|
||||
}
|
||||
}
|
||||
else if (++iterations >= 30 * 10) {
|
||||
LOG_WARNING("unable to remove database directory '%s'", result.c_str());
|
||||
return TRI_ERROR_INTERNAL;
|
||||
}
|
||||
|
@ -114,13 +127,26 @@ static int WaitForDeletion (TRI_server_t* server,
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static int WaitForDeletion (TRI_vocbase_t* vocbase,
|
||||
TRI_voc_cid_t collectionId) {
|
||||
TRI_voc_cid_t collectionId,
|
||||
int statusCode) {
|
||||
std::string const result = GetCollectionDirectory(vocbase, collectionId);
|
||||
|
||||
int iterations = 0;
|
||||
// wait for at most 30 seconds for the directory to be removed
|
||||
while (TRI_IsDirectory(result.c_str())) {
|
||||
if (++iterations >= 30 * 10) {
|
||||
if (iterations == 0) {
|
||||
LOG_TRACE("waiting for deletion of collection directory '%s', called with status code %d",
|
||||
result.c_str(),
|
||||
statusCode);
|
||||
|
||||
if (statusCode != TRI_ERROR_FORBIDDEN &&
|
||||
(statusCode == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND ||
|
||||
statusCode != TRI_ERROR_NO_ERROR)) {
|
||||
LOG_WARNING("forcefully deleting collection directory '%s'", result.c_str());
|
||||
TRI_RemoveDirectory(result.c_str());
|
||||
}
|
||||
}
|
||||
else if (++iterations >= 30 * 10) {
|
||||
LOG_WARNING("unable to remove collection directory '%s'", result.c_str());
|
||||
return TRI_ERROR_INTERNAL;
|
||||
}
|
||||
|
@ -1025,8 +1051,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
|||
if (other != nullptr) {
|
||||
TRI_voc_cid_t otherCid = other->_cid;
|
||||
state->releaseCollection(otherCid);
|
||||
TRI_DropCollectionVocBase(vocbase, other, false);
|
||||
WaitForDeletion(vocbase, otherCid);
|
||||
int statusCode = TRI_DropCollectionVocBase(vocbase, other, false);
|
||||
WaitForDeletion(vocbase, otherCid, statusCode);
|
||||
}
|
||||
|
||||
int res = TRI_RenameCollectionVocBase(vocbase, collection, name, true, false);
|
||||
|
@ -1233,8 +1259,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
|||
|
||||
if (collection != nullptr) {
|
||||
// drop an existing collection
|
||||
TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||
WaitForDeletion(vocbase, collectionId);
|
||||
int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||
WaitForDeletion(vocbase, collectionId, statusCode);
|
||||
}
|
||||
|
||||
char const* properties = reinterpret_cast<char const*>(m) + sizeof(collection_create_marker_t);
|
||||
|
@ -1261,8 +1287,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
|||
TRI_voc_cid_t otherCid = collection->_cid;
|
||||
|
||||
state->releaseCollection(otherCid);
|
||||
TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||
WaitForDeletion(vocbase, otherCid);
|
||||
int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||
WaitForDeletion(vocbase, otherCid, statusCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1276,7 +1302,7 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
|||
// fake transaction to satisfy assertions
|
||||
triagens::arango::TransactionBase trx(true);
|
||||
|
||||
WaitForDeletion(vocbase, collectionId);
|
||||
WaitForDeletion(vocbase, collectionId, TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND);
|
||||
collection = TRI_CreateCollectionVocBase(vocbase, &info, collectionId, false);
|
||||
|
||||
TRI_FreeCollectionInfoOptions(&info);
|
||||
|
@ -1301,9 +1327,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
|||
|
||||
if (vocbase != nullptr) {
|
||||
// remove already existing database
|
||||
TRI_DropByIdDatabaseServer(state->server, databaseId, false, false);
|
||||
|
||||
WaitForDeletion(state->server, databaseId);
|
||||
int statusCode = TRI_DropByIdDatabaseServer(state->server, databaseId, false, false);
|
||||
WaitForDeletion(state->server, databaseId, statusCode);
|
||||
}
|
||||
|
||||
char const* properties = reinterpret_cast<char const*>(m) + sizeof(database_create_marker_t);
|
||||
|
@ -1335,8 +1360,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
|||
TRI_voc_tick_t otherId = vocbase->_id;
|
||||
|
||||
state->releaseDatabase(otherId);
|
||||
TRI_DropDatabaseServer(state->server, nameString.c_str(), false, false);
|
||||
WaitForDeletion(state->server, otherId);
|
||||
int statusCode = TRI_DropDatabaseServer(state->server, nameString.c_str(), false, false);
|
||||
WaitForDeletion(state->server, otherId, statusCode);
|
||||
}
|
||||
|
||||
TRI_vocbase_defaults_t defaults;
|
||||
|
@ -1346,6 +1371,7 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
|||
triagens::arango::TransactionBase trx(true);
|
||||
|
||||
vocbase = nullptr;
|
||||
WaitForDeletion(state->server, databaseId, TRI_ERROR_ARANGO_DATABASE_NOT_FOUND);
|
||||
int res = TRI_CreateDatabaseServer(state->server, databaseId, nameString.c_str(), &defaults, &vocbase, false);
|
||||
|
||||
if (res != TRI_ERROR_NO_ERROR) {
|
||||
|
@ -1430,8 +1456,8 @@ bool RecoverState::ReplayMarker (TRI_df_marker_t const* marker,
|
|||
// fake transaction to satisfy assertions
|
||||
triagens::arango::TransactionBase trx(true);
|
||||
|
||||
TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||
WaitForDeletion(vocbase, collectionId);
|
||||
int statusCode = TRI_DropCollectionVocBase(vocbase, collection, false);
|
||||
WaitForDeletion(vocbase, collectionId, statusCode);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
|
||||
var db = require("org/arangodb").db;
|
||||
var internal = require("internal");
|
||||
var fs = require("fs");
|
||||
var jsunity = require("jsunity");
|
||||
|
||||
|
||||
function runSetup () {
|
||||
internal.debugClearFailAt();
|
||||
|
||||
var i, c, ids = [ ];
|
||||
for (i = 0; i < 10; ++i) {
|
||||
db._drop("UnitTestsRecovery" + i);
|
||||
c = db._create("UnitTestsRecovery" + i);
|
||||
ids[i] = c._id;
|
||||
c.drop();
|
||||
}
|
||||
db._drop("test");
|
||||
c = db._create("test");
|
||||
c.save({ _key: "crashme" }, true);
|
||||
|
||||
internal.wait(3, true);
|
||||
for (i = 0; i < 10; ++i) {
|
||||
var path = fs.join(db._path(), "collection-" + ids[i]);
|
||||
fs.makeDirectory(path);
|
||||
|
||||
if (i < 5) {
|
||||
// create a leftover parameter.json.tmp file
|
||||
fs.write(fs.join(path, "parameter.json.tmp"), "");
|
||||
}
|
||||
else {
|
||||
// create an empty parameter.json file
|
||||
fs.write(fs.join(path, "parameter.json"), "");
|
||||
// create some other file
|
||||
fs.write(fs.join(path, "foobar"), "foobar");
|
||||
}
|
||||
}
|
||||
|
||||
internal.debugSegfault("crashing server");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test suite
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
function recoverySuite () {
|
||||
jsunity.jsUnity.attachAssertions();
|
||||
|
||||
return {
|
||||
setUp: function () {
|
||||
},
|
||||
tearDown: function () {
|
||||
},
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test whether we can recover the collections even with the
|
||||
/// leftover directories present
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testLeftoverCollectionDirectory : function () {
|
||||
var i;
|
||||
for (i = 0; i < 5; ++i) {
|
||||
assertEqual(null, db._collection("UnitTestsRecovery" + i));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief executes the test suite
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
function main (argv) {
|
||||
if (argv[1] === "setup") {
|
||||
runSetup();
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
jsunity.run(recoverySuite);
|
||||
return jsunity.done() ? 0 : 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
|
||||
var db = require("org/arangodb").db;
|
||||
var internal = require("internal");
|
||||
var fs = require("fs");
|
||||
var jsunity = require("jsunity");
|
||||
|
||||
|
||||
function runSetup () {
|
||||
internal.debugClearFailAt();
|
||||
|
||||
var i, paths = [ ];
|
||||
for (i = 0; i < 3; ++i) {
|
||||
db._useDatabase("_system");
|
||||
try {
|
||||
db._dropDatabase("UnitTestsRecovery" + i);
|
||||
}
|
||||
catch (err) {
|
||||
}
|
||||
|
||||
db._createDatabase("UnitTestsRecovery" + i);
|
||||
db._useDatabase("UnitTestsRecovery" + i);
|
||||
paths[i] = db._path();
|
||||
|
||||
db._useDatabase("_system");
|
||||
db._dropDatabase("UnitTestsRecovery" + i);
|
||||
}
|
||||
|
||||
db._drop("test");
|
||||
c = db._create("test");
|
||||
c.save({ _key: "crashme" }, true);
|
||||
|
||||
internal.wait(3, true);
|
||||
for (i = 0; i < 3; ++i) {
|
||||
// re-create database directory
|
||||
try {
|
||||
fs.makeDirectory(paths[i]);
|
||||
}
|
||||
catch (err2) {
|
||||
}
|
||||
|
||||
fs.write(fs.join(paths[i], "parameter.json"), "");
|
||||
fs.write(fs.join(paths[i], "parameter.json.tmp"), "");
|
||||
|
||||
// create some collection directory
|
||||
fs.makeDirectory(fs.join(paths[i], "collection-123"));
|
||||
if (i < 2) {
|
||||
// create a leftover parameter.json.tmp file
|
||||
fs.write(fs.join(paths[i], "collection-123", "parameter.json.tmp"), "");
|
||||
}
|
||||
else {
|
||||
// create an empty parameter.json file
|
||||
fs.write(fs.join(paths[i], "collection-123", "parameter.json"), "");
|
||||
// create some other file
|
||||
fs.write(fs.join(paths[i], "collection-123", "foobar"), "foobar");
|
||||
}
|
||||
}
|
||||
|
||||
internal.debugSegfault("crashing server");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test suite
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
function recoverySuite () {
|
||||
jsunity.jsUnity.attachAssertions();
|
||||
|
||||
return {
|
||||
setUp: function () {
|
||||
},
|
||||
tearDown: function () {
|
||||
},
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test whether we can recover the databases even with the
|
||||
/// leftover directories present
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testLeftoverDatabaseDirectory : function () {
|
||||
assertEqual([ "_system" ], db._listDatabases());
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief executes the test suite
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
function main (argv) {
|
||||
if (argv[1] === "setup") {
|
||||
runSetup();
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
jsunity.run(recoverySuite);
|
||||
return jsunity.done() ? 0 : 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -1079,7 +1079,9 @@ bool TRI_SaveJson (char const* filename,
|
|||
}
|
||||
|
||||
// remove a potentially existing temporary file
|
||||
TRI_UnlinkFile(tmp);
|
||||
if (TRI_ExistsFile(tmp)) {
|
||||
TRI_UnlinkFile(tmp);
|
||||
}
|
||||
|
||||
fd = TRI_CREATE(tmp, O_CREAT | O_TRUNC | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
|
||||
|
||||
|
@ -1098,8 +1100,10 @@ bool TRI_SaveJson (char const* filename,
|
|||
TRI_FreeString(TRI_CORE_MEM_ZONE, tmp);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if (syncFile) {
|
||||
LOG_TRACE("syncing tmp file '%s'", tmp);
|
||||
|
||||
if (! TRI_fsync(fd)) {
|
||||
TRI_CLOSE(fd);
|
||||
TRI_set_errno(TRI_ERROR_SYS_ERROR);
|
||||
|
|
Loading…
Reference in New Issue