diff --git a/arangod/Wal/CollectorThread.cpp b/arangod/Wal/CollectorThread.cpp index 7d64d773f5..40645b6357 100644 --- a/arangod/Wal/CollectorThread.cpp +++ b/arangod/Wal/CollectorThread.cpp @@ -409,6 +409,7 @@ int CollectorThread::collectLogfiles(bool& worked) { #ifdef ARANGODB_ENABLE_ROCKSDB RocksDBFeature::syncWal(); #endif + _logfileManager->setCollectionDone(logfile); } else { // return the logfile to the logfile manager in case of errors @@ -803,6 +804,11 @@ int CollectorThread::collect(Logfile* logfile) { try { res = transferMarkers(logfile, cid, state.collections[cid], state.operationsCount[cid], sortedOperations); + + TRI_IF_FAILURE("failDuringCollect") { + THROW_ARANGO_EXCEPTION(TRI_ERROR_DEBUG); + } + } catch (arangodb::basics::Exception const& ex) { res = ex.code(); } catch (...) { @@ -872,6 +878,11 @@ int CollectorThread::transferMarkers(Logfile* logfile, try { res = executeTransferMarkers(document, cache, operations); + + TRI_IF_FAILURE("transferMarkersCrash") { + // intentionally kill the server + TRI_SegfaultDebugging("CollectorThreadTransfer"); + } if (res == TRI_ERROR_NO_ERROR && !cache->operations->empty()) { // now sync the datafile diff --git a/arangod/Wal/LogfileManager.cpp b/arangod/Wal/LogfileManager.cpp index f3d830a3f5..c5a9da68fa 100644 --- a/arangod/Wal/LogfileManager.cpp +++ b/arangod/Wal/LogfileManager.cpp @@ -1500,6 +1500,10 @@ void LogfileManager::setCollectionRequested(Logfile* logfile) { // mark a file as being done with collection void LogfileManager::setCollectionDone(Logfile* logfile) { + TRI_IF_FAILURE("setCollectionDone") { + return; + } + TRI_ASSERT(logfile != nullptr); Logfile::IdType id = logfile->id(); diff --git a/arangod/Wal/RecoverState.cpp b/arangod/Wal/RecoverState.cpp index 9f4a6ee5c0..a7295fdbc9 100644 --- a/arangod/Wal/RecoverState.cpp +++ b/arangod/Wal/RecoverState.cpp @@ -547,16 +547,30 @@ bool RecoverState::ReplayMarker(TRI_df_marker_t const* marker, void* data, options.silent = true; options.recoveryMarker = 
envelope; options.waitForSync = false; + options.ignoreRevs = false; - OperationResult opRes = trx->remove(collectionName, VPackSlice(ptr), options); - int res = opRes.code; - - return res; + try { + OperationResult opRes = trx->remove(collectionName, VPackSlice(ptr), options); + if (opRes.code == TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND) { + // document to delete is not present. this error can be ignored + return TRI_ERROR_NO_ERROR; + } + return opRes.code; + } catch (arangodb::basics::Exception const& ex) { + if (ex.code() == TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND) { + // document to delete is not present. this error can be ignored + return TRI_ERROR_NO_ERROR; + } + return ex.code(); + } + // should not get here... + return TRI_ERROR_INTERNAL; }); if (res != TRI_ERROR_NO_ERROR && res != TRI_ERROR_ARANGO_CONFLICT && res != TRI_ERROR_ARANGO_DATABASE_NOT_FOUND && - res != TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) { + res != TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND && + res != TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND) { LOG(WARN) << "unable to remove document in collection " << collectionId << " of database " << databaseId << ": " << TRI_errno_string(res); ++state->errorCount; return state->canContinue(); diff --git a/js/client/modules/@arangodb/testing.js b/js/client/modules/@arangodb/testing.js index c02841068c..a32d13d036 100644 --- a/js/client/modules/@arangodb/testing.js +++ b/js/client/modules/@arangodb/testing.js @@ -3062,6 +3062,7 @@ function runArangodRecovery (instanceInfo, options, script, setup) { } const recoveryTests = [ + 'die-during-collector', 'disk-full-logfile', 'disk-full-logfile-data', 'disk-full-datafile', diff --git a/js/server/tests/recovery/die-during-collector.js b/js/server/tests/recovery/die-during-collector.js new file mode 100644 index 0000000000..d4566f5326 --- /dev/null +++ b/js/server/tests/recovery/die-during-collector.js @@ -0,0 +1,116 @@ +/* jshint globalstrict:false, strict:false, unused : false */ +/* global assertEqual, assertFalse */ +// 
//////////////////////////////////////////////////////////////////////////////
// / @brief recovery test: server crashes while logfile collection is unfinished
// /
// / @file
// /
// / DISCLAIMER
// /
// / Copyright 2010-2012 triagens GmbH, Cologne, Germany
// /
// / Licensed under the Apache License, Version 2.0 (the "License")
// / you may not use this file except in compliance with the License.
// / You may obtain a copy of the License at
// /
// / http://www.apache.org/licenses/LICENSE-2.0
// /
// / Unless required by applicable law or agreed to in writing, software
// / distributed under the License is distributed on an "AS IS" BASIS,
// / WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// / See the License for the specific language governing permissions and
// / limitations under the License.
// /
// / Copyright holder is triAGENS GmbH, Cologne, Germany
// /
// / @author Jan Steemann
// / @author Copyright 2013, triAGENS GmbH, Cologne, Germany
// //////////////////////////////////////////////////////////////////////////////

var db = require('@arangodb').db;
var internal = require('internal');
var jsunity = require('jsunity');

// //////////////////////////////////////////////////////////////////////////////
// / @brief prepares the on-disk state and then crashes the server on purpose
// /
// / Steps, in order:
// /  1. clear all failure points and (re)create the collection
// /  2. insert 1000 documents with keys test0..test999 and flush the WAL
// /  3. remove all 1000 documents again
// /  4. activate the 'setCollectionDone' failure point and flush the WAL again
// /  5. poll until the collection figures report 1000 dead deletions
// /  6. rotate the collection and poll until it has no datafiles left
// /  7. segfault the server
// /
// / NOTE(review): the meaning of the three boolean flags passed to
// / internal.wal.flush() is not visible from this file - confirm against the
// / WAL API before changing them.
// //////////////////////////////////////////////////////////////////////////////

function runSetup () {
  'use strict';

  var collectionName = 'UnitTestsRecovery';
  var documentCount = 1000;
  var n;

  // polls every half second until the given predicate reports success.
  // there is deliberately no timeout: the predicate is evaluated exactly once
  // per iteration, before each wait
  var waitUntil = function (isDone) {
    while (!isDone()) {
      internal.wait(0.5, false);
    }
  };

  internal.debugClearFailAt();

  db._drop(collectionName);
  var collection = db._create(collectionName);

  for (n = 0; n < documentCount; ++n) {
    collection.save({ _key: 'test' + n });
  }

  internal.wal.flush(true, true, true);

  for (n = 0; n < documentCount; ++n) {
    collection.remove('test' + n);
  }

  // from here on the failure point is active while the removals are flushed
  internal.debugSetFailAt('setCollectionDone');
  internal.wal.flush(true, false, false);

  // wait until all removals show up as dead deletions in the figures
  waitUntil(function () {
    return collection.figures().dead.deletion === documentCount;
  });

  collection.rotate();

  // wait until the collection has no datafiles left
  // (presumably this is the case once the compactor has finished)
  waitUntil(function () {
    return collection.figures().datafiles.count === 0;
  });

  internal.debugSegfault('crashing server');
}

// 
//////////////////////////////////////////////////////////////////////////////
// / @brief test suite, executed after the server has been restarted
// //////////////////////////////////////////////////////////////////////////////

function recoverySuite () {
  'use strict';
  jsunity.jsUnity.attachAssertions();

  var suite = {
    setUp: function () {},
    tearDown: function () {},

    // //////////////////////////////////////////////////////////////////////////////
    // / @brief test that all documents removed before the crash are still gone:
    // / the collection must be empty and none of the keys test0..test999 may exist
    // //////////////////////////////////////////////////////////////////////////////

    testDieDuringCollector: function () {
      var collection = db._collection('UnitTestsRecovery');

      assertEqual(0, collection.count());

      var n = 0;
      while (n < 1000) {
        assertFalse(collection.exists('test' + n));
        ++n;
      }
    }

  };

  return suite;
}

// //////////////////////////////////////////////////////////////////////////////
// / @brief entry point: runs the setup phase when argv[1] is 'setup',
// / otherwise executes the test suite
// /
// / returns 0 after the setup call, or 0/1 depending on whether the status
// / reported by jsunity.done() is truthy/falsy
// //////////////////////////////////////////////////////////////////////////////

function main (argv) {
  'use strict';

  if (argv[1] === 'setup') {
    runSetup();
    return 0;
  }

  jsunity.run(recoverySuite);
  var outcome = jsunity.done();
  return outcome.status ? 0 : 1;
}