From ff494cc8977b8eac0e07d3d30b3292e69e1b14a5 Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Mon, 16 Jun 2014 16:52:26 +0200 Subject: [PATCH] added recovery tests (not functional yet) --- UnitTests/Makefile.unittests | 40 ++++++++++ arangod/Wal/CollectorThread.cpp | 17 ++++ arangod/Wal/LogfileManager.cpp | 5 ++ arangod/Wal/Slot.cpp | 7 +- .../tests/recovery/corrupt-wal-marker.js | 77 +++++++++++++++++++ lib/BasicsC/debugging.c | 5 ++ 6 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 js/server/tests/recovery/corrupt-wal-marker.js diff --git a/UnitTests/Makefile.unittests b/UnitTests/Makefile.unittests index 93a91c4d00..51cb9d3456 100755 --- a/UnitTests/Makefile.unittests +++ b/UnitTests/Makefile.unittests @@ -29,6 +29,7 @@ unittests: all unittests-verbose unittests-brief unittests-brief: \ jslint \ unittests-config \ + unittests-recovery \ unittests-boost \ unittests-shell-server \ unittests-shell-server-ahuacatl \ @@ -184,6 +185,45 @@ start-slave: curl-request-get: @declare -x ACTUAL=$$(curl --dump - -X GET "$(CURL_PROTO)://$(CURL_HOST):$(CURL_PORT)$(CURL_URL)" 2> /dev/null | head -n 1 | tr "\r" " " | sed -e 's/^HTTP\/1\.1 \([0-9]*\).*$$/\1/g'); if [ "x$$ACTUAL" != "x$(EXPECTED)" ]; then echo "request to $(CURL_PROTO)://$(CURL_HOST):$(CURL_PORT)$(CURL_URL) returned $$ACTUAL, expected: $(EXPECTED)"; false; else true; fi +################################################################################ +### @brief test recovery +################################################################################ + +.PHONY: unittests-recovery +.PHONY: execute-recovery-test + +if ENABLE_MAINTAINER_MODE + +RECOVERY_TESTS = \ + @top_srcdir@/js/server/tests/recovery/corrupt-wal-marker.js + +execute-recovery-test: + $(VALGRIND) @builddir@/bin/arangod "$(VOCDIR)" $(SERVER_OPT) --server.endpoint tcp://$(VOCHOST):$(VOCPORT) --server.disable-authentication true --javascript.script "@top_srcdir@/js/server/tests/recovery/$(RECOVERY_SCRIPT).js" --javascript.script-parameter setup || true # the server will crash with segfault intentionally in this test + $(VALGRIND) @builddir@/bin/arangod "$(VOCDIR)" $(SERVER_OPT) --server.endpoint tcp://$(VOCHOST):$(VOCPORT) --server.disable-authentication true --javascript.script "@top_srcdir@/js/server/tests/recovery/$(RECOVERY_SCRIPT).js" --javascript.script-parameter recover || test "x$(FORCE)" == "x1" + +unittests-recovery: + @echo + @echo "================================================================================" + @echo "<< RECOVERY TESTS >>" + @echo "================================================================================" + @echo + + @rm -rf "$(VOCDIR)" + @mkdir -p "$(VOCDIR)/databases" + + $(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="corrupt-wal-marker" + + @rm -rf "$(VOCDIR)" + @echo + +else + +unittests-recovery: + +execute-recovery-test: + +endif + ################################################################################ ### @brief test configuration files ################################################################################ diff --git a/arangod/Wal/CollectorThread.cpp b/arangod/Wal/CollectorThread.cpp index 769c30a3f4..6b8aabc567 100644 --- a/arangod/Wal/CollectorThread.cpp +++ b/arangod/Wal/CollectorThread.cpp @@ -792,6 +792,11 @@ int CollectorThread::executeTransferMarkers (TRI_document_collection_t* document CollectorCache* cache, OperationsType const& operations) { +#ifdef TRI_ENABLE_MAINTAINER_MODE + // used only for crash / recovery tests + int numMarkers = 0; +#endif + TRI_voc_tick_t const minTransferTick = document->_tickMax; for (auto it2 = operations.begin(); it2 != operations.end(); ++it2) { @@ -801,6 +806,13 @@ int CollectorThread::executeTransferMarkers (TRI_document_collection_t* document // we have already transferred this marker in a previous run, nothing to do continue; } + + TRI_DEBUG_INTENTIONAL_FAIL_IF("CollectorThreadTransfer") { + if (++numMarkers > 5) { + // intentionally kill the server + TRI_SegfaultDebugging("CollectorThreadTransfer"); + } + } char const* base = reinterpret_cast(source); @@ -976,6 +988,11 @@ int CollectorThread::executeTransferMarkers (TRI_document_collection_t* document } } } + + TRI_DEBUG_INTENTIONAL_FAIL_IF("CollectorThreadTransferFinal") { + // intentionally kill the server + TRI_SegfaultDebugging("CollectorThreadTransferFinal"); + } return TRI_ERROR_NO_ERROR; } diff --git a/arangod/Wal/LogfileManager.cpp b/arangod/Wal/LogfileManager.cpp index 5adac06355..18441e0345 100644 --- a/arangod/Wal/LogfileManager.cpp +++ b/arangod/Wal/LogfileManager.cpp @@ -477,6 +477,11 @@ void LogfileManager::stop () { // close all open logfiles LOG_TRACE("closing logfiles"); closeLogfiles(); + + TRI_DEBUG_INTENTIONAL_FAIL_IF("LogfileManagerStop") { + // intentionally kill the server + TRI_SegfaultDebugging("LogfileManagerStop"); + } int res = writeShutdownInfo(true); diff --git a/arangod/Wal/Slot.cpp b/arangod/Wal/Slot.cpp index 7e8e0b1d29..cd2956014a 100644 --- a/arangod/Wal/Slot.cpp +++ b/arangod/Wal/Slot.cpp @@ -94,10 +94,15 @@ void Slot::fill (void* src, marker->_size = static_cast(size); // calculate the crc - marker->_crc = 0; + marker->_crc = 0; TRI_voc_crc_t crc = TRI_InitialCrc32(); crc = TRI_BlockCrc32(crc, (char const*) marker, static_cast(size)); marker->_crc = TRI_FinalCrc32(crc); + + TRI_DEBUG_INTENTIONAL_FAIL_IF("WalSlotCrc") { + // intentionally corrupt the marker + marker->_crc = 0xdeadbeef; + } // copy data into marker memcpy(_mem, src, size); diff --git a/js/server/tests/recovery/corrupt-wal-marker.js b/js/server/tests/recovery/corrupt-wal-marker.js new file mode 100644 index 0000000000..aaa4cc499b --- /dev/null +++ b/js/server/tests/recovery/corrupt-wal-marker.js @@ -0,0 +1,77 @@ + +var db = require("org/arangodb").db; +var internal = require("internal"); +var jsunity = require("jsunity"); + + +function runSetup () { + internal.debugClearFailAt(); + + var c = [ ], i, j; + for (i = 0; i < 10; ++i) { + c[i] = db._create("UnitTestsRecovery" + i); + + for (j = 0; j < 49; ++j) { + c[i].save({ a: j, b: "test" + j }); + } + + c[i].save({ a: 49, b: "test49" }, true); // sync + } + + internal.debugSetFailAt("WalSlotCrc"); + + // now corrupt all the collections + for (i = 0; i < 10; ++i) { + c[i].save({ a: 49, b: "test49" }); + } + + internal.debugSegfault("crashing server"); +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test suite +//////////////////////////////////////////////////////////////////////////////// + +function recoverySuite () { + jsunity.jsUnity.attachAssertions(); + + return { + setUp: function () { + }, + tearDown: function () { + }, + +//////////////////////////////////////////////////////////////////////////////// +/// @brief test whether we can restore the 10 collections +//////////////////////////////////////////////////////////////////////////////// + + testRecovery : function () { + var i, j, c; + for (i = 0; i < 10; ++i) { + c = db._collection("UnitTestsRecovery" + i); + + assertEqual(50, c.count()); + for (j = 0; j < 50; ++j) { + assertEqual(j, c.document("test" + j).a); + } + } + } + + }; +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief executes the test suite +//////////////////////////////////////////////////////////////////////////////// + +function main (argv) { + if (argv[1] === "setup") { + runSetup(); + return 0; + } + else { + jsunity.run(recoverySuite); + return jsunity.done(); + } +} + diff --git a/lib/BasicsC/debugging.c b/lib/BasicsC/debugging.c index e6fc86dd0e..1b06675e82 100644 --- a/lib/BasicsC/debugging.c +++ b/lib/BasicsC/debugging.c @@ -112,6 +112,11 @@ void TRI_SegfaultDebugging (char const* message) { bool TRI_ShouldFailDebugging (char const* value) { char* found; char* checkValue; + + if (FailurePoints == NULL) { + // try without the lock first + return false; + } checkValue = MakeValue(value);