1
0
Fork 0

create fat datafiles

This prevents SIGBUS when a sparse datafile is accessed and the disk is full. In
this case the mmapped region is not necessarily backed by physical memory, and
accessing the memory may crash the program
This commit is contained in:
Jan Steemann 2015-09-02 10:18:24 +02:00
parent 9c713ce181
commit a80d6635ab
22 changed files with 987 additions and 224 deletions

View File

@ -214,7 +214,7 @@ if ENABLE_FAILURE_TESTS
execute-recovery-test:
@rm -rf "$(VOCDIR)"
@mkdir -p "$(VOCDIR)/databases"
@builddir@/bin/arangod "$(VOCDIR)" --no-server $(SERVER_OPT) --server.threads 1 --wal.reserve-logfiles 1 --javascript.script "@top_srcdir@/js/server/tests/recovery/$(RECOVERY_SCRIPT).js" --javascript.script-parameter setup || true # the server will crash with segfault intentionally in this test
@builddir@/bin/arangod "$(VOCDIR)" --no-server $(SERVER_OPT) --server.threads 1 --wal.reserve-logfiles 1 --javascript.script "@top_srcdir@/js/server/tests/recovery/$(RECOVERY_SCRIPT).js" --javascript.script-parameter setup --log.level warning || true # the server will crash with segfault intentionally in this test
@rm -f core
$(VALGRIND) @builddir@/bin/arangod --no-server "$(VOCDIR)" $(SERVER_OPT) --server.threads 1 --wal.ignore-logfile-errors true --wal.reserve-logfiles 1 --javascript.script "@top_srcdir@/js/server/tests/recovery/$(RECOVERY_SCRIPT).js" --javascript.script-parameter recover || test "x$(FORCE)" == "x1"
@ -225,6 +225,10 @@ unittests-recovery:
@echo "================================================================================"
@echo
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="disk-full-logfile"
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="disk-full-logfile-data"
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="disk-full-datafile"
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="disk-full-datafile"
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="create-with-temp"
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="create-collection-fail"
$(MAKE) execute-recovery-test PID=$(PID) RECOVERY_SCRIPT="create-database-fail"
@ -500,8 +504,9 @@ SHELL_COMMON = \
SHELL_SERVER_ONLY = \
@top_srcdir@/js/server/tests/shell-readonly-noncluster-disabled.js \
@top_srcdir@/js/server/tests/shell-collection-not-loaded-timecritical-noncluster.js \
@top_srcdir@/js/server/tests/shell-wal-noncluster-memoryintense.js \
@top_srcdir@/js/server/tests/shell-datafile-timecritical-noncluster.js \
@top_srcdir@/js/server/tests/shell-collection-not-loaded-timecritical-noncluster.js \
@top_srcdir@/js/server/tests/shell-sharding-helpers.js \
@top_srcdir@/js/server/tests/shell-compaction-noncluster-timecritical.js \
@top_srcdir@/js/server/tests/shell-shaped-noncluster.js \

View File

@ -32,18 +32,16 @@
#endif
#include "datafile.h"
#include "Basics/conversions.h"
#include "Basics/files.h"
#include "Basics/hashes.h"
#include "Basics/logging.h"
#include "Basics/memory-map.h"
#include "Basics/tri-strings.h"
#include "Basics/files.h"
#include "VocBase/server.h"
// #define DEBUG_DATAFILE 1
// -----------------------------------------------------------------------------
// --SECTION-- private functions
// -----------------------------------------------------------------------------
@ -228,33 +226,78 @@ static bool CheckCrcMarker (TRI_df_marker_t const* marker,
}
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a new sparse datafile
/// @brief creates a new datafile
///
/// returns the file descriptor or -1 if the file cannot be created
////////////////////////////////////////////////////////////////////////////////
static int CreateSparseFile (char const* filename,
const TRI_voc_size_t maximalSize) {
static int CreateDatafile (char const* filename,
TRI_voc_size_t maximalSize) {
TRI_ERRORBUF;
TRI_lseek_t offset;
char zero;
ssize_t res;
int fd;
// open the file
fd = TRI_CREATE(filename, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
int fd = TRI_CREATE(filename, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
TRI_IF_FAILURE("CreateDatafile1") {
// intentionally fail
TRI_CLOSE(fd);
fd = -1;
errno = ENOSPC;
}
if (fd < 0) {
TRI_SYSTEM_ERROR();
if (errno == ENOSPC) {
TRI_set_errno(TRI_ERROR_ARANGO_FILESYSTEM_FULL);
LOG_ERROR("cannot create datafile '%s': %s", filename, TRI_last_error());
}
else {
TRI_SYSTEM_ERROR();
TRI_set_errno(TRI_ERROR_SYS_ERROR);
LOG_ERROR("cannot create datafile '%s': %s", filename, TRI_GET_ERRORBUF);
TRI_set_errno(TRI_ERROR_SYS_ERROR);
LOG_ERROR("cannot create datafile '%s': %s", filename, TRI_GET_ERRORBUF);
}
return -1;
}
// create sparse file
offset = TRI_LSEEK(fd, (TRI_lseek_t) (maximalSize - 1), SEEK_SET);
// fill file with zeros from FileNullBuffer
size_t written = 0;
while (written < maximalSize) {
size_t writeSize = TRI_GetNullBufferSizeFiles();
if (writeSize +written > maximalSize) {
writeSize = maximalSize - written;
}
ssize_t writeResult = TRI_WRITE(fd, TRI_GetNullBufferFiles(), writeSize);
TRI_IF_FAILURE("CreateDatafile2") {
// intentionally fail
writeResult = -1;
errno = ENOSPC;
}
if (writeResult < 0) {
if (errno == ENOSPC) {
TRI_set_errno(TRI_ERROR_ARANGO_FILESYSTEM_FULL);
LOG_ERROR("cannot create datafile '%s': %s", filename, TRI_last_error());
}
else {
TRI_SYSTEM_ERROR();
TRI_set_errno(TRI_ERROR_SYS_ERROR);
LOG_ERROR("cannot create datafile '%s': %s", filename, TRI_GET_ERRORBUF);
}
TRI_CLOSE(fd);
TRI_UnlinkFile(filename);
return -1;
}
written += static_cast<size_t>(writeResult);
}
// go back to offset 0
TRI_lseek_t offset = TRI_LSEEK(fd, (TRI_lseek_t) 0, SEEK_SET);
if (offset == (TRI_lseek_t) -1) {
TRI_SYSTEM_ERROR();
@ -268,21 +311,6 @@ static int CreateSparseFile (char const* filename,
return -1;
}
zero = 0;
res = TRI_WRITE(fd, &zero, 1);
if (res < 0) {
TRI_SYSTEM_ERROR();
TRI_set_errno(TRI_ERROR_SYS_ERROR);
TRI_CLOSE(fd);
// remove empty file
TRI_UnlinkFile(filename);
LOG_ERROR("cannot create sparse datafile '%s': '%s'", filename, TRI_GET_ERRORBUF);
return -1;
}
return fd;
}
@ -355,8 +383,6 @@ static int TruncateAndSealDatafile (TRI_datafile_t* datafile,
TRI_voc_size_t vocSize) {
TRI_ERRORBUF;
char* oldname;
char zero;
int res;
void* data;
void* mmHandle;
@ -368,7 +394,7 @@ static int TruncateAndSealDatafile (TRI_datafile_t* datafile,
// sanity check
if (sizeof(TRI_df_header_marker_t) + sizeof(TRI_df_footer_marker_t) > maximalSize) {
LOG_ERROR("cannot create datafile '%s', maximal size '%u' is too small", datafile->getName(datafile), (unsigned int) maximalSize);
LOG_ERROR("cannot create datafile '%s', maximal size %u is too small", datafile->getName(datafile), (unsigned int) maximalSize);
return TRI_set_errno(TRI_ERROR_ARANGO_MAXIMAL_SIZE_TOO_SMALL);
}
@ -379,13 +405,13 @@ static int TruncateAndSealDatafile (TRI_datafile_t* datafile,
if (fd < 0) {
TRI_SYSTEM_ERROR();
LOG_ERROR("cannot create new datafile '%s': '%s'", filename, TRI_GET_ERRORBUF);
LOG_ERROR("cannot create new datafile '%s': %s", filename, TRI_GET_ERRORBUF);
TRI_FreeString(TRI_CORE_MEM_ZONE, filename);
return TRI_set_errno(TRI_ERROR_SYS_ERROR);
}
// create sparse file
// go back to the beginning of the file
TRI_lseek_t offset = TRI_LSEEK(fd, (TRI_lseek_t) (maximalSize - 1), SEEK_SET);
if (offset == (TRI_lseek_t) -1) {
@ -396,14 +422,14 @@ static int TruncateAndSealDatafile (TRI_datafile_t* datafile,
// remove empty file
TRI_UnlinkFile(filename);
LOG_ERROR("cannot seek in new datafile '%s': '%s'", filename, TRI_GET_ERRORBUF);
LOG_ERROR("cannot seek in new datafile '%s': %s", filename, TRI_GET_ERRORBUF);
TRI_FreeString(TRI_CORE_MEM_ZONE, filename);
return TRI_ERROR_SYS_ERROR;
}
zero = 0;
res = TRI_WRITE(fd, &zero, 1);
char zero = 0;
int res = TRI_WRITE(fd, &zero, 1);
if (res < 0) {
TRI_SYSTEM_ERROR();
@ -413,7 +439,7 @@ static int TruncateAndSealDatafile (TRI_datafile_t* datafile,
// remove empty file
TRI_UnlinkFile(filename);
LOG_ERROR("cannot create sparse datafile '%s': '%s'", filename, TRI_GET_ERRORBUF);
LOG_ERROR("cannot create datafile '%s': %s", filename, TRI_GET_ERRORBUF);
TRI_FreeString(TRI_CORE_MEM_ZONE, filename);
return TRI_ERROR_SYS_ERROR;
@ -430,7 +456,7 @@ static int TruncateAndSealDatafile (TRI_datafile_t* datafile,
// remove empty file
TRI_UnlinkFile(filename);
LOG_ERROR("cannot memory map file '%s': '%s'", filename, TRI_GET_ERRORBUF);
LOG_ERROR("cannot memory map file '%s': %s", filename, TRI_GET_ERRORBUF);
TRI_FreeString(TRI_CORE_MEM_ZONE, filename);
return TRI_errno();
@ -1250,7 +1276,6 @@ TRI_datafile_t* TRI_CreateDatafile (char const* filename,
return nullptr;
}
datafile->_state = TRI_DF_STATE_WRITE;
if (withInitialMarkers) {
@ -1358,12 +1383,9 @@ TRI_datafile_t* TRI_CreateAnonymousDatafile (TRI_voc_fid_t fid,
TRI_datafile_t* TRI_CreatePhysicalDatafile (char const* filename,
TRI_voc_fid_t fid,
TRI_voc_size_t maximalSize) {
void* data;
void* mmHandle;
TRI_ASSERT(filename != nullptr);
int fd = CreateSparseFile(filename, maximalSize);
int fd = CreateDatafile(filename, maximalSize);
if (fd < 0) {
// an error occurred
@ -1371,7 +1393,14 @@ TRI_datafile_t* TRI_CreatePhysicalDatafile (char const* filename,
}
// memory map the data
ssize_t res = TRI_MMFile(0, maximalSize, PROT_WRITE | PROT_READ, MAP_SHARED, fd, &mmHandle, 0, &data);
void* data;
void* mmHandle;
int flags = MAP_SHARED;
#ifdef __linux__
// try populating the mapping already
flags |= MAP_POPULATE;
#endif
ssize_t res = TRI_MMFile(0, maximalSize, PROT_WRITE | PROT_READ, flags, fd, &mmHandle, 0, &data);
if (res != TRI_ERROR_NO_ERROR) {
TRI_set_errno(res);
@ -1620,7 +1649,7 @@ int TRI_ReserveElementDatafile (TRI_datafile_t* datafile,
LOG_TRACE("cannot write marker, not enough space");
return datafile->_lastError;
return TRI_ERROR_ARANGO_DATAFILE_FULL;
}
*position = (TRI_df_marker_t*) datafile->_next;

View File

@ -2426,9 +2426,9 @@ TRI_datafile_t* TRI_CreateDatafileDocumentCollection (TRI_document_collection_t*
TRI_IF_FAILURE("CreateJournalDocumentCollection") {
// simulate disk full
TRI_FreeString(TRI_CORE_MEM_ZONE, filename);
document->_lastError = TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY_MMAP);
document->_lastError = TRI_set_errno(TRI_ERROR_ARANGO_FILESYSTEM_FULL);
EnsureErrorCode(ENOSPC);
EnsureErrorCode(TRI_ERROR_ARANGO_FILESYSTEM_FULL);
return nullptr;
}

View File

@ -60,7 +60,9 @@ AllocatorThread::AllocatorThread (LogfileManager* logfileManager)
_recoveryLock(),
_requestedSize(0),
_stop(0),
_inRecovery(true) {
_inRecovery(true),
_allocatorResultCondition(),
_allocatorResult(TRI_ERROR_NO_ERROR) {
allowAsynchronousCancelation();
}
@ -76,6 +78,22 @@ AllocatorThread::~AllocatorThread () {
// --SECTION-- public methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief wait for the collector result
////////////////////////////////////////////////////////////////////////////////
int AllocatorThread::waitForResult (uint64_t timeout) {
CONDITION_LOCKER(guard, _allocatorResultCondition);
if (_allocatorResult == TRI_ERROR_NO_ERROR) {
if (guard.wait(timeout)) {
return TRI_ERROR_LOCK_TIMEOUT;
}
}
return _allocatorResult;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief stops the allocator thread
////////////////////////////////////////////////////////////////////////////////
@ -109,14 +127,16 @@ void AllocatorThread::signal (uint32_t markerSize) {
guard.signal();
}
// -----------------------------------------------------------------------------
// --SECTION-- private methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a new reserve logfile
////////////////////////////////////////////////////////////////////////////////
bool AllocatorThread::createReserveLogfile (uint32_t size) {
int res = _logfileManager->createReserveLogfile(size);
return (res == TRI_ERROR_NO_ERROR);
int AllocatorThread::createReserveLogfile (uint32_t size) {
return _logfileManager->createReserveLogfile(size);
}
// -----------------------------------------------------------------------------
@ -130,43 +150,59 @@ bool AllocatorThread::createReserveLogfile (uint32_t size) {
void AllocatorThread::run () {
while (_stop == 0) {
uint32_t requestedSize = 0;
{
CONDITION_LOCKER(guard, _condition);
requestedSize = _requestedSize;
_requestedSize = 0;
}
int res = TRI_ERROR_NO_ERROR;
try {
if (requestedSize == 0 &&
! inRecovery() &&
! _logfileManager->hasReserveLogfiles()) {
// only create reserve files if we are not in the recovery mode
if (createReserveLogfile(0)) {
res = createReserveLogfile(0);
if (res == TRI_ERROR_NO_ERROR) {
continue;
}
LOG_ERROR("unable to create new WAL reserve logfile");
LOG_ERROR("unable to create new WAL reserve logfile for sized marker: %s",
TRI_errno_string(res));
}
else if (requestedSize > 0 &&
_logfileManager->logfileCreationAllowed(requestedSize)) {
if (createReserveLogfile(requestedSize)) {
_logfileManager->logfileCreationAllowed(requestedSize)) {
res = createReserveLogfile(requestedSize);
if (res == TRI_ERROR_NO_ERROR) {
continue;
}
LOG_ERROR("unable to create new WAL reserve logfile");
LOG_ERROR("unable to create new WAL reserve logfile: %s",
TRI_errno_string(res));
}
}
catch (triagens::basics::Exception const& ex) {
int res = ex.code();
res = ex.code();
LOG_ERROR("got unexpected error in allocatorThread: %s", TRI_errno_string(res));
}
catch (...) {
LOG_ERROR("got unspecific error in allocatorThread");
}
// reset allocator status
{
CONDITION_LOCKER(guard, _allocatorResultCondition);
_allocatorResult = res;
}
{
CONDITION_LOCKER(guard, _condition);
guard.wait(Interval);
}
}

View File

@ -93,10 +93,10 @@ namespace triagens {
void signal (uint32_t);
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a new reserve logfile
/// @brief wait for allocator result
////////////////////////////////////////////////////////////////////////////////
bool createReserveLogfile (uint32_t);
int waitForResult (uint64_t);
////////////////////////////////////////////////////////////////////////////////
/// @brief tell the thread that the recovery phase is over
@ -134,6 +134,12 @@ namespace triagens {
private:
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a new reserve logfile
////////////////////////////////////////////////////////////////////////////////
int createReserveLogfile (uint32_t);
// -----------------------------------------------------------------------------
// --SECTION-- private variables
// -----------------------------------------------------------------------------
@ -176,6 +182,18 @@ namespace triagens {
bool _inRecovery;
////////////////////////////////////////////////////////////////////////////////
/// @brief condition variable for the allocator thread
////////////////////////////////////////////////////////////////////////////////
basics::ConditionVariable _allocatorResultCondition;
////////////////////////////////////////////////////////////////////////////////
/// @brief allocation result
////////////////////////////////////////////////////////////////////////////////
int _allocatorResult;
////////////////////////////////////////////////////////////////////////////////
/// @brief wait interval for the allocator thread when idle
////////////////////////////////////////////////////////////////////////////////

View File

@ -292,7 +292,9 @@ CollectorThread::CollectorThread (LogfileManager* logfileManager,
_operationsQueue(),
_operationsQueueInUse(false),
_stop(0),
_numPendingOperations(0) {
_numPendingOperations(0),
_collectorResultCondition(),
_collectorResult(TRI_ERROR_NO_ERROR) {
allowAsynchronousCancelation();
}
@ -308,6 +310,22 @@ CollectorThread::~CollectorThread () {
// --SECTION-- public methods
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief wait for the collector result
////////////////////////////////////////////////////////////////////////////////
int CollectorThread::waitForResult (uint64_t timeout) {
CONDITION_LOCKER(guard, _collectorResultCondition);
if (_collectorResult == TRI_ERROR_NO_ERROR) {
if (guard.wait(timeout)) {
return TRI_ERROR_LOCK_TIMEOUT;
}
}
return _collectorResult;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief stops the collector thread
////////////////////////////////////////////////////////////////////////////////
@ -347,18 +365,35 @@ void CollectorThread::run () {
while (true) {
int stop = (int) _stop;
bool worked = false;
bool hasWorked = false;
bool doDelay = false;
try {
// step 1: collect a logfile if any qualifies
if (stop == 0) {
if (stop == 0) {
// don't collect additional logfiles in case we want to shut down
worked |= this->collectLogfiles();
bool worked;
int res = this->collectLogfiles(worked);
if (res == TRI_ERROR_NO_ERROR) {
hasWorked |= worked;
}
else if (res == TRI_ERROR_ARANGO_FILESYSTEM_FULL) {
doDelay = true;
}
}
// step 2: update master pointers
try {
worked |= this->processQueuedOperations();
bool worked;
int res = this->processQueuedOperations(worked);
if (res == TRI_ERROR_NO_ERROR) {
hasWorked |= worked;
}
else if (res == TRI_ERROR_ARANGO_FILESYSTEM_FULL) {
doDelay = true;
}
}
catch (...) {
// re-activate the queue
@ -375,11 +410,19 @@ void CollectorThread::run () {
LOG_ERROR("got unspecific error in collectorThread::run");
}
if (stop == 0 && ! worked) {
uint64_t interval = Interval;
if (doDelay) {
hasWorked = false;
// wait longer before retrying in case disk is full
interval *= 2;
}
if (stop == 0 && ! hasWorked) {
// sleep only if there was nothing to do
CONDITION_LOCKER(guard, _condition);
if (! guard.wait(Interval)) {
if (! guard.wait(interval)) {
if (++counter > 10) {
LOG_TRACE("wal collector has queued operations: %d", (int) numQueuedOperations());
counter = 0;
@ -408,38 +451,68 @@ void CollectorThread::run () {
/// @brief step 1: perform collection of a logfile (if any)
////////////////////////////////////////////////////////////////////////////////
bool CollectorThread::collectLogfiles () {
int CollectorThread::collectLogfiles (bool& worked) {
// always init result variable
worked = false;
TRI_IF_FAILURE("CollectorThreadCollect") {
return false;
return TRI_ERROR_NO_ERROR;
}
Logfile* logfile = _logfileManager->getCollectableLogfile();
if (logfile == nullptr) {
return false;
return TRI_ERROR_NO_ERROR;
}
worked = true;
_logfileManager->setCollectionRequested(logfile);
int res = collect(logfile);
try {
int res = collect(logfile);
// LOG_TRACE("collected logfile: %llu. result: %d", (unsigned long long) logfile->id(), res);
if (res == TRI_ERROR_NO_ERROR) {
_logfileManager->setCollectionDone(logfile);
return true;
if (res == TRI_ERROR_NO_ERROR) {
// reset collector status
{
CONDITION_LOCKER(guard, _collectorResultCondition);
_collectorResult = TRI_ERROR_NO_ERROR;
}
_logfileManager->setCollectionDone(logfile);
}
else {
// return the logfile to the logfile manager in case of errors
_logfileManager->forceStatus(logfile, Logfile::StatusType::SEALED);
// set error in collector
{
CONDITION_LOCKER(guard, _collectorResultCondition);
_collectorResult = res;
_collectorResultCondition.broadcast();
}
}
return res;
}
catch (...) {
LOG_DEBUG("collecting logfile %llu failed", (unsigned long long) logfile->id());
_logfileManager->setCollectionDone(logfile);
return TRI_ERROR_INTERNAL;
}
// return the logfile to the logfile manager in case of errors
_logfileManager->forceStatus(logfile, Logfile::StatusType::SEALED);
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief step 2: process all still-queued collection operations
////////////////////////////////////////////////////////////////////////////////
bool CollectorThread::processQueuedOperations () {
int CollectorThread::processQueuedOperations (bool& worked) {
// always init result variable
worked = false;
TRI_IF_FAILURE("CollectorThreadProcessQueuedOperations") {
return false;
return TRI_ERROR_NO_ERROR;
}
{
@ -448,7 +521,7 @@ bool CollectorThread::processQueuedOperations () {
if (_operationsQueue.empty()) {
// nothing to do
return false;
return TRI_ERROR_NO_ERROR;
}
// this flag indicates that no one else must write to the queue
@ -542,7 +615,9 @@ bool CollectorThread::processQueuedOperations () {
_operationsQueueInUse = false;
}
return true;
worked = true;
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
@ -800,7 +875,6 @@ int CollectorThread::collect (Logfile* logfile) {
if (state.documentOperations.find(cid) != state.documentOperations.end()) {
DocumentOperationsType const& ops = state.documentOperations[cid];
for (auto it2 = ops.begin(); it2 != ops.end(); ++it2) {
sortedOperations.push_back((*it2).second);
}
@ -827,7 +901,13 @@ int CollectorThread::collect (Logfile* logfile) {
if (res != TRI_ERROR_NO_ERROR &&
res != TRI_ERROR_ARANGO_DATABASE_NOT_FOUND &&
res != TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) {
LOG_WARNING("got unexpected error in CollectorThread::collect: %s", TRI_errno_string(res));
if (res != TRI_ERROR_ARANGO_FILESYSTEM_FULL) {
// other places already log this error, and making the logging conditional here
// prevents the log message from being shown over and over again in case the
// file system is full
LOG_WARNING("got unexpected error in CollectorThread::collect: %s", TRI_errno_string(res));
}
// abort early
return res;
}

View File

@ -242,6 +242,12 @@ namespace triagens {
public:
////////////////////////////////////////////////////////////////////////////////
/// @brief wait for the collector result
////////////////////////////////////////////////////////////////////////////////
int waitForResult (uint64_t);
////////////////////////////////////////////////////////////////////////////////
/// @brief stops the collector thread
////////////////////////////////////////////////////////////////////////////////
@ -288,13 +294,13 @@ namespace triagens {
/// @brief step 1: perform collection of a logfile (if any)
////////////////////////////////////////////////////////////////////////////////
bool collectLogfiles ();
int collectLogfiles (bool&);
////////////////////////////////////////////////////////////////////////////////
/// @brief step 2: process all still-queued collection operations
////////////////////////////////////////////////////////////////////////////////
bool processQueuedOperations ();
int processQueuedOperations (bool&);
////////////////////////////////////////////////////////////////////////////////
/// @brief process all operations for a single collection
@ -428,6 +434,18 @@ namespace triagens {
uint64_t _numPendingOperations;
////////////////////////////////////////////////////////////////////////////////
/// @brief condition variable for the collector thread result
////////////////////////////////////////////////////////////////////////////////
basics::ConditionVariable _collectorResultCondition;
////////////////////////////////////////////////////////////////////////////////
/// @brief last collector result
////////////////////////////////////////////////////////////////////////////////
int _collectorResult;
////////////////////////////////////////////////////////////////////////////////
/// @brief wait interval for the collector thread when idle
////////////////////////////////////////////////////////////////////////////////

View File

@ -86,8 +86,7 @@ Logfile* Logfile::createNew (std::string const& filename,
}
}
Logfile* logfile = new Logfile(id, df, StatusType::EMPTY);
return logfile;
return new Logfile(id, df, StatusType::EMPTY);
}
////////////////////////////////////////////////////////////////////////////////

View File

@ -971,24 +971,26 @@ int LogfileManager::flush (bool waitForSync,
if (res == TRI_ERROR_NO_ERROR) {
// we need to wait for the collector...
this->waitForCollector(lastOpenLogfileId, maxWaitTime);
// LOG_TRACE("entering waitForCollector with lastOpenLogfileId %llu", (unsigned long long) lastOpenLogfileId);
res = this->waitForCollector(lastOpenLogfileId, maxWaitTime);
}
else if (res == TRI_ERROR_ARANGO_DATAFILE_EMPTY) {
// current logfile is empty and cannot be collected
// we need to wait for the collector to collect the previously sealed datafile
if (lastSealedLogfileId > 0) {
this->waitForCollector(lastSealedLogfileId, maxWaitTime);
res = this->waitForCollector(lastSealedLogfileId, maxWaitTime);
}
}
}
if (writeShutdownFile) {
if (writeShutdownFile &&
(res == TRI_ERROR_NO_ERROR || res == TRI_ERROR_ARANGO_DATAFILE_EMPTY)) {
// update the file with the last tick, last sealed etc.
return writeShutdownInfo(false);
}
return TRI_ERROR_NO_ERROR;
return res;
}
////////////////////////////////////////////////////////////////////////////////
@ -1283,16 +1285,20 @@ Logfile* LogfileManager::getLogfile (Logfile::IdType id,
/// @brief get a logfile for writing. this may return nullptr
////////////////////////////////////////////////////////////////////////////////
Logfile* LogfileManager::getWriteableLogfile (uint32_t size,
Logfile::StatusType& status) {
int LogfileManager::getWriteableLogfile (uint32_t size,
Logfile::StatusType& status,
Logfile*& result) {
static const uint64_t SleepTime = 10 * 1000;
static const uint64_t MaxIterations = 1500;
size_t iterations = 0;
bool haveSignalled = false;
// always initialize the result
result = nullptr;
TRI_IF_FAILURE("LogfileManagerGetWriteableLogfile") {
// intentionally don't return a logfile
return nullptr;
return TRI_ERROR_DEBUG;
}
while (++iterations < MaxIterations) {
@ -1309,12 +1315,15 @@ Logfile* LogfileManager::getWriteableLogfile (uint32_t size,
// found a logfile, update the status variable and return the logfile
{
// LOG_TRACE("setting lastOpenedId %llu", (unsigned long long) logfile->id());
MUTEX_LOCKER(_idLock);
_lastOpenedId = logfile->id();
}
result = logfile;
status = logfile->status();
return logfile;
return TRI_ERROR_NO_ERROR;
}
if (logfile->status() == Logfile::StatusType::EMPTY &&
@ -1339,12 +1348,22 @@ Logfile* LogfileManager::getWriteableLogfile (uint32_t size,
_allocatorThread->signal(size);
haveSignalled = true;
}
usleep(SleepTime);
int res = _allocatorThread->waitForResult(SleepTime);
if (res != TRI_ERROR_LOCK_TIMEOUT &&
res != TRI_ERROR_NO_ERROR) {
TRI_ASSERT(result == nullptr);
// some error occurred
return res;
}
}
TRI_ASSERT(result == nullptr);
LOG_WARNING("unable to acquire writeable WAL logfile after %llu ms", (unsigned long long) (MaxIterations * SleepTime) / 1000);
return nullptr;
return TRI_ERROR_LOCK_TIMEOUT;
}
////////////////////////////////////////////////////////////////////////////////
@ -1501,6 +1520,7 @@ void LogfileManager::setCollectionDone (Logfile* logfile) {
TRI_ASSERT(logfile != nullptr);
Logfile::IdType id = logfile->id();
// LOG_ERROR("setCollectionDone setting lastCollectedId to %llu", (unsigned long long) id);
{
WRITE_LOCKER(_logfilesLock);
logfile->setStatus(Logfile::StatusType::COLLECTED);
@ -1605,8 +1625,8 @@ void LogfileManager::removeLogfile (Logfile* logfile) {
/// @brief wait until a specific logfile has been collected
////////////////////////////////////////////////////////////////////////////////
void LogfileManager::waitForCollector (Logfile::IdType logfileId,
double maxWaitTime) {
int LogfileManager::waitForCollector (Logfile::IdType logfileId,
double maxWaitTime) {
static const int64_t SingleWaitPeriod = 50 * 1000;
int64_t maxIterations = INT64_MAX; // wait forever
@ -1616,19 +1636,32 @@ void LogfileManager::waitForCollector (Logfile::IdType logfileId,
LOG_TRACE("will wait for max. %f seconds for collector to finish", maxWaitTime);
}
LOG_TRACE("waiting for collector thread to collect logfile %llu", (unsigned long long) logfileId);
LOG_TRACE("waiting for collector thread to collect logfile %llu",
(unsigned long long) logfileId);
// wait for the collector thread to finish the collection
int64_t iterations = 0;
while (++iterations < maxIterations) {
if (_lastCollectedId >= logfileId) {
return;
return TRI_ERROR_NO_ERROR;
}
LOG_TRACE("waiting for collector");
usleep(SingleWaitPeriod);
int res = _collectorThread->waitForResult(SingleWaitPeriod);
// LOG_TRACE("still waiting for collector. logfileId: %llu lastCollected: %llu, result: %d", (unsigned long long) logfileId, (unsigned long long) _lastCollectedId, (int) res);
if (res != TRI_ERROR_LOCK_TIMEOUT &&
res != TRI_ERROR_NO_ERROR) {
// some error occurred
return res;
}
// try again
}
// waited for too long
return TRI_ERROR_LOCK_TIMEOUT;
}
////////////////////////////////////////////////////////////////////////////////
@ -1763,6 +1796,7 @@ int LogfileManager::writeShutdownInfo (bool writeShutdownTime) {
if (json == nullptr) {
LOG_ERROR("unable to write WAL state file '%s'", filename.c_str());
return TRI_ERROR_OUT_OF_MEMORY;
}
@ -1801,7 +1835,6 @@ int LogfileManager::writeShutdownInfo (bool writeShutdownTime) {
if (! ok) {
LOG_ERROR("unable to write WAL state file '%s'", filename.c_str());
return TRI_ERROR_CANNOT_WRITE_FILE;
}

View File

@ -606,8 +606,9 @@ namespace triagens {
/// @brief get a logfile for writing. this may return nullptr
////////////////////////////////////////////////////////////////////////////////
Logfile* getWriteableLogfile (uint32_t,
Logfile::StatusType&);
int getWriteableLogfile (uint32_t,
Logfile::StatusType&,
Logfile*&);
////////////////////////////////////////////////////////////////////////////////
/// @brief get a logfile to collect. this may return nullptr
@ -682,8 +683,8 @@ namespace triagens {
/// @brief wait for the collector thread to collect a specific logfile
////////////////////////////////////////////////////////////////////////////////
void waitForCollector (Logfile::IdType,
double);
int waitForCollector (Logfile::IdType,
double);
////////////////////////////////////////////////////////////////////////////////
/// @brief run the recovery procedure

View File

@ -52,7 +52,7 @@ Slots::Slots (LogfileManager* logfileManager,
: _logfileManager(logfileManager),
_condition(),
_lock(),
_slots(new Slot[numberOfSlots]),
_slots(nullptr),
_numberOfSlots(numberOfSlots),
_freeSlots(numberOfSlots),
_waiting(0),
@ -63,6 +63,8 @@ Slots::Slots (LogfileManager* logfileManager,
_lastCommittedTick(0),
_lastCommittedDataTick(0),
_numEvents(0) {
_slots = new Slot[numberOfSlots];
}
////////////////////////////////////////////////////////////////////////////////
@ -70,9 +72,7 @@ Slots::Slots (LogfileManager* logfileManager,
////////////////////////////////////////////////////////////////////////////////
Slots::~Slots () {
if (_slots != nullptr) {
delete[] _slots;
}
delete[] _slots;
}
// -----------------------------------------------------------------------------
@ -110,6 +110,9 @@ int Slots::flush (bool waitForSync) {
if (! waitForTick(lastTick)) {
res = TRI_ERROR_ARANGO_SYNC_TIMEOUT;
}
else if (! worked) {
res = TRI_ERROR_ARANGO_DATAFILE_EMPTY;
}
}
else if (! worked) {
// logfile to flush was still empty and thus not flushed
@ -176,33 +179,41 @@ SlotInfo Slots::nextUnused (uint32_t size) {
_logfile = nullptr;
}
TRI_IF_FAILURE("LogfileManagerGetWriteableLogfile") {
return SlotInfo(TRI_ERROR_ARANGO_NO_JOURNAL);
}
// fetch the next free logfile (this may create a new one)
Logfile::StatusType status = newLogfile(alignedSize);
Logfile::StatusType status;
int res = newLogfile(alignedSize, status);
if (_logfile == nullptr) {
usleep(10 * 1000);
TRI_IF_FAILURE("LogfileManagerGetWriteableLogfile") {
return SlotInfo(TRI_ERROR_ARANGO_NO_JOURNAL);
}
// try again in next iteration
}
else if (status == Logfile::StatusType::EMPTY) {
// inititialise the empty logfile by writing a header marker
int res = writeHeader(slot);
if (res != TRI_ERROR_NO_ERROR) {
if (res != TRI_ERROR_NO_ERROR) {
if (res != TRI_ERROR_ARANGO_NO_JOURNAL) {
return SlotInfo(res);
}
// advance to next slot
slot = &_slots[_handoutIndex];
_logfileManager->setLogfileOpen(_logfile);
usleep(10 * 1000);
// try again in next iteration
}
else {
TRI_ASSERT(status == Logfile::StatusType::OPEN);
TRI_ASSERT(_logfile != nullptr);
if (status == Logfile::StatusType::EMPTY) {
// inititialise the empty logfile by writing a header marker
int res = writeHeader(slot);
if (res != TRI_ERROR_NO_ERROR) {
return SlotInfo(res);
}
// advance to next slot
slot = &_slots[_handoutIndex];
_logfileManager->setLogfileOpen(_logfile);
}
else {
TRI_ASSERT(status == Logfile::StatusType::OPEN);
}
}
}
@ -220,7 +231,7 @@ SlotInfo Slots::nextUnused (uint32_t size) {
return SlotInfo(slot);
}
}
// if we get here, all slots are busy
CONDITION_LOCKER(guard, _condition);
if (! hasWaited) {
@ -237,6 +248,7 @@ SlotInfo Slots::nextUnused (uint32_t size) {
if (mustWait) {
guard.wait(10 * 1000);
}
}
return SlotInfo(TRI_ERROR_ARANGO_NO_JOURNAL);
@ -295,33 +307,41 @@ SlotInfo Slots::nextUnused (uint32_t size,
_logfile = nullptr;
}
TRI_IF_FAILURE("LogfileManagerGetWriteableLogfile") {
return SlotInfo(TRI_ERROR_ARANGO_NO_JOURNAL);
}
// fetch the next free logfile (this may create a new one)
Logfile::StatusType status = newLogfile(alignedSize);
Logfile::StatusType status;
int res = newLogfile(alignedSize, status);
if (_logfile == nullptr) {
usleep(10 * 1000);
TRI_IF_FAILURE("LogfileManagerGetWriteableLogfile") {
return SlotInfo(TRI_ERROR_ARANGO_NO_JOURNAL);
}
// try again in next iteration
}
else if (status == Logfile::StatusType::EMPTY) {
// inititialise the empty logfile by writing a header marker
int res = writeHeader(slot);
if (res != TRI_ERROR_NO_ERROR) {
if (res != TRI_ERROR_NO_ERROR) {
if (res != TRI_ERROR_ARANGO_NO_JOURNAL) {
return SlotInfo(res);
}
// advance to next slot
slot = &_slots[_handoutIndex];
_logfileManager->setLogfileOpen(_logfile);
usleep(10 * 1000);
// try again in next iteration
}
else {
TRI_ASSERT(status == Logfile::StatusType::OPEN);
TRI_ASSERT(_logfile != nullptr);
if (status == Logfile::StatusType::EMPTY) {
// inititialise the empty logfile by writing a header marker
int res = writeHeader(slot);
if (res != TRI_ERROR_NO_ERROR) {
return SlotInfo(res);
}
// advance to next slot
slot = &_slots[_handoutIndex];
_logfileManager->setLogfileOpen(_logfile);
}
else {
TRI_ASSERT(status == Logfile::StatusType::OPEN);
}
}
}
@ -354,7 +374,7 @@ SlotInfo Slots::nextUnused (uint32_t size,
return SlotInfo(slot);
}
}
// if we get here, all slots are busy
CONDITION_LOCKER(guard, _condition);
if (! hasWaited) {
@ -404,7 +424,8 @@ void Slots::returnUsed (SlotInfo& slotInfo,
/// @brief get the next synchronisable region
////////////////////////////////////////////////////////////////////////////////
SyncRegion Slots::getSyncRegion () {
SyncRegion Slots::getSyncRegion () {
bool sealRequested = false;
SyncRegion region;
MUTEX_LOCKER(_lock);
@ -415,11 +436,16 @@ SyncRegion Slots::getSyncRegion () {
Slot const* slot = &_slots[slotIndex];
TRI_ASSERT(slot != nullptr);
if (sealRequested && slot->isUnused()) {
region.canSeal = true;
}
if (! slot->isReturned()) {
// found a slot that is not yet returned
// if it belongs to another logfile, we can seal the logfile we created
// the region for
auto otherId = slot->logfileId();
if (region.logfileId != 0 && otherId != 0 &&
otherId != region.logfileId) {
region.canSeal = true;
@ -440,6 +466,10 @@ SyncRegion Slots::getSyncRegion () {
region.firstSlotIndex = slotIndex;
region.lastSlotIndex = slotIndex;
region.waitForSync = slot->waitForSync();
if (status == Logfile::StatusType::SEAL_REQUESTED) {
sealRequested = true;
}
}
else {
if (slot->logfileId() != region.logfileId) {
@ -617,37 +647,46 @@ int Slots::closeLogfile (Slot::TickType& lastCommittedTick,
// fall-through intentional
}
TRI_IF_FAILURE("LogfileManagerGetWriteableLogfile") {
return TRI_ERROR_ARANGO_NO_JOURNAL;
}
TRI_ASSERT(_logfile == nullptr);
// fetch the next free logfile (this may create a new one)
// note: as we don't have a real marker to write the size does
// not matter (we use a size of 1 as it must be > 0)
Logfile::StatusType status = newLogfile(1);
Logfile::StatusType status;
int res = newLogfile(1, status);
if (_logfile == nullptr) {
TRI_IF_FAILURE("LogfileManagerGetWriteableLogfile") {
return TRI_ERROR_ARANGO_NO_JOURNAL;
if (res != TRI_ERROR_NO_ERROR) {
if (res != TRI_ERROR_ARANGO_NO_JOURNAL) {
return res;
}
usleep(10 * 1000);
// try again in next iteration
}
else if (status == Logfile::StatusType::EMPTY) {
// inititialise the empty logfile by writing a header marker
int res = writeHeader(slot);
else {
TRI_ASSERT(_logfile != nullptr);
if (res != TRI_ERROR_NO_ERROR) {
LOG_ERROR("could not write logfile header: %s", TRI_errno_string(res));
return res;
if (status == Logfile::StatusType::EMPTY) {
// inititialise the empty logfile by writing a header marker
res = writeHeader(slot);
if (res != TRI_ERROR_NO_ERROR) {
LOG_ERROR("could not write logfile header: %s", TRI_errno_string(res));
return res;
}
_logfileManager->setLogfileOpen(_logfile);
worked = true;
}
else {
TRI_ASSERT(status == Logfile::StatusType::OPEN);
worked = false;
}
_logfileManager->setLogfileOpen(_logfile);
worked = true;
return TRI_ERROR_NO_ERROR;
}
else {
TRI_ASSERT(status == Logfile::StatusType::OPEN);
worked = false;
return TRI_ERROR_NO_ERROR;
}
}
@ -757,13 +796,20 @@ bool Slots::waitForTick (Slot::TickType tick) {
/// specified size
////////////////////////////////////////////////////////////////////////////////
Logfile::StatusType Slots::newLogfile (uint32_t size) {
int Slots::newLogfile (uint32_t size,
Logfile::StatusType& status) {
TRI_ASSERT(size > 0);
Logfile::StatusType status = Logfile::StatusType::UNKNOWN;
_logfile = _logfileManager->getWriteableLogfile(size, status);
status = Logfile::StatusType::UNKNOWN;
Logfile* logfile = nullptr;
int res = _logfileManager->getWriteableLogfile(size, status, logfile);
if (res == TRI_ERROR_NO_ERROR) {
TRI_ASSERT(logfile != nullptr);
_logfile = logfile;
}
return status;
return res;
}
// -----------------------------------------------------------------------------

View File

@ -249,7 +249,8 @@ namespace triagens {
/// specified size
////////////////////////////////////////////////////////////////////////////////
Logfile::StatusType newLogfile (uint32_t);
int newLogfile (uint32_t,
Logfile::StatusType& status);
// -----------------------------------------------------------------------------
// --SECTION-- private variables
@ -279,7 +280,7 @@ namespace triagens {
/// @brief all slots
////////////////////////////////////////////////////////////////////////////////
Slot* const _slots;
Slot* _slots;
////////////////////////////////////////////////////////////////////////////////
/// @brief the total number of slots

View File

@ -1814,23 +1814,6 @@ exports.stopColorPrint = function (silent) {
// --SECTION-- public utility functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief exponentialBackoff
////////////////////////////////////////////////////////////////////////////////
exports.exponentialBackOff = function (n, i) {
if (i === 0) {
return 0;
}
if (n === 0) {
return 0;
}
if (n === 1) {
return Math.random() < 0.5 ? 0 : i;
}
return Math.floor(Math.random() * (n + 1)) * i;
};
////////////////////////////////////////////////////////////////////////////////
/// @brief env
////////////////////////////////////////////////////////////////////////////////

View File

@ -1814,23 +1814,6 @@ exports.stopColorPrint = function (silent) {
// --SECTION-- public utility functions
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief exponentialBackoff
////////////////////////////////////////////////////////////////////////////////
exports.exponentialBackOff = function (n, i) {
if (i === 0) {
return 0;
}
if (n === 0) {
return 0;
}
if (n === 1) {
return Math.random() < 0.5 ? 0 : i;
}
return Math.floor(Math.random() * (n + 1)) * i;
};
////////////////////////////////////////////////////////////////////////////////
/// @brief env
////////////////////////////////////////////////////////////////////////////////

View File

@ -0,0 +1,103 @@
/*jshint globalstrict:false, strict:false, unused : false */
/*global fail, assertEqual */
////////////////////////////////////////////////////////////////////////////////
/// @brief tests for dump/reload
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2010-2012 triagens GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is triAGENS GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Copyright 2012, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
var db = require("org/arangodb").db;
var internal = require("internal");
var jsunity = require("jsunity");
var fs = require("fs");
function runSetup () {
'use strict';
internal.debugClearFailAt();
db._drop("UnitTestsRecovery");
var c = db._create("UnitTestsRecovery");
internal.wal.flush(true, true);
for (var i = 0; i < 10; ++i) {
c.insert({ value: i });
}
internal.debugSetFailAt("CreateDatafile1");
internal.debugSetFailAt("LogfileManagerGetWriteableLogfile");
try {
internal.wal.flush();
fail();
}
catch (err) {
}
internal.wait(3);
internal.debugSegfault("crashing server");
}
////////////////////////////////////////////////////////////////////////////////
/// @brief test suite
////////////////////////////////////////////////////////////////////////////////
function recoverySuite () {
'use strict';
jsunity.jsUnity.attachAssertions();
return {
setUp: function () {
},
tearDown: function () {
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test whether we can start the server
////////////////////////////////////////////////////////////////////////////////
testDiskFullLogfileData : function () {
assertEqual(10, db._collection("UnitTestsRecovery").count());
}
};
}
////////////////////////////////////////////////////////////////////////////////
/// @brief executes the test suite
////////////////////////////////////////////////////////////////////////////////
function main (argv) {
'use strict';
if (argv[1] === "setup") {
runSetup();
return 0;
}
else {
jsunity.run(recoverySuite);
return jsunity.done().status ? 0 : 1;
}
}

View File

@ -0,0 +1,105 @@
/*jshint globalstrict:false, strict:false, unused : false */
/*global fail, assertEqual */
////////////////////////////////////////////////////////////////////////////////
/// @brief tests for dump/reload
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2010-2012 triagens GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is triAGENS GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Copyright 2012, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
var db = require("org/arangodb").db;
var internal = require("internal");
var jsunity = require("jsunity");
var fs = require("fs");
function runSetup () {
'use strict';
internal.debugClearFailAt();
db._drop("UnitTestsRecovery");
var c = db._create("UnitTestsRecovery");
internal.wal.flush(true, true);
internal.debugSetFailAt("CreateDatafile1");
internal.debugSetFailAt("LogfileManagerGetWriteableLogfile");
c.insert({ value: 1 });
try {
internal.wal.flush();
fail();
}
catch (err) {
}
internal.wait(3);
try {
c.save({ _key: "crashme" }, true); // wait for sync
fail();
}
catch (err) {
}
internal.debugSegfault("crashing server");
}
////////////////////////////////////////////////////////////////////////////////
/// @brief test suite
////////////////////////////////////////////////////////////////////////////////
function recoverySuite () {
'use strict';
jsunity.jsUnity.attachAssertions();
return {
setUp: function () {
},
tearDown: function () {
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test whether we can start the server
////////////////////////////////////////////////////////////////////////////////
testDiskFullLogfileData : function () {
assertEqual(1, db._collection("UnitTestsRecovery").count());
}
};
}
////////////////////////////////////////////////////////////////////////////////
/// @brief executes the test suite
////////////////////////////////////////////////////////////////////////////////
function main (argv) {
'use strict';
if (argv[1] === "setup") {
runSetup();
return 0;
}
else {
jsunity.run(recoverySuite);
return jsunity.done().status ? 0 : 1;
}
}

View File

@ -0,0 +1,97 @@
/*jshint globalstrict:false, strict:false, unused : false */
/*global fail, assertEqual */
////////////////////////////////////////////////////////////////////////////////
/// @brief tests for dump/reload
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2010-2012 triagens GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is triAGENS GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Copyright 2012, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
var db = require("org/arangodb").db;
var internal = require("internal");
var jsunity = require("jsunity");
var fs = require("fs");
function runSetup () {
'use strict';
internal.debugClearFailAt();
db._drop("UnitTestsRecovery");
var c = db._create("UnitTestsRecovery");
internal.wal.flush(true, true);
for (var i = 0; i < 1000; ++i) {
c.insert({ value: i });
}
c.save({ _key: "crashme" }, true); // wait for sync
internal.debugSetFailAt("CreateDatafile1");
internal.debugSetFailAt("LogfileManagerGetWriteableLogfile");
internal.wal.flush(false, false);
internal.wait(2);
internal.debugSegfault("crashing server");
}
////////////////////////////////////////////////////////////////////////////////
/// @brief test suite
////////////////////////////////////////////////////////////////////////////////
function recoverySuite () {
'use strict';
jsunity.jsUnity.attachAssertions();
return {
setUp: function () {
},
tearDown: function () {
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test whether we can start the server
////////////////////////////////////////////////////////////////////////////////
testDiskFullLogfile : function () {
assertEqual(1001, db._collection("UnitTestsRecovery").count());
}
};
}
////////////////////////////////////////////////////////////////////////////////
/// @brief executes the test suite
////////////////////////////////////////////////////////////////////////////////
function main (argv) {
'use strict';
if (argv[1] === "setup") {
runSetup();
return 0;
}
else {
jsunity.run(recoverySuite);
return jsunity.done().status ? 0 : 1;
}
}

View File

@ -0,0 +1,177 @@
/*jshint globalstrict:false, strict:false, maxlen : 200 */
/*global fail, assertEqual */
////////////////////////////////////////////////////////////////////////////////
/// @brief tests for transactions
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2010-2012 triagens GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is triAGENS GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Copyright 2013, triAGENS GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
var jsunity = require("jsunity");
var internal = require("internal");
var arangodb = require("org/arangodb");
var db = arangodb.db;
// -----------------------------------------------------------------------------
// --SECTION-- test suite
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief test suite
////////////////////////////////////////////////////////////////////////////////
function datafileFailuresSuite () {
'use strict';
var cn = "UnitTestsDatafile";
var c = null;
return {
////////////////////////////////////////////////////////////////////////////////
/// @brief set up
////////////////////////////////////////////////////////////////////////////////
setUp : function () {
internal.debugClearFailAt();
db._drop(cn);
},
////////////////////////////////////////////////////////////////////////////////
/// @brief tear down
////////////////////////////////////////////////////////////////////////////////
tearDown : function () {
internal.debugClearFailAt();
if (c !== null) {
c.drop();
}
c = null;
internal.wait(4);
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test: disk full
////////////////////////////////////////////////////////////////////////////////
testDiskFullDuringLogfileCreationNoJournal : function () {
["CreateDatafile1", "CreateDatafile2"].forEach(function(what) {
internal.debugClearFailAt();
db._drop(cn);
while (true) {
try {
internal.wal.flush(true, true);
break;
}
catch (err) {
}
internal.wait(0.5, false);
}
c = db._create(cn);
internal.debugSetFailAt(what);
internal.debugSetFailAt("LogfileManagerGetWriteableLogfile");
try {
internal.wal.flush();
fail();
}
catch (err) {
assertEqual(internal.errors.ERROR_ARANGO_NO_JOURNAL.code, err.errorNum);
}
internal.wait(3, false);
internal.debugClearFailAt();
assertEqual(0, c.count());
});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test: disk full
////////////////////////////////////////////////////////////////////////////////
testDiskFullDuringCollectionNoJournal : function () {
["CreateDatafile1", "CreateDatafile2"].forEach(function(what) {
internal.debugClearFailAt();
db._drop(cn);
while (true) {
try {
internal.wal.flush(true, true);
break;
}
catch (err) {
}
internal.wait(0.5, false);
}
c = db._create(cn);
for (var i = 0; i < 1000; ++i) {
c.insert({ value: i });
}
internal.debugSetFailAt(what);
internal.debugSetFailAt("LogfileManagerGetWriteableLogfile");
try {
internal.wal.flush();
fail();
}
catch (err) {
assertEqual(internal.errors.ERROR_ARANGO_NO_JOURNAL.code, err.errorNum);
}
internal.wait(3, false);
internal.debugClearFailAt();
assertEqual(1000, c.count());
});
}
};
}
// -----------------------------------------------------------------------------
// --SECTION-- main
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief executes the test suites
////////////////////////////////////////////////////////////////////////////////
// only run this test suite if server-side failures are enabled
if (internal.debugCanUseFailAt()) {
jsunity.run(datafileFailuresSuite);
}
return jsunity.done();
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------
// Local Variables:
// mode: outline-minor
// outline-regexp: "\\(/// @brief\\|/// @addtogroup\\|// --SECTION--\\|/// @page\\|/// @\\}\\)"
// End:

View File

@ -5365,7 +5365,15 @@ function transactionServerFailuresSuite () {
assertEqual(100160, fig.uncollectedLogfileEntries);
internal.debugClearFailAt();
internal.wal.flush(true, true);
while (true) {
try {
internal.wal.flush(true, true);
break;
}
catch (err) {
internal.wait(0.5, false);
}
}
assertEqual(100150, c.count());

View File

@ -70,6 +70,12 @@ using namespace std;
// --SECTION-- private variables
// -----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
/// @brief a static buffer of zeros, used to initialize files
////////////////////////////////////////////////////////////////////////////////
static char NullBuffer[4096];
////////////////////////////////////////////////////////////////////////////////
/// @brief already initialised
////////////////////////////////////////////////////////////////////////////////
@ -2439,6 +2445,22 @@ char* TRI_LocateConfigDirectory () {
#endif
////////////////////////////////////////////////////////////////////////////////
/// @brief get the address of the null buffer
////////////////////////////////////////////////////////////////////////////////
char* TRI_GetNullBufferFiles () {
return &NullBuffer[0];
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get the size of the null buffer
////////////////////////////////////////////////////////////////////////////////
size_t TRI_GetNullBufferSizeFiles () {
return sizeof(NullBuffer);
}
// -----------------------------------------------------------------------------
// --SECTION-- module functions
// -----------------------------------------------------------------------------
@ -2452,6 +2474,8 @@ char* TRI_LocateConfigDirectory () {
void TRI_InitialiseFiles (void) {
// clear user-defined temp path
TempPath = nullptr;
memset(TRI_GetNullBufferFiles(), 0, TRI_GetNullBufferSizeFiles());
}
////////////////////////////////////////////////////////////////////////////////

View File

@ -355,6 +355,18 @@ std::string TRI_LocateInstallDirectory (void);
char* TRI_LocateConfigDirectory (void);
////////////////////////////////////////////////////////////////////////////////
/// @brief get the address of the null buffer
////////////////////////////////////////////////////////////////////////////////
char* TRI_GetNullBufferFiles ();
////////////////////////////////////////////////////////////////////////////////
/// @brief get the size of the null buffer
////////////////////////////////////////////////////////////////////////////////
size_t TRI_GetNullBufferSizeFiles ();
// -----------------------------------------------------------------------------
// --SECTION-- module functions
// -----------------------------------------------------------------------------

View File

@ -91,7 +91,9 @@ int TRI_MMFile (void* memoryAddress,
int64_t offset,
void** result) {
off_t offsetRetyped = (off_t)(offset);
TRI_ASSERT(memoryAddress == nullptr);
off_t offsetRetyped = (off_t) offset;
TRI_ASSERT(offsetRetyped == 0);
*mmHandle = nullptr; // only useful for Windows
@ -127,6 +129,9 @@ int TRI_UNMMFile (void* memoryAddress,
if (errno == ENOSPC) {
return TRI_ERROR_ARANGO_FILESYSTEM_FULL;
}
if (errno == ENOMEM) {
return TRI_ERROR_OUT_OF_MEMORY_MMAP;
}
return TRI_ERROR_SYS_ERROR;
}