1
0
Fork 0

provide more info in supervisor mode

This commit is contained in:
jsteemann 2017-03-10 08:11:09 +01:00
parent 8c4fca5ce5
commit 30651604c6
1 changed files with 92 additions and 23 deletions

View File

@ -34,15 +34,73 @@ using namespace arangodb::application_features;
using namespace arangodb::basics;
using namespace arangodb::options;
namespace {
static bool DONE = false;
static int CLIENT_PID = false;
static char const* restartMessage = "will now start a new child process";
static char const* noRestartMessage = "will intentionally not start a new child process";
static char const* fixErrorMessage = "please check what causes the child process to fail and fix the error first";
static char const* translateSignal(int signal) {
if (signal >= 128) {
signal -= 128;
}
switch (signal) {
#ifdef SIGHUP
case SIGHUP: return "SIGHUP";
#endif
#ifdef SIGINT
case SIGINT: return "SIGINT";
#endif
#ifdef SIGQUIT
case SIGQUIT: return "SIGQUIT";
#endif
#ifdef SIGKILL
case SIGILL: return "SIGILL";
#endif
#ifdef SIGTRAP
case SIGTRAP: return "SIGTRAP";
#endif
#ifdef SIGABRT
case SIGABRT: return "SIGABRT";
#endif
#ifdef SIGBUS
case SIGBUS: return "SIGBUS";
#endif
#ifdef SIGFPE
case SIGFPE: return "SIGFPE";
#endif
#ifdef SIGKILL
case SIGKILL: return "SIGKILL";
#endif
#ifdef SIGSEGV
case SIGSEGV: return "SIGSEGV";
#endif
#ifdef SIGPIPE
case SIGPIPE: return "SIGPIPE";
#endif
#ifdef SIGTERM
case SIGTERM: return "SIGTERM";
#endif
#ifdef SIGCONT
case SIGCONT: return "SIGCONT";
#endif
#ifdef SIGSTOP
case SIGSTOP: return "SIGSTOP";
#endif
default: return "unknown";
}
}
static void StopHandler(int) {
LOG_TOPIC(INFO, Logger::STARTUP) << "received SIGINT for supervisor";
kill(CLIENT_PID, SIGTERM);
DONE = true;
}
}
SupervisorFeature::SupervisorFeature(
application_features::ApplicationServer* server)
: ApplicationFeature(server, "Supervisor"), _supervisor(false), _clientPid(0) {
@ -157,35 +215,42 @@ void SupervisorFeature::daemonize() {
if (WIFEXITED(status)) {
// give information about cause of death
if (WEXITSTATUS(status) == 0) {
LOG_TOPIC(INFO, Logger::STARTUP) << "child " << _clientPid
<< " died of natural causes";
LOG_TOPIC(INFO, Logger::STARTUP) << "child process " << _clientPid
<< " died of natural causes. " << noRestartMessage;
done = true;
horrible = false;
} else {
t = time(0) - startTime;
LOG_TOPIC(ERR, Logger::STARTUP)
<< "child " << _clientPid
<< " died a horrible death, exit status " << WEXITSTATUS(status);
if (t < MIN_TIME_ALIVE_IN_SEC) {
LOG_TOPIC(ERR, Logger::STARTUP)
<< "child only survived for " << t
<< " seconds, this will not work - please fix the error "
"first";
<< "child process " << _clientPid << " died a horrible death, exit status"
<< WEXITSTATUS(status) << ". the child process only survived for " << t
<< " seconds. this is lower than the minimum threshold value of " << MIN_TIME_ALIVE_IN_SEC
<< " s. " << noRestartMessage << ". " << fixErrorMessage;
done = true;
} else {
LOG_TOPIC(ERR, Logger::STARTUP)
<< "child process " << _clientPid
<< " died a horrible death, exit status " << WEXITSTATUS(status)
<< ". " << restartMessage;
done = false;
}
}
} else if (WIFSIGNALED(status)) {
switch (WTERMSIG(status)) {
int const s = WTERMSIG(status);
switch (s) {
case 2: // SIGINT
case 9: // SIGKILL
case 15: // SIGTERM
LOG_TOPIC(INFO, Logger::STARTUP)
<< "child " << _clientPid
<< " died of natural causes, exit status " << WTERMSIG(status);
<< "child process " << _clientPid
<< " died of natural causes, exit status " << s
<< " (" << translateSignal(s) << "). " << noRestartMessage;
done = true;
horrible = false;
break;
@ -194,25 +259,28 @@ void SupervisorFeature::daemonize() {
TRI_ASSERT(horrible);
t = time(0) - startTime;
LOG_TOPIC(ERR, Logger::STARTUP) << "child " << _clientPid
<< " died a horrible death, signal "
<< WTERMSIG(status);
if (t < MIN_TIME_ALIVE_IN_SEC) {
LOG_TOPIC(ERR, Logger::STARTUP)
<< "child only survived for " << t
<< " seconds, this will not work - please fix the "
"error first";
<< "child process " << _clientPid << " died a horrible death, signal "
<< s << " (" << translateSignal(s) << "). the child process only survived for " << t
<< " seconds. this is lower than the minimum threshold value of " << MIN_TIME_ALIVE_IN_SEC
<< " s. " << noRestartMessage << ". " << fixErrorMessage;
done = true;
#ifdef WCOREDUMP
if (WCOREDUMP(status)) {
LOG_TOPIC(WARN, Logger::STARTUP) << "child process "
<< _clientPid
<< " produced a core dump";
<< " also produced a core dump";
}
#endif
} else {
LOG_TOPIC(ERR, Logger::STARTUP) << "child process " << _clientPid
<< " died a horrible death, signal "
<< s << " (" << translateSignal(s) << "). "
<< restartMessage;
done = false;
}
@ -220,8 +288,9 @@ void SupervisorFeature::daemonize() {
}
} else {
LOG_TOPIC(ERR, Logger::STARTUP)
<< "child " << _clientPid
<< " died a horrible death, unknown cause";
<< "child process " << _clientPid
<< " died a horrible death, unknown cause. " << restartMessage;
done = false;
}
}
@ -256,7 +325,7 @@ void SupervisorFeature::daemonize() {
return;
}
}
LOG_TOPIC(DEBUG, Logger::STARTUP) << "supervisor mode: finished";
Logger::flush();