1
0
Fork 0

improve handling when procdump detects the process is dead (#9381)

This commit is contained in:
Wilfried Goesgens 2019-07-04 12:38:57 +02:00 committed by Jan
parent b70d7372ef
commit 5bab188907
2 changed files with 23 additions and 17 deletions

View File

@ -185,30 +185,30 @@ Crash analysis of: ` + JSON.stringify(instanceInfo) + '\n';
return 'cdb ' + args.join(' ');
}
// checkMonitorAlive: polls the external monitor process attached to an
// instance (procdump on Windows, per the comment below) and reacts when that
// monitor is no longer running.
//
// NOTE(review): this listing is a rendered diff without +/- markers. Each run
// of lines that uses `arangod` is the pre-change text and is immediately
// followed by its replacement that uses `instanceInfo`; within this function
// the change is a rename of the second parameter.
//
// Parameters:
//   binary   - forwarded unchanged to analyzeCrash().
//   (second) - instance object; the body reads/writes its `monitor`, `pid`,
//              `coreFilePattern` and `exitStatus` properties.
//   options  - forwarded to analyzeCrash(); `options.cleanup` is set to false
//              when a dump is found.
//   res      - not referenced anywhere in the visible body.
//
// Returns:
//   false                - the monitor stopped running and a file matching
//                          `coreFilePattern` exists (instance treated as crashed).
//   monitor.exitStatus   - the monitor object already carries a `status`
//                          property from an earlier call.
//   true                 - in every other case (no monitor attached, monitor
//                          still running, or monitor gone without a dump).
function checkMonitorAlive (binary, arangod, options, res) {
if (arangod.hasOwnProperty('monitor') ) {
function checkMonitorAlive (binary, instanceInfo, options, res) {
if (instanceInfo.hasOwnProperty('monitor') ) {
// Windows: wait for procdump to do its job...
// Only query the monitor while no final status has been recorded for it yet.
if (!arangod.monitor.hasOwnProperty('status')) {
let rc = statusExternal(arangod.monitor.pid, false);
if (!instanceInfo.monitor.hasOwnProperty('status')) {
let rc = statusExternal(instanceInfo.monitor.pid, false);
if (rc.status !== 'RUNNING') {
// The monitor is gone: replace the monitor record with the status result,
// so later calls take the `else` branch below.
// NOTE(review): whether `rc` carries an `exitStatus` property is not
// visible here - confirm what that later `else` branch returns.
arangod.monitor = rc;
instanceInfo.monitor = rc;
// procdump doesn't set proper exit codes, check for
// dumps that may exist:
if (fs.exists(arangod.coreFilePattern)) {
if (fs.exists(instanceInfo.coreFilePattern)) {
print("checkMonitorAlive: marking crashy");
arangod.monitor.monitorExited = true;
arangod.monitor.pid = null;
instanceInfo.monitor.monitorExited = true;
instanceInfo.monitor.pid = null;
// Flag the crash globally and switch off cleanup
// (presumably so the dump and data survive for inspection - confirm).
pu.serverCrashed = true;
options.cleanup = false;
// Start from an empty exitStatus, run the crash analysis, then merge in
// the result of killing the instance process with abortSignal.
arangod['exitStatus'] = {};
analyzeCrash(binary, arangod, options, "the process monitor commanded error");
Object.assign(arangod.exitStatus,
killExternal(arangod.pid, abortSignal));
instanceInfo['exitStatus'] = {};
analyzeCrash(binary, instanceInfo, options, "the process monitor commanded error");
Object.assign(instanceInfo.exitStatus,
killExternal(instanceInfo.pid, abortSignal));
return false;
}
}
}
// A status was already recorded for the monitor: hand back its stored exitStatus.
else return arangod.monitor.exitStatus;
else return instanceInfo.monitor.exitStatus;
}
return true;
}

View File

@ -1238,7 +1238,13 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
}
if (arangod.exitStatus.status === 'RUNNING') {
arangod.exitStatus = statusExternal(arangod.pid, false);
crashUtils.checkMonitorAlive(ARANGOD_BIN, arangod, options, arangod.exitStatus);
if (!crashUtils.checkMonitorAlive(ARANGOD_BIN, arangod, options, arangod.exitStatus)) {
if (arangod.role !== 'agent') {
nonAgenciesCount--;
}
print(Date() + ' Server "' + arangod.role + '" shutdown: detected irregular death by monitor: pid', arangod.pid);
return false;
}
}
if (arangod.exitStatus.status === 'RUNNING') {
let localTimeout = timeout;
@ -1259,7 +1265,7 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
'" forcefully KILLED after 60s - ' +
arangod.exitStatus.signal);
if (arangod.role !== 'agent') {
nonAgenciesCount --;
nonAgenciesCount--;
}
return false;
} else {
@ -1267,7 +1273,7 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
}
} else if (arangod.exitStatus.status !== 'TERMINATED') {
if (arangod.role !== 'agent') {
nonAgenciesCount --;
nonAgenciesCount--;
}
if (arangod.exitStatus.hasOwnProperty('signal') || arangod.exitStatus.hasOwnProperty('monitor')) {
analyzeServerCrash(arangod, options, 'instance "' + arangod.role + '" Shutdown - ' + arangod.exitStatus.signal);
@ -1278,7 +1284,7 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
stopProcdump(options, arangod);
} else {
if (arangod.role !== 'agent') {
nonAgenciesCount --;
nonAgenciesCount--;
}
print(Date() + ' Server "' + arangod.role + '" shutdown: Success: pid', arangod.pid);
stopProcdump(options, arangod);