mirror of https://gitee.com/bigwinds/arangodb
improve handling when procdump detects the process is dead (#9381)
This commit is contained in:
parent
b70d7372ef
commit
5bab188907
|
@ -185,30 +185,30 @@ Crash analysis of: ` + JSON.stringify(instanceInfo) + '\n';
|
|||
return 'cdb ' + args.join(' ');
|
||||
}
|
||||
|
||||
function checkMonitorAlive (binary, arangod, options, res) {
|
||||
if (arangod.hasOwnProperty('monitor') ) {
|
||||
function checkMonitorAlive (binary, instanceInfo, options, res) {
|
||||
if (instanceInfo.hasOwnProperty('monitor') ) {
|
||||
// Windows: wait for procdump to do its job...
|
||||
if (!arangod.monitor.hasOwnProperty('status')) {
|
||||
let rc = statusExternal(arangod.monitor.pid, false);
|
||||
if (!instanceInfo.monitor.hasOwnProperty('status')) {
|
||||
let rc = statusExternal(instanceInfo.monitor.pid, false);
|
||||
if (rc.status !== 'RUNNING') {
|
||||
arangod.monitor = rc;
|
||||
instanceInfo.monitor = rc;
|
||||
// procdump doesn't set proper exit codes, check for
|
||||
// dumps that may exist:
|
||||
if (fs.exists(arangod.coreFilePattern)) {
|
||||
if (fs.exists(instanceInfo.coreFilePattern)) {
|
||||
print("checkMonitorAlive: marking crashy");
|
||||
arangod.monitor.monitorExited = true;
|
||||
arangod.monitor.pid = null;
|
||||
instanceInfo.monitor.monitorExited = true;
|
||||
instanceInfo.monitor.pid = null;
|
||||
pu.serverCrashed = true;
|
||||
options.cleanup = false;
|
||||
arangod['exitStatus'] = {};
|
||||
analyzeCrash(binary, arangod, options, "the process monitor commanded error");
|
||||
Object.assign(arangod.exitStatus,
|
||||
killExternal(arangod.pid, abortSignal));
|
||||
instanceInfo['exitStatus'] = {};
|
||||
analyzeCrash(binary, instanceInfo, options, "the process monitor commanded error");
|
||||
Object.assign(instanceInfo.exitStatus,
|
||||
killExternal(instanceInfo.pid, abortSignal));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else return arangod.monitor.exitStatus;
|
||||
else return instanceInfo.monitor.exitStatus;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1238,7 +1238,13 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
|
|||
}
|
||||
if (arangod.exitStatus.status === 'RUNNING') {
|
||||
arangod.exitStatus = statusExternal(arangod.pid, false);
|
||||
crashUtils.checkMonitorAlive(ARANGOD_BIN, arangod, options, arangod.exitStatus);
|
||||
if (!crashUtils.checkMonitorAlive(ARANGOD_BIN, arangod, options, arangod.exitStatus)) {
|
||||
if (arangod.role !== 'agent') {
|
||||
nonAgenciesCount--;
|
||||
}
|
||||
print(Date() + ' Server "' + arangod.role + '" shutdown: detected irregular death by monitor: pid', arangod.pid);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (arangod.exitStatus.status === 'RUNNING') {
|
||||
let localTimeout = timeout;
|
||||
|
@ -1259,7 +1265,7 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
|
|||
'" forcefully KILLED after 60s - ' +
|
||||
arangod.exitStatus.signal);
|
||||
if (arangod.role !== 'agent') {
|
||||
nonAgenciesCount --;
|
||||
nonAgenciesCount--;
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
|
@ -1267,7 +1273,7 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
|
|||
}
|
||||
} else if (arangod.exitStatus.status !== 'TERMINATED') {
|
||||
if (arangod.role !== 'agent') {
|
||||
nonAgenciesCount --;
|
||||
nonAgenciesCount--;
|
||||
}
|
||||
if (arangod.exitStatus.hasOwnProperty('signal') || arangod.exitStatus.hasOwnProperty('monitor')) {
|
||||
analyzeServerCrash(arangod, options, 'instance "' + arangod.role + '" Shutdown - ' + arangod.exitStatus.signal);
|
||||
|
@ -1278,7 +1284,7 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
|
|||
stopProcdump(options, arangod);
|
||||
} else {
|
||||
if (arangod.role !== 'agent') {
|
||||
nonAgenciesCount --;
|
||||
nonAgenciesCount--;
|
||||
}
|
||||
print(Date() + ' Server "' + arangod.role + '" shutdown: Success: pid', arangod.pid);
|
||||
stopProcdump(options, arangod);
|
||||
|
|
Loading…
Reference in New Issue