1
0
Fork 0

Make procdump abort on std::bad_cast; kill SUT if it occurs; Make procdump optional. (#6602)

This commit is contained in:
Wilfried Goesgens 2018-10-10 17:55:53 +02:00 committed by Jan
parent 282a1a7193
commit 676f61805e
4 changed files with 213 additions and 96 deletions

105
js/client/modules/@arangodb/crash-utils.js Normal file → Executable file
View File

@ -27,16 +27,20 @@
const fs = require('fs');
const yaml = require('js-yaml');
const executeExternalAndWait = require('internal').executeExternalAndWait;
const statusExternal = require('internal').statusExternal;
const sleep = require('internal').sleep;
const internal = require('internal');
const executeExternalAndWait = internal.executeExternalAndWait;
const statusExternal = internal.statusExternal;
const killExternal = internal.killExternal;
const sleep = internal.sleep;
const pu = require('@arangodb/process-utils');
let GDB_OUTPUT = '';
const abortSignal = 6;
const platform = require('internal').platform;
const platform = internal.platform;
const RED = require('internal').COLORS.COLOR_RED;
const RESET = require('internal').COLORS.COLOR_RESET;
const RED = internal.COLORS.COLOR_RED;
const RESET = internal.COLORS.COLOR_RESET;
// //////////////////////////////////////////////////////////////////////////////
// / @brief analyzes a core dump using gdb (Unix)
@ -80,11 +84,14 @@ function analyzeCoreDump (instanceInfo, options, storeArangodPath, pid) {
sleep(5);
executeExternalAndWait('/bin/bash', args);
GDB_OUTPUT += fs.read(gdbOutputFile);
GDB_OUTPUT += `--------------------------------------------------------------------------------
Crash analysis of: ` + JSON.stringify(instanceInfo) + '\n';
let thisDump = fs.read(gdbOutputFile);
GDB_OUTPUT += thisDump;
if (options.extremeVerbosity === true) {
print(GDB_OUTPUT);
print(thisDump);
}
command = 'gdb ' + storeArangodPath + ' ';
if (options.coreDirectory === '') {
@ -124,9 +131,12 @@ function analyzeCoreDumpMac (instanceInfo, options, storeArangodPath, pid) {
sleep(5);
executeExternalAndWait('/bin/bash', args);
GDB_OUTPUT += fs.read(lldbOutputFile);
GDB_OUTPUT += `--------------------------------------------------------------------------------
Crash analysis of: ` + JSON.stringify(instanceInfo) + '\n';
let thisDump = fs.read(lldbOutputFile);
GDB_OUTPUT += thisDump;
if (options.extremeVerbosity === true) {
print(GDB_OUTPUT);
print(thisDump);
}
return 'lldb ' + storeArangodPath + ' -c /cores/core.' + pid;
}
@ -137,6 +147,7 @@ function analyzeCoreDumpMac (instanceInfo, options, storeArangodPath, pid) {
// //////////////////////////////////////////////////////////////////////////////
function analyzeCoreDumpWindows (instanceInfo) {
let cdbOutputFile = fs.getTempFile();
const coreFN = instanceInfo.rootDir + '\\' + 'core.dmp';
if (!fs.exists(coreFN)) {
@ -144,7 +155,9 @@ function analyzeCoreDumpWindows (instanceInfo) {
return;
}
const dbgCmds = [
'.logopen ' + cdbOutputFile,
'kp', // print curren threads backtrace with arguments
'~*kb', // print all threads stack traces
'dv', // analyze local variables (if)
@ -155,24 +168,57 @@ function analyzeCoreDumpWindows (instanceInfo) {
const args = [
'-z',
coreFN,
'-list',
'-lines',
'-logo',
cdbOutputFile,
'-c',
dbgCmds.join('; ')
];
sleep(5);
print('running cdb ' + JSON.stringify(args));
process.env['_NT_DEBUG_LOG_FILE_OPEN'] = cdbOutputFile;
executeExternalAndWait('cdb', args);
GDB_OUTPUT += `--------------------------------------------------------------------------------
Crash analysis of: ` + JSON.stringify(instanceInfo) + '\n';
// cdb will output to stdout anyways, so we can't turn this off here.
GDB_OUTPUT += fs.read(cdbOutputFile);
return 'cdb ' + args.join(' ');
}
// //////////////////////////////////////////////////////////////////////////////
// / @brief polls the process monitor (procdump) attached to an instance and
// /        treats a failed monitor exit as a crash of the monitored instance.
// /
// / @param binary   path of the monitored binary, handed on to analyzeCrash
// / @param arangod  instance record; its `monitor`, `exitStatus` and `pid`
// /                 properties are read and updated here
// / @param options  test options, handed on to analyzeCrash
// / @param res      not used by this function
// / @return true while no crash was detected, false right after a crash was
// /         detected; once the monitor is gone, `monitor.exitStatus` as stored
// //////////////////////////////////////////////////////////////////////////////

function checkMonitorAlive (binary, arangod, options, res) {
  // Instances without an attached monitor can't report anything.
  if (!arangod.hasOwnProperty('monitor')) {
    return true;
  }

  // A `status` property means an earlier call already replaced the monitor
  // handle with its final status record (see below).
  // NOTE(review): nothing in this function sets `monitor.exitStatus`; unless
  // another module does, this yields undefined (falsy) - confirm intended.
  if (arangod.monitor.hasOwnProperty('status')) {
    return arangod.monitor.exitStatus;
  }

  // presumably a non-blocking status query (second argument `false`)
  const monitorState = statusExternal(arangod.monitor.pid, false);
  if (monitorState.status === 'RUNNING') {
    return true;
  }

  // The monitor is gone: keep its final status instead of the process handle.
  arangod.monitor = monitorState;
  if (monitorState.exit === 0) {
    return true;
  }

  // ok, procdump exited with a failure,
  // this means it wrote an exception dump.
  print("checkMonitorAlive: marking crashy");
  monitorState.monitorExited = true;
  monitorState.pid = null;
  pu.serverCrashed = true;
  arangod['exitStatus'] = {};
  // Analyze first, then abort the monitored process and merge the kill result
  // into the fresh exitStatus.
  analyzeCrash(binary, arangod, options, "the process monitor commanded error");
  Object.assign(arangod.exitStatus, killExternal(arangod.pid, abortSignal));
  return false;
}
// //////////////////////////////////////////////////////////////////////////////
// / @brief something bad has happened: tell the user and try to gather more
// / information about the incident.
// //////////////////////////////////////////////////////////////////////////////
function analyzeCrash (binary, arangod, options, checkStr) {
if (!options.coreCheck || arangod.exitStatus.hasOwnProperty('gdbHint')) {
function analyzeCrash (binary, instanceInfo, options, checkStr) {
if (!options.coreCheck || instanceInfo.exitStatus.hasOwnProperty('gdbHint')) {
print(RESET);
return;
}
@ -185,15 +231,15 @@ function analyzeCrash (binary, arangod, options, checkStr) {
var corePattern = fs.readBuffer(cpf);
var cp = corePattern.asciiSlice(0, corePattern.length);
if (matchApport.exec(cp) != null) {
if (matchApport.exec(cp) !== null) {
print(RED + 'apport handles corefiles on your system. Uninstall it if you want us to get corefiles for analysis.' + RESET);
return;
}
if (matchSystemdCoredump.exec(cp) !== null) {
options.coreDirectory = '/var/lib/systemd/coredump/*core*' + arangod.pid + '*';
options.coreDirectory = '/var/lib/systemd/coredump/*core*' + instanceInfo.pid + '*';
} else if (matchVarTmp.exec(cp) !== null) {
options.coreDirectory = cp.replace('%e', '*').replace('%t', '*').replace('%p', arangod.pid);
options.coreDirectory = cp.replace('%e', '*').replace('%t', '*').replace('%p', instanceInfo.pid);
} else {
print(RED + 'Don\'t know howto locate corefiles in your system. "' + cpf + '" contains: "' + cp + '"' + RESET);
return;
@ -205,7 +251,7 @@ function analyzeCrash (binary, arangod, options, checkStr) {
if (pathParts.length > 0) {
bareBinary = pathParts[pathParts.length - 1];
}
const storeArangodPath = arangod.rootDir + '/' + bareBinary + '_' + arangod.pid;
const storeArangodPath = instanceInfo.rootDir + '/' + bareBinary + '_' + instanceInfo.pid;
print(RED +
'during: ' + checkStr + ': Core dump written; ' +
@ -214,26 +260,33 @@ function analyzeCrash (binary, arangod, options, checkStr) {
storeArangodPath + ' for later analysis.\n' +
*/
'Process facts :\n' +
yaml.safeDump(arangod) +
yaml.safeDump(instanceInfo) +
'marking build as crashy.' + RESET);
sleep(5);
let hint = '';
if (platform.substr(0, 3) === 'win') {
// Windows: wait for procdump to do its job...
statusExternal(arangod.monitor, true);
hint = analyzeCoreDumpWindows(arangod);
if (!instanceInfo.hasOwnProperty('monitor')) {
print("your process wasn't monitored by procdump, won't have a coredump!");
instanceInfo.exitStatus['gdbHint'] = "coredump unavailable";
return;
}
if (instanceInfo.monitor.pid !== null) {
instanceInfo.monitor = statusExternal(instanceInfo.monitor.pid, true);
}
hint = analyzeCoreDumpWindows(instanceInfo);
} else if (platform === 'darwin') {
// fs.copyFile(binary, storeArangodPath);
hint = analyzeCoreDumpMac(arangod, options, binary, arangod.pid);
hint = analyzeCoreDumpMac(instanceInfo, options, binary, instanceInfo.pid);
} else {
// fs.copyFile(binary, storeArangodPath);
hint = analyzeCoreDump(arangod, options, binary, arangod.pid);
hint = analyzeCoreDump(instanceInfo, options, binary, instanceInfo.pid);
}
arangod.exitStatus.gdbHint = 'Run debugger with "' + hint + '"';
instanceInfo.exitStatus.gdbHint = 'Run debugger with "' + hint + '"';
}
exports.checkMonitorAlive = checkMonitorAlive;
exports.analyzeCrash = analyzeCrash;
Object.defineProperty(exports, 'GDB_OUTPUT', {get: () => GDB_OUTPUT});

133
js/client/modules/@arangodb/process-utils.js Normal file → Executable file
View File

@ -374,6 +374,45 @@ function makeArgsArangod (options, appDir, role, tmpDir) {
return args;
}
// //////////////////////////////////////////////////////////////////////////////
// / @brief starts procdump as monitor for a process and stores its handle in
// /        `instanceInfo.monitor`.
// /
// / @param options       test options; reads `exceptionFilter` (comma separated
// /                      list of exception filters, may be null/undefined),
// /                      `exceptionCount` and `extremeVerbosity`
// / @param instanceInfo  instance record that receives the `monitor` handle
// / @param rootDir       directory in which `core.dmp` is to be written
// / @param pid           pid of the process to monitor
// /
// / Failing to launch procdump is deliberately non-fatal: the problem is
// / reported and `instanceInfo.monitor` is left untouched.
// //////////////////////////////////////////////////////////////////////////////

function runProcdump (options, instanceInfo, rootDir, pid) {
  const dumpFile = fs.join(rootDir, 'core.dmp');
  let procdumpArgs;

  if (options.exceptionFilter != null) {
    procdumpArgs = [
      '-accepteula',
      '-64',
      '-e',
      options.exceptionCount
    ];
    // One `-f <filter>` pair per list entry. Surrounding blanks and empty
    // entries ("a, b", "a,,b", trailing comma) are dropped so procdump never
    // gets an empty or padded filter argument.
    for (const rawFilter of String(options.exceptionFilter).split(',')) {
      const filter = rawFilter.trim();
      if (filter !== '') {
        procdumpArgs.push('-f', filter);
      }
    }
    procdumpArgs.push('-ma', pid, dumpFile);
  } else {
    procdumpArgs = [
      '-accepteula',
      '-e',
      '-ma',
      pid,
      dumpFile
    ];
  }

  try {
    if (options.extremeVerbosity) {
      print("Starting procdump: " + JSON.stringify(procdumpArgs));
    }
    instanceInfo.monitor = executeExternal('procdump', procdumpArgs);
  } catch (x) {
    // best effort: tests still run without a monitor, but say why it failed
    print('failed to start procdump - is it installed?');
    print(String(x));
  }
}
// //////////////////////////////////////////////////////////////////////////////
// / @brief executes a command and waits for result
// //////////////////////////////////////////////////////////////////////////////
@ -424,15 +463,33 @@ function executeAndWait (cmd, args, options, valgrindTest, rootDir, circumventCo
};
}
const res = executeExternalAndWait(cmd, args);
let instanceInfo = {
rootDir: rootDir,
pid: 0,
exitStatus: {}
};
let res = {};
if (platform.substr(0, 3) === 'win') {
res = executeExternal(cmd, args);
instanceInfo.pid = res.pid;
instanceInfo.exitStatus = res;
runProcdump(options, instanceInfo, rootDir, res.pid);
Object.assign(instanceInfo.exitStatus,
statusExternal(res.pid, true));
} else {
res = executeExternalAndWait(cmd, args);
instanceInfo.pid = res.pid;
instanceInfo.exitStatus = res;
}
const deltaTime = time() - startTime;
let errorMessage = ' - ';
if (coreCheck &&
res.hasOwnProperty('signal') &&
((res.signal === 11) ||
(res.signal === 6) ||
instanceInfo.exitStatus.hasOwnProperty('signal') &&
((instanceInfo.exitStatus.signal === 11) ||
(instanceInfo.exitStatus.signal === 6) ||
// Windows sometimes has random numbers in signal...
(platform.substr(0, 3) === 'win')
)
@ -446,21 +503,21 @@ function executeAndWait (cmd, args, options, valgrindTest, rootDir, circumventCo
crashUtils.analyzeCrash(cmd,
instanceInfo,
options,
'execution of ' + cmd + ' - ' + res.signal);
'execution of ' + cmd + ' - ' + instanceInfo.exitStatus.signal);
if (options.coreCheck) {
print(instanceInfo.exitStatus.gdbHint);
}
serverCrashed = true;
}
if (res.status === 'TERMINATED') {
const color = (res.exit === 0 ? GREEN : RED);
if (instanceInfo.exitStatus.status === 'TERMINATED') {
const color = (instanceInfo.exitStatus.exit === 0 ? GREEN : RED);
print(color + 'Finished: ' + res.status +
' exit code: ' + res.exit +
print(color + 'Finished: ' + instanceInfo.exitStatus.status +
' exit code: ' + instanceInfo.exitStatus.exit +
' Time elapsed: ' + deltaTime + RESET);
if (res.exit === 0) {
if (instanceInfo.exitStatus.exit === 0) {
return {
status: true,
message: '',
@ -469,38 +526,38 @@ function executeAndWait (cmd, args, options, valgrindTest, rootDir, circumventCo
} else {
return {
status: false,
message: 'exit code was ' + res.exit,
message: 'exit code was ' + instanceInfo.exitStatus.exit,
duration: deltaTime
};
}
} else if (res.status === 'ABORTED') {
if (typeof (res.errorMessage) !== 'undefined') {
errorMessage += res.errorMessage;
} else if (instanceInfo.exitStatus.status === 'ABORTED') {
if (typeof (instanceInfo.exitStatus.errorMessage) !== 'undefined') {
errorMessage += instanceInfo.exitStatus.errorMessage;
}
print('Finished: ' + res.status +
' Signal: ' + res.signal +
print('Finished: ' + instanceInfo.exitStatus.status +
' Signal: ' + instanceInfo.exitStatus.signal +
' Time elapsed: ' + deltaTime + errorMessage);
return {
status: false,
message: 'irregular termination: ' + res.status +
' exit signal: ' + res.signal + errorMessage,
message: 'irregular termination: ' + instanceInfo.exitStatus.status +
' exit signal: ' + instanceInfo.exitStatus.signal + errorMessage,
duration: deltaTime
};
} else {
if (typeof (res.errorMessage) !== 'undefined') {
errorMessage += res.errorMessage;
if (typeof (instanceInfo.exitStatus.errorMessage) !== 'undefined') {
errorMessage += instanceInfo.exitStatus.errorMessage;
}
print('Finished: ' + res.status +
' exit code: ' + res.signal +
print('Finished: ' + instanceInfo.exitStatus.status +
' exit code: ' + instanceInfo.exitStatus.signal +
' Time elapsed: ' + deltaTime + errorMessage);
return {
status: false,
message: 'irregular termination: ' + res.status +
' exit code: ' + res.exit + errorMessage,
message: 'irregular termination: ' + instanceInfo.exitStatus.status +
' exit code: ' + instanceInfo.exitStatus.exit + errorMessage,
duration: deltaTime
};
}
@ -675,11 +732,13 @@ function analyzeServerCrash (arangod, options, checkStr) {
// //////////////////////////////////////////////////////////////////////////////
function checkArangoAlive (arangod, options) {
const res = statusExternal(arangod.pid, false);
const ret = res.status === 'RUNNING';
const ret = res.status === 'RUNNING' && crashUtils.checkMonitorAlive(ARANGOD_BIN, arangod, options, res);
if (!ret) {
print('ArangoD with PID ' + arangod.pid + ' gone:');
arangod.exitStatus = res;
if (!arangod.hasOwnProperty('exitStatus')) {
arangod.exitStatus = res;
}
print(arangod);
if (res.hasOwnProperty('signal') &&
@ -894,6 +953,7 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
}
if (arangod.exitStatus.status === 'RUNNING') {
arangod.exitStatus = statusExternal(arangod.pid, false);
crashUtils.checkMonitorAlive(ARANGOD_BIN, arangod, options, arangod.exitStatus);
}
if (arangod.exitStatus.status === 'RUNNING') {
let localTimeout = timeout;
@ -922,7 +982,7 @@ function shutdownInstance (instanceInfo, options, forceTerminate) {
if (arangod.role !== 'agent') {
nonAgenciesCount --;
}
if (arangod.exitStatus.hasOwnProperty('signal')) {
if (arangod.exitStatus.hasOwnProperty('signal') || arangod.exitStatus.hasOwnProperty('monitor')) {
analyzeServerCrash(arangod, options, 'instance "' + arangod.role + '" Shutdown - ' + arangod.exitStatus.signal);
print("shutdownInstance: Marking crashy - " + JSON.stringify(arangod));
serverCrashed = true;
@ -1203,21 +1263,8 @@ function startArango (protocol, options, addArgs, rootDir, role) {
}
instanceInfo.role = role;
if (platform.substr(0, 3) === 'win') {
const procdumpArgs = [
'-accepteula',
'-e',
'-ma',
instanceInfo.pid,
fs.join(rootDir, 'core.dmp')
];
try {
instanceInfo.monitor = executeExternal('procdump', procdumpArgs);
} catch (x) {
print('failed to start procdump - is it installed?');
// throw x;
}
if (platform.substr(0, 3) === 'win' && !options.disableMonitor) {
runProcdump(options, instanceInfo, rootDir, instanceInfo.pid);
}
return instanceInfo;
}
@ -1437,4 +1484,4 @@ Object.defineProperty(exports, 'UNITTESTS_DIR', {get: () => UNITTESTS_DIR});
Object.defineProperty(exports, 'BIN_DIR', {get: () => BIN_DIR});
Object.defineProperty(exports, 'CONFIG_ARANGODB_DIR', {get: () => CONFIG_ARANGODB_DIR});
Object.defineProperty(exports, 'CONFIG_RELATIVE_DIR', {get: () => CONFIG_RELATIVE_DIR});
Object.defineProperty(exports, 'serverCrashed', {get: () => serverCrashed});
// serverCrashed is writable from outside (e.g. `pu.serverCrashed = true`), so
// the setter must store the assigned value instead of discarding it.
Object.defineProperty(exports, 'serverCrashed', {get: () => serverCrashed, set: (value) => { serverCrashed = value; }});

60
js/client/modules/@arangodb/test-utils.js Normal file → Executable file
View File

@ -214,40 +214,48 @@ function performTests (options, testList, testname, runFn, serverOptions, startS
}
}
continueTesting = pu.arangod.check.instanceAlive(instanceInfo, options);
if (pu.arangod.check.instanceAlive(instanceInfo, options)) {
continueTesting = true;
// Check whether some collections were left behind, and if mark test as failed.
let collectionsAfter = [];
db._collections().forEach(collection => {
collectionsAfter.push(collection._name);
});
let delta = diffArray(collectionsBefore, collectionsAfter).filter(function(name) {
return (name[0] !== '_'); // exclude system collections from the comparison
});
if (delta.length !== 0) {
results[te] = {
status: false,
message: 'Cleanup missing - test left over collections: ' + delta + '. Original test status: ' + JSON.stringify(results[te])
};
collectionsBefore = [];
// Check whether some collections were left behind, and if mark test as failed.
let collectionsAfter = [];
db._collections().forEach(collection => {
collectionsBefore.push(collection._name);
collectionsAfter.push(collection._name);
});
let delta = diffArray(collectionsBefore, collectionsAfter).filter(function(name) {
return (name[0] !== '_'); // exclude system collections from the comparison
});
}
let graphs = db._collection('_graphs');
if (graphs && graphs.count() !== graphCount) {
if (delta.length !== 0) {
results[te] = {
status: false,
message: 'Cleanup missing - test left over collections: ' + delta + '. Original test status: ' + JSON.stringify(results[te])
};
collectionsBefore = [];
db._collections().forEach(collection => {
collectionsBefore.push(collection._name);
});
}
let graphs = db._collection('_graphs');
if (graphs && graphs.count() !== graphCount) {
results[te] = {
status: false,
message: 'Cleanup of graphs missing - found graph definitions: [ ' +
JSON.stringify(graphs.toArray()) +
' ] - Original test status: ' +
JSON.stringify(results[te])
};
graphCount = graphs.count();
}
} else {
continueTesting = false;
results[te] = {
status: false,
message: 'Cleanup of graphs missing - found graph definitions: [ ' +
JSON.stringify(graphs.toArray()) +
' ] - Original test status: ' +
JSON.stringify(results[te])
message: 'server is dead.'
};
graphCount = graphs.count();
}
if (startStopHandlers !== undefined && startStopHandlers.hasOwnProperty('alive')) {
customInstanceInfos['alive'] = startStopHandlers.alive(options,
serverOptions,

11
js/client/modules/@arangodb/testing.js Normal file → Executable file
View File

@ -84,7 +84,13 @@ let optionsDocumentation = [
' - `writeXmlReport`: Write junit xml report files',
' - `prefix`: prefix for the tests in the xml reports',
'',
' - `disableMonitor`: if set to true on windows, procdump will not be attached.',
' - `rr`: if set to true arangod instances are run with rr',
' - `exceptionFilter`: on windows you can use this to abort tests on specific exceptions',
' i.e. `bad_cast` to abort on throwing of std::bad_cast',
' or a coma separated list for multiple exceptions; ',
' filtering by asterisk is possible',
' - `exceptionCount`: how many exceptions should procdump be able to capture?',
' - `coreCheck`: if set to true, we will attempt to locate a coredump to ',
' produce a backtrace in the event of a crash',
'',
@ -142,6 +148,8 @@ const optionsDefaults = {
'protocol': 'tcp',
'replication': false,
'rr': false,
'exceptionFilter': null,
'exceptionCount': 1,
'sanitizer': false,
'activefailover': false,
'skipLogAnalysis': true,
@ -162,7 +170,8 @@ const optionsDefaults = {
'walFlushTimeout': 30000,
'writeXmlReport': true,
'testFailureText': 'testfailures.txt',
'testCase': undefined
'testCase': undefined,
'disableMonitor': false
};
const _ = require('lodash');