mirror of https://gitee.com/bigwinds/arangodb
Work on cluster shutdown:
- when running in valgrind be more graceful - fix incidents when somebody else already picked up the exit result - fix write access to the results array - run.pids is an object. - fix copying arangod in case of crash in cluster case - fix cluster shutdown structure analysis
This commit is contained in:
parent
537b115eac
commit
a571f12634
|
@ -419,7 +419,7 @@ launchActions.startServers = function (dispatchers, cmd, isRelaunch) {
|
|||
for (i = 0;i < endpoints.length;i++) {
|
||||
var timeout = 50;
|
||||
if (cmd.valgrind !== '') {
|
||||
timeout *= 1000;
|
||||
timeout *= 10000;
|
||||
}
|
||||
if (! waitForServerUp(endpoints[i], timeout)) {
|
||||
error = true;
|
||||
|
@ -542,26 +542,48 @@ shutdownActions.startServers = function (dispatchers, cmd, run) {
|
|||
// we cannot do much with the result...
|
||||
}
|
||||
|
||||
console.info("Waiting 8 seconds for servers to shutdown gracefully...");
|
||||
wait(8);
|
||||
var shutdownWait = 8;
|
||||
if (cmd.valgrind !== '') {
|
||||
shutdownWait *= 10000;
|
||||
}
|
||||
console.info("Waiting " + shutdownWait + " seconds for servers to shutdown gracefully...");
|
||||
var j = 0;
|
||||
var runpids = run.pids.length;
|
||||
while ((j < shutdownWait) && (runpids > 0)) {
|
||||
wait(1);
|
||||
j++;
|
||||
for (i = 0; i < run.pids.length; i++) {
|
||||
|
||||
for (i = 0;i < run.pids.length;i++) {
|
||||
var s = statusExternal(run.pids[i]);
|
||||
if (s.status !== "TERMINATED") {
|
||||
if (s.hasOwnProperty('signal')) {
|
||||
error = true;
|
||||
console.error("shuting down %s %s done - with problems: " + s,
|
||||
run.roles[i],
|
||||
run.endpointNames[i],
|
||||
JSON.stringify(run.pids[i]));
|
||||
if (serverStates[JSON.stringify(run.pids[i].pid)] === undefined) {
|
||||
var s = statusExternal(run.pids[i]);
|
||||
|
||||
if ((s.status === "NOT-FOUND") ||
|
||||
(s.status === "TERMINATED") ||
|
||||
s.hasOwnProperty('signal')) {
|
||||
runpids -=1;
|
||||
serverStates[JSON.stringify(run.pids[i])] = s;
|
||||
error = true;
|
||||
}
|
||||
else if (j > shutdownWait) {
|
||||
if (s.status !== "TERMINATED") {
|
||||
if (s.hasOwnProperty('signal')) {
|
||||
error = true;
|
||||
console.error("shuting down %s %s done - with problems: " + s,
|
||||
run.roles[i],
|
||||
run.endpointNames[i],
|
||||
JSON.stringify(run.pids[i]));
|
||||
}
|
||||
else {
|
||||
console.info("Shutting down %s the hard way...",
|
||||
JSON.stringify(run.pids[i]));
|
||||
s.killedState = killExternal(run.pids[i]);
|
||||
console.info("done.");
|
||||
runpids -=1;
|
||||
}
|
||||
serverStates[JSON.stringify(run.pids[i])] = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
console.info("Shutting down %s the hard way...",
|
||||
JSON.stringify(run.pids[i]));
|
||||
s.killedState = killExternal(run.pids[i]);
|
||||
console.info("done.");
|
||||
}
|
||||
serverStates[run.pids[i]] = s;
|
||||
}
|
||||
}
|
||||
return {"error": error, "isStartServers": true, "serverStates" : serverStates};
|
||||
|
|
|
@ -462,7 +462,6 @@ function checkInstanceAlive(instanceInfo, options) {
|
|||
var ret = res.status === "RUNNING";
|
||||
if (! ret) {
|
||||
print("ArangoD with PID " + instanceInfo.pid.pid + " gone:");
|
||||
instanceInfo.exitStatus = res;
|
||||
print(instanceInfo);
|
||||
if (res.hasOwnProperty('signal') &&
|
||||
((res.signal === 11) ||
|
||||
|
@ -485,9 +484,10 @@ function checkInstanceAlive(instanceInfo, options) {
|
|||
statusExternal(instanceInfo.monitor, true);
|
||||
}
|
||||
else {
|
||||
copy("bin/arangod", instanceInfo.tmpDataDir);
|
||||
copy("bin/arangod", storeArangodPath);
|
||||
}
|
||||
}
|
||||
instanceInfo.exitStatus = res;
|
||||
}
|
||||
if (!ret) {
|
||||
serverCrashed = true;
|
||||
|
@ -506,11 +506,21 @@ function checkInstanceAlive(instanceInfo, options) {
|
|||
storeArangodPath = "/var/tmp/arangod_" + checkpid.pid;
|
||||
print("Core dump written; copying arangod to " +
|
||||
storeArangodPath + " for later analysis.");
|
||||
instanceInfo.exitStatus = ress;
|
||||
ress.gdbHint = "Run debugger with 'gdb " +
|
||||
storeArangodPath +
|
||||
" /var/tmp/core*" + checkpid.pid + "*'";
|
||||
copy("bin/arangod", storeArangodPath);
|
||||
|
||||
if (require("internal").platform.substr(0,3) === 'win') {
|
||||
copy("bin\\arangod.exe", instanceInfo.tmpDataDir + "\\arangod.exe");
|
||||
copy("bin\\arangod.pdb", instanceInfo.tmpDataDir + "\\arangod.pdb");
|
||||
// Windows: wait for procdump to do its job...
|
||||
statusExternal(instanceInfo.monitor, true);
|
||||
}
|
||||
else {
|
||||
copy("bin/arangod", storeArangodPath);
|
||||
}
|
||||
|
||||
instanceInfo.exitStatus = ress;
|
||||
ClusterFit = false;
|
||||
}
|
||||
}
|
||||
|
@ -539,15 +549,24 @@ function shutdownInstance (instanceInfo, options) {
|
|||
instanceInfo.kickstarter.cleanup();
|
||||
}
|
||||
if (rc.error) {
|
||||
for (var i in rc.serverStates) {
|
||||
if (rc.serverStates.hasOwnProperty(i)){
|
||||
if (rc.serverStates[i].hasOwnProperty('signal')) {
|
||||
print("Server shut down with : " + yaml.safeDump(rc.serverStates[i]) + " marking run as crashy.");
|
||||
serverCrashed = true;
|
||||
for (var i = 0; i < rc.results.length; i++ ) {
|
||||
if (rc.results[i].hasOwnProperty('isStartServers') &&
|
||||
(rc.results[i].isStartServers === true)) {
|
||||
for (var serverState in rc.results[i].serverStates) {
|
||||
if (rc.results[i].serverStates.hasOwnProperty(serverState)){
|
||||
if ((rc.results[i].serverStates[serverState].status === "NOT-FOUND") ||
|
||||
(rc.results[i].serverStates[serverState].hasOwnProperty('signal'))) {
|
||||
print("Server " + serverState + " shut down with:\n" +
|
||||
yaml.safeDump(rc.results[i].serverStates[serverState]) +
|
||||
" marking run as crashy.");
|
||||
serverCrashed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
if (typeof(instanceInfo.exitStatus) === 'undefined') {
|
||||
|
|
Loading…
Reference in New Issue