
Merge branch 'devel' of https://github.com/arangodb/arangodb into devel

Jan Steemann 2016-02-05 18:20:32 +01:00
commit 99b79acb61
10 changed files with 115 additions and 259 deletions

View File

@@ -359,57 +359,69 @@ function analyzeCoreDumpWindows(instanceInfo) {
/// @brief checks whether an instance is still alive
////////////////////////////////////////////////////////////////////////////////
function checkInstanceAlive(instanceInfo, options) {
if (options.cluster === false) {
if (instanceInfo.hasOwnProperty('exitStatus')) {
return false;
}
const res = statusExternal(instanceInfo.pid, false);
const ret = res.status === "RUNNING";
if (!ret) {
print("ArangoD with PID " + instanceInfo.pid.pid + " gone:");
print(instanceInfo);
if (res.hasOwnProperty('signal') &&
((res.signal === 11) ||
(res.signal === 6) ||
// Windows sometimes has random numbers in signal...
(require("internal").platform.substr(0, 3) === 'win')
)
) {
const storeArangodPath = "/var/tmp/arangod_" + instanceInfo.pid.pid;
print("Core dump written; copying arangod to " +
instanceInfo.tmpDataDir + " for later analysis.");
res.gdbHint = "Run debugger with 'gdb " +
storeArangodPath + " " + options.coreDirectory +
"/core*" + instanceInfo.pid.pid + "*'";
if (require("internal").platform.substr(0, 3) === 'win') {
// Windows: wait for procdump to do its job...
statusExternal(instanceInfo.monitor, true);
analyzeCoreDumpWindows(instanceInfo);
} else {
fs.copyFile("bin/arangod", storeArangodPath);
analyzeCoreDump(instanceInfo, options, storeArangodPath, instanceInfo.pid.pid);
}
}
instanceInfo.exitStatus = res;
}
if (!ret) {
print("marking crashy");
serverCrashed = true;
}
return ret;
function checkInstanceAliveSingleServer(instanceInfo, options) {
if (instanceInfo.hasOwnProperty('exitStatus')) {
return false;
}
// cluster tests
const res = statusExternal(instanceInfo.pid, false);
const ret = res.status === "RUNNING";
if (!ret) {
print("ArangoD with PID " + instanceInfo.pid.pid + " gone:");
print(instanceInfo);
if (res.hasOwnProperty('signal') &&
((res.signal === 11) ||
(res.signal === 6) ||
// Windows sometimes has random numbers in signal...
(require("internal").platform.substr(0, 3) === 'win')
)
) {
const storeArangodPath = "/var/tmp/arangod_" + instanceInfo.pid.pid;
print("Core dump written; copying arangod to " +
instanceInfo.tmpDataDir + " for later analysis.");
res.gdbHint = "Run debugger with 'gdb " +
storeArangodPath + " " + options.coreDirectory +
"/core*" + instanceInfo.pid.pid + "*'";
if (require("internal").platform.substr(0, 3) === 'win') {
// Windows: wait for procdump to do its job...
statusExternal(instanceInfo.monitor, true);
analyzeCoreDumpWindows(instanceInfo);
} else {
fs.copyFile("bin/arangod", storeArangodPath);
analyzeCoreDump(instanceInfo, options, storeArangodPath, instanceInfo.pid.pid);
}
}
instanceInfo.exitStatus = res;
}
if (!ret) {
print("marking crashy");
serverCrashed = true;
}
return ret;
}
function checkRemoteInstance(pid, wait, options) {
const debug = options.debug || false;
const p = JSON.stringify(pid);
const res = JSON.parse(arango.PUT("/_admin/execute",
`return require("internal").statusExternal(${p}, ${wait});`));
if (debug) {
print(`status of remote process ${p}: ${res.status}`);
}
return res;
}
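The helper above pushes the statusExternal() call to the remote server by wrapping it in a JavaScript snippet and PUTting it to /_admin/execute, so the test driver can supervise processes that live on another dispatcher. A minimal usage sketch (the PID object and options are made-up values for illustration):

// Hypothetical call site: poll a remote arangod without waiting for it to exit.
var remotePid = { pid: 12345 };  // assumed shape of an external process id
var status = checkRemoteInstance(remotePid, false, { debug: true });
if (status.status !== "RUNNING") {
  print("remote instance gone: " + JSON.stringify(status));
}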
function checkInstanceAliveCluster(instanceInfo, options) {
let clusterFit = true;
for (let part in instanceInfo.kickstarter.runInfo) {
@@ -417,7 +429,7 @@ function checkInstanceAlive(instanceInfo, options) {
for (let pid in instanceInfo.kickstarter.runInfo[part].pids) {
if (instanceInfo.kickstarter.runInfo[part].pids.hasOwnProperty(pid)) {
const checkpid = instanceInfo.kickstarter.runInfo[part].pids[pid];
const ress = statusExternal(checkpid, false);
const ress = checkRemoteInstance(checkpid, false, options);
if (ress.hasOwnProperty('signal') &&
((ress.signal === 11) || (ress.signal === 6))) {
@@ -432,7 +444,7 @@ function checkInstanceAlive(instanceInfo, options) {
if (require("internal").platform.substr(0, 3) === 'win') {
// Windows: wait for procdump to do its job...
statusExternal(instanceInfo.monitor, true);
checkRemoteInstance(instanceInfo.monitor, true, options);
analyzeCoreDumpWindows(instanceInfo);
} else {
fs.copyFile("bin/arangod", storeArangodPath);
@@ -456,6 +468,14 @@ function checkInstanceAlive(instanceInfo, options) {
}
}
function checkInstanceAlive(instanceInfo, options) {
if (options.cluster === false) {
return checkInstanceAliveSingleServer(instanceInfo, options);
}
return checkInstanceAliveCluster(instanceInfo, options);
}
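With the split above, checkInstanceAlive() is reduced to a dispatcher but keeps its old boolean contract: false once the instance has recorded an exitStatus. A sketch of the supervision loop a test driver might run on top of it (the testStillRunning predicate is purely illustrative, not part of the diff):

// Illustrative supervision loop, assuming a hypothetical test-driver predicate:
while (testStillRunning()) {
  if (!checkInstanceAlive(instanceInfo, options)) {
    throw new Error("server died during the test run");
  }
  require("internal").wait(1);  // poll roughly once per second
}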
////////////////////////////////////////////////////////////////////////////////
/// @brief waits for garbage collection using /_admin/execute
////////////////////////////////////////////////////////////////////////////////
@@ -1283,7 +1303,7 @@ function startInstance(protocol, options, addArgs, testname, tmpDir) {
let roles = runInfo[j].roles;
let endpoints = runInfo[j].endpoints;
let pos = roles.indexOf("Coordinator");
let pos = roles.indexOf("COORDINATOR");
endpoint = endpoints[pos];
}

View File

@@ -324,7 +324,7 @@ launchActions.startServers = function (dispatchers, cmd, isRelaunch) {
if (cmd.extremeVerbosity) {
console.info("Downloading %sLaunchers/%s", url, encode(cmd.name));
}
var res = download(url + "Launchers/" + encode(cmd.name), "", { method: "GET",
var res = download(url + "Dispatcher/Launchers/" + encode(cmd.name), "", { method: "GET",
followRedirects: true });
if (res.code !== 200) {
return {"error": true, "isStartServers": true, "suberror": res};
@@ -338,30 +338,32 @@ launchActions.startServers = function (dispatchers, cmd, isRelaunch) {
var servers = info.DBservers.concat(info.Coordinators);
roles = [];
for (i = 0; i < info.DBservers.length; i++) {
roles.push("DBserver");
roles.push("PRIMARY");
}
for (i = 0; i < info.Coordinators.length; i++) {
roles.push("Coordinator");
roles.push("COORDINATOR");
}
pids = [];
endpoints = [];
endpointNames = [];
for (i = 0; i < servers.length; i++) {
id = servers[i];
var serverUrl = url + "Dispatcher/Endpoints/" + encodeURIComponent(id);
if (cmd.extremeVerbosity) {
console.info("Downloading %sTarget/MapIDToEndpoint/%s", url, id);
console.info("Downloading ", serverUrl);
}
res = download(url + "Target/MapIDToEndpoint/" + id);
res = download(serverUrl);
if (res.code !== 200) {
return {"error": true, "pids": pids,
"isStartServers": true, "suberror": res};
}
console.info("Starting server %s",id);
body = JSON.parse(res.body);
ep = JSON.parse(body.node.value);
ep = body.node.value;
port = getPort(ep);
var useSSL = false;
if (roles[i] === "DBserver") {
if (roles[i] === "PRIMARY") {
args = ["--configuration", ArangoServerState.dbserverConfig()];
useSSL = cmd.useSSLonDBservers;
}
@@ -372,7 +374,9 @@ launchActions.startServers = function (dispatchers, cmd, isRelaunch) {
args = args.concat([
"--cluster.disable-dispatcher-kickstarter", "true",
"--cluster.disable-dispatcher-frontend", "true",
"--cluster.my-id", id,
"--cluster.my-local-info", id,
"--cluster.my-role", roles[i],
"--cluster.my-address", ep,
"--cluster.agency-prefix", cmd.agency.agencyPrefix,
"--cluster.agency-endpoint", cmd.agency.endpoints[0],
"--server.endpoint"]);

View File

@@ -437,70 +437,25 @@ Planner.prototype.makePlan = function() {
// Set up agency data:
var agencyData = this.agencyData = {};
var prefix = agencyData[config.agencyPrefix] = {};
var tmp;
// First the Target, we collect Launchers information at the same time:
tmp = prefix.Target = {};
tmp.Lock = '"UNLOCKED"';
tmp.Version = '"1"';
var dbs = tmp.DBServers = {};
tmp.MapLocalToEndpoint = {}; // will stay empty for now
var map = tmp.MapIDToEndpoint = {};
var s;
var ep;
var prefix = agencyData[config.agencyPrefix] = {Dispatcher: {}};
var endpoints = {};
for (i = 0; i < DBservers.length; i++) {
s = DBservers[i];
dbs[s.id] = '"none"';
ep = exchangePort(dispatchers[s.dispatcher].endpoint,s.port);
ep = exchangeProtocol(ep, config.useSSLonDBservers);
map[s.id] = '"'+ep+'"';
launchers[s.dispatcher].DBservers.push(s.id);
}
var coo = tmp.Coordinators = {};
for (i = 0; i < coordinators.length; i++) {
s = coordinators[i];
coo[s.id] = '"none"';
ep = exchangePort(dispatchers[s.dispatcher].endpoint,s.port);
ep = exchangeProtocol(ep, config.useSSLonCoordinators);
map[s.id] = '"' + ep + '"';
endpoints[s.id] = ep;
launchers[s.dispatcher].DBservers.push(s.id);
}
for (i = 0; i < coordinators.length; i++) {
s = coordinators[i];
ep = exchangePort(dispatchers[s.dispatcher].endpoint,s.port);
ep = exchangeProtocol(ep, config.useSSLonCoordinators);
endpoints[s.id] = ep;
launchers[s.dispatcher].Coordinators.push(s.id);
}
tmp.Databases = { "_system" : '{"name":"_system", "id":"1"}' };
tmp.Collections = { "_system" : {} };
// Now Plan:
prefix.Plan = copy(tmp);
delete prefix.Plan.MapIDToEndpoint;
// Now Current:
prefix.Current = { "Lock" : '"UNLOCKED"',
"Version" : '"1"',
"DBservers" : {},
"Coordinators" : {},
"Databases" : {"_system":{ "name": '"name"', "id": '"1"' }},
"Collections" : {"_system":{}},
"NewServers" : {},
"ServersRegistered": {"Version":'"1"'},
"ShardsCopied" : {} };
// Now Sync:
prefix.Sync = { "ServerStates" : {},
"Problems" : {},
"LatestID" : '"1"',
"Commands" : {},
"HeartbeatIntervalMs": '5000',
"UserVersion" : '"1"' };
tmp = prefix.Sync.Commands;
for (i = 0; i < DBservers; i++) {
tmp[DBservers[i].id] = '"SERVE"';
}
// Finally Launchers:
prefix.Launchers = objmap(launchers, JSON.stringify);
prefix.InitDone = "true";
prefix.Dispatcher.Launchers = objmap(launchers, JSON.stringify);
prefix.Dispatcher.Endpoints = endpoints;
// make commands
tmp = this.commands = [];
var tmp2,j;
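The rewritten plan drops the old Target/Plan/Current/Sync bootstrapping and only publishes launcher and endpoint maps under <agencyPrefix>/Dispatcher/, which is where the patched launchActions.startServers above now downloads Dispatcher/Launchers/<name> and Dispatcher/Endpoints/<id> from. A sketch of the resulting shape, with invented server ids and endpoints:

// Illustrative agencyData layout after this change (values made up):
var exampleAgencyData = {
  "arango": {                                    // config.agencyPrefix
    "Dispatcher": {
      "Launchers": {
        "me": "{\"DBservers\":[\"Pavel\"],\"Coordinators\":[\"Claus\"]}"
      },
      "Endpoints": {
        "Pavel": "tcp://127.0.0.1:8629",         // a DBserver (PRIMARY)
        "Claus": "tcp://127.0.0.1:8530"          // a coordinator
      }
    },
    "InitDone": "true"
  }
};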

View File

@@ -231,7 +231,6 @@ void LogAppenderFile::logMessage(LogLevel level, std::string const& message,
}
if (level == LogLevel::FATAL && _fatal2stderr) {
MUTEX_LOCKER(guard, AppendersLock);
// a fatal error. always print this on stderr, too.
WriteStderr(level, message);

View File

@@ -389,5 +389,10 @@ void TRI_PrintBacktrace() {
TRI_GetBacktrace(out);
fprintf(stderr, "%s", out.c_str());
#endif
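// on platforms that provide pstack(1), shell out to it to print this process' stack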
#if TRI_HAVE_PSTACK
char buf[64];
snprintf(buf, 64, "/usr/bin/pstack %i", getpid());
system(buf);
#endif
#endif
}

View File

@@ -175,6 +175,7 @@ again:
}
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not read-lock the read-write lock: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not read-lock the read-write lock: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -194,6 +195,7 @@ void TRI_ReadUnlockReadWriteLock(TRI_read_write_lock_t* lock) {
if (rc != 0) {
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not read-unlock the read-write lock: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not read-unlock the read-write lock: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -244,6 +246,7 @@ void TRI_WriteLockReadWriteLock(TRI_read_write_lock_t* lock) {
LOG(ERR) << "rw-lock deadlock detected";
}
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not write-lock the read-write lock: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not write-lock the read-write lock: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -263,6 +266,7 @@ void TRI_WriteUnlockReadWriteLock(TRI_read_write_lock_t* lock) {
if (rc != 0) {
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not write-unlock the read-write lock: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not write-unlock the read-write lock: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -311,6 +315,7 @@ void TRI_SignalCondition(TRI_condition_t* cond) {
if (rc != 0) {
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not signal the condition: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not signal the condition: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -328,6 +333,7 @@ void TRI_BroadcastCondition(TRI_condition_t* cond) {
if (rc != 0) {
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not broadcast the condition: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not broadcast the condition: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -345,6 +351,7 @@ void TRI_WaitCondition(TRI_condition_t* cond) {
if (rc != 0) {
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not wait for the condition: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not wait for the condition: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -382,6 +389,7 @@ bool TRI_TimedWaitCondition(TRI_condition_t* cond, uint64_t delay) {
}
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not wait for the condition: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not wait for the condition: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -399,6 +407,7 @@ void TRI_LockCondition(TRI_condition_t* cond) {
if (rc != 0) {
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not lock the condition: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not lock the condition: " << strerror(rc); FATAL_ERROR_EXIT();
@@ -414,6 +423,7 @@ void TRI_UnlockCondition(TRI_condition_t* cond) {
if (rc != 0) {
#ifdef TRI_ENABLE_MAINTAINER_MODE
LOG(ERR) << "could not unlock the condition: " << strerror(rc);
TRI_ASSERT(false);
#endif
LOG(FATAL) << "could not unlock the condition: " << strerror(rc); FATAL_ERROR_EXIT();

View File

@@ -47,6 +47,8 @@
#define TRI_PLATFORM "solaris"
#define TRI_HAVE_PSTACK 1
////////////////////////////////////////////////////////////////////////////////
/// @brief enabled features
////////////////////////////////////////////////////////////////////////////////

View File

@@ -1,139 +0,0 @@
/*jshint globalstrict:false, strict:false */
var agencyData = {
"arango" : {
"Sync" : {
"LatestID" : "\"1\"",
"Problems" : {},
"UserVersion" : "\"1\"",
"ServerStates" : {},
"HeartbeatIntervalMs" : "5000",
"Commands" : {}
},
"Current" : {
"Collections" : {
"_system" : {}
},
"Version" : "\"1\"",
"ShardsCopied" : {},
"NewServers" : {},
"Coordinators" : {},
"Lock" : "\"UNLOCKED\"",
"DBservers" : {},
"ServersRegistered" : {
"Version" : "\"1\""
},
"Databases" : {
"_system" : {
"id" : "\"1\"",
"name" : "\"name\""
}
}
},
"Plan" : {
"Coordinators" : {
},
"Databases" : {
"_system" : "{\"name\":\"_system\", \"id\":\"1\"}"
},
"DBServers" : {
},
"Version" : "\"1\"",
"Collections" : {
"_system" : {
}
},
"Lock" : "\"UNLOCKED\""
},
"Launchers" : {
},
"Target" : {
"Coordinators" : {
},
"MapIDToEndpoint" : {
},
"Collections" : {
"_system" : {}
},
"Version" : "\"1\"",
"MapLocalToID" : {},
"Databases" : {
"_system" : "{\"name\":\"_system\", \"id\":\"1\"}"
},
"DBServers" : {
},
"Lock" : "\"UNLOCKED\""
}
}
};
var download = require("internal").download;
var print = require("internal").print;
var wait = require("internal").wait;
function encode (st) {
var st2 = "";
var i;
for (i = 0; i < st.length; i++) {
if (st[i] === "_") {
st2 += "@U";
}
else if (st[i] === "@") {
st2 += "@@";
}
else {
st2 += st[i];
}
}
return encodeURIComponent(st2);
}
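The escaping above maps "_" to "@U" and "@" to "@@" before URI-encoding, keeping agency key names reversible; it presumably mirrors the encode() used by launchActions.startServers above. Two worked examples, computed from the function as written:

// encode("_system") -> encodeURIComponent("@Usystem") -> "%40Usystem"
// encode("a@b")     -> encodeURIComponent("a@@b")     -> "a%40%40b"
print(encode("_system"));  // prints %40Usystem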
function sendToAgency (agencyURL, path, obj) {
var res;
var body;
print("Sending",path," to agency...");
if (typeof obj === "string") {
var count = 0;
while (count++ <= 2) {
body = "value="+encodeURIComponent(obj);
print("Body:", body);
print("URL:", agencyURL+path);
res = download(agencyURL+path,body,
{"method":"PUT", "followRedirects": true,
"headers": { "Content-Type": "application/x-www-form-urlencoded"}});
if (res.code === 201 || res.code === 200) {
return true;
}
wait(3); // wait 3 seconds before trying again
}
return res;
}
if (typeof obj !== "object") {
return "Strange object found: not a string or object";
}
var keys = Object.keys(obj);
var i;
if (keys.length !== 0) {
for (i = 0; i < keys.length; i++) {
res = sendToAgency (agencyURL, path+encode(keys[i])+"/", obj[keys[i]]);
if (res !== true) {
return res;
}
}
return true;
}
else {
body = "dir=true";
res = download(agencyURL+path, body,
{"method": "PUT", "followRedirects": true,
"headers": { "Content-Type": "application/x-www-form-urlencoded"}});
if (res.code !== 201 && res.code !== 200) {
return res;
}
return true;
}
}
print("Starting to send data to Agency...");
var res = sendToAgency("http://localhost:4001/v2/keys", "/", agencyData);
print("Result:",res);

View File

@@ -19,8 +19,6 @@ echo Starting agency...
docker run --detach=true -p 4001:4001 --name=etcd -v /tmp/cluster/etcd:/data microbox/etcd:latest etcd -name agency > /tmp/cluster/etcd.id
sleep 1
echo Initializing agency...
docker run -it --link=etcd:agency --rm neunhoef/arangodb_cluster arangosh --javascript.execute /scripts/init_agency.js > /tmp/cluster/init_agency.log
echo Starting discovery...
docker run --detach=true --link=etcd:agency -v /tmp/cluster/discovery:/discovery --name discovery neunhoef/arangodb_cluster arangosh --javascript.execute scripts/discover.js > /tmp/cluster/discovery.id

View File

@@ -10,6 +10,8 @@ else
PS='/'
fi;
ulimit -n 2048
export PORT=`expr 1024 + $RANDOM`
export ETCD_NONO_WAL_SYNC=1