mirror of https://gitee.com/bigwinds/arangodb
* agents' is obtained from leader's configuration * corrections in Supervision for advertised endpoints * change log * Updated Documentation for cluster/health. * Unified naming convention. * Fixed missing update of volatile fields. * Set version in right order. * Removed debug output. * Fixed jslint - missing ;
This commit is contained in:
parent
17a95710bd
commit
52cff7ad55
|
@ -146,6 +146,9 @@ v3.4.0-rc.5 (XXXX-XX-XX)
|
||||||
* fix move leader shard: wait until all but the old leader are in sync.
|
* fix move leader shard: wait until all but the old leader are in sync.
|
||||||
This fixes some unstable tests.
|
This fixes some unstable tests.
|
||||||
|
|
||||||
|
* cluster health features more elaborate agent records
|
||||||
|
|
||||||
|
* agency's supervision edited for advertised endpoints
|
||||||
|
|
||||||
v3.4.0-rc.4 (2018-11-04)
|
v3.4.0-rc.4 (2018-11-04)
|
||||||
------------------------
|
------------------------
|
||||||
|
|
|
@ -12,14 +12,23 @@ Queries the health of the cluster for monitoring purposes. The response is a JSO
|
||||||
- `Endpoint`: A string representing the network endpoint of the server.
|
- `Endpoint`: A string representing the network endpoint of the server.
|
||||||
- `Role`: The role the server plays. Possible values are `"AGENT"`, `"COORDINATOR"`, and `"DBSERVER"`.
|
- `Role`: The role the server plays. Possible values are `"AGENT"`, `"COORDINATOR"`, and `"DBSERVER"`.
|
||||||
- `CanBeDeleted`: Boolean representing whether the node can safely be removed from the cluster.
|
- `CanBeDeleted`: Boolean representing whether the node can safely be removed from the cluster.
|
||||||
|
- `Version`: The version string of the ArangoDB instance running on that node.
|
||||||
Additionally, if the node is a Coordinator or DBServer, it will also have the following attributes:
|
- `Engine`: Storage Engine used by that node.
|
||||||
- `AdvertisedEndpoint`: A string representing the advertised endpoint (e.g. external IP address or load balancer, optional)
|
- `Status`: A string indicating the health of the node as assessed by the supervision (agency). This should be considered the primary source of truth for Coordinator and DBServer node health. If the node is responding normally to requests, it is `"GOOD"`. If it has missed one heartbeat, it is `"BAD"`. If it has been declared failed by the supervision, which occurs after missing heartbeats for about 15 seconds, it will be marked `"FAILED"`.
|
||||||
- `Status`: A string indicating the health of the node as assessed by the supervision (agency). This should be considered primary source of truth for node health. If the node is responding normally to requests, it is `"GOOD"`. If it has missed one heartbeat, it is `"BAD"`. If it has been declared failed by the supervision, which occurs after missing heartbeats for about 15 seconds, it will be marked `"FAILED"`.
|
|
||||||
- `SyncStatus`: The last sync status reported by the node. This value is primarily used to determine the value of `Status`. Possible values include `"UNKNOWN"`, `"UNDEFINED"`, `"STARTUP"`, `"STOPPING"`, `"STOPPED"`, `"SERVING"`, `"SHUTDOWN"`.
|
Additionally, it will also have the following attributes, depending on its role:
|
||||||
- `ShortName`: A string representing the shortname of the server, e.g. `"DBServer1"`.
|
- Coordinators and DBServers:
|
||||||
- `Timestamp`: ISO 8601 timestamp specifying the last heartbeat received.
|
- `SyncStatus`: The last sync status reported by the node. This value is primarily used to determine the value of `Status`. Possible values include `"UNKNOWN"`, `"UNDEFINED"`, `"STARTUP"`, `"STOPPING"`, `"STOPPED"`, `"SERVING"`, `"SHUTDOWN"`.
|
||||||
- `Host`: An optional string, specifying the host machine if known.
|
- `ShortName`: A string representing the shortname of the server, e.g. `"Coordinator0001"`.
|
||||||
|
- `Timestamp`: ISO 8601 timestamp specifying the last heartbeat received.
|
||||||
|
- `Host`: An optional string, specifying the host machine if known.
|
||||||
|
- Only Coordinators:
|
||||||
|
- `AdvertisedEndpoint`: An optional string representing the advertised endpoint, if set (e.g. an external IP address or load balancer).
|
||||||
|
- Agents:
|
||||||
|
- `Leader`: ID of the agent this node regards as leader.
|
||||||
|
- `Leading`: Whether this agent is the leader (`true`) or not (`false`).
|
||||||
|
- `LastAckedTime`: Time in seconds since the last acknowledgement (`acked`).
|
||||||
|
|
||||||
|
|
||||||
@RESTRETURNCODES
|
@RESTRETURNCODES
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
#include "Logger/Logger.h"
|
#include "Logger/Logger.h"
|
||||||
#include "Rest/HttpRequest.h"
|
#include "Rest/HttpRequest.h"
|
||||||
#include "Rest/Version.h"
|
#include "Rest/Version.h"
|
||||||
|
#include "StorageEngine/EngineSelectorFeature.h"
|
||||||
|
|
||||||
using namespace arangodb;
|
using namespace arangodb;
|
||||||
|
|
||||||
|
@ -547,6 +548,8 @@ RestStatus RestAgencyHandler::handleConfig() {
|
||||||
body.add("commitIndex", Value(last));
|
body.add("commitIndex", Value(last));
|
||||||
_agent->lastAckedAgo(body);
|
_agent->lastAckedAgo(body);
|
||||||
body.add("configuration", _agent->config().toBuilder()->slice());
|
body.add("configuration", _agent->config().toBuilder()->slice());
|
||||||
|
body.add("engine", VPackValue(EngineSelectorFeature::engineName()));
|
||||||
|
body.add("version", VPackValue(ARANGODB_VERSION));
|
||||||
}
|
}
|
||||||
|
|
||||||
generateResult(rest::ResponseCode::OK, body.slice());
|
generateResult(rest::ResponseCode::OK, body.slice());
|
||||||
|
|
|
@ -61,18 +61,14 @@ struct HealthRecord {
|
||||||
|
|
||||||
HealthRecord(
|
HealthRecord(
|
||||||
std::string const& sn, std::string const& ep, std::string const& ho,
|
std::string const& sn, std::string const& ep, std::string const& ho,
|
||||||
std::string const& en, std::string const& sv) :
|
std::string const& en, std::string const& sv, std::string const& ae) :
|
||||||
shortName(sn), endpoint(ep), hostId(ho), serverVersion(sv),
|
shortName(sn), endpoint(ep), advertisedEndpoint(ae), hostId(ho),
|
||||||
engine(en), version(0) {}
|
serverVersion(sv), engine(en), version(0) {}
|
||||||
|
|
||||||
explicit HealthRecord(Node const& node) {
|
explicit HealthRecord(Node const& node) {
|
||||||
*this = node;
|
*this = node;
|
||||||
}
|
}
|
||||||
|
|
||||||
HealthRecord(HealthRecord const& other) {
|
|
||||||
*this = other;
|
|
||||||
}
|
|
||||||
|
|
||||||
HealthRecord& operator=(Node const& node) {
|
HealthRecord& operator=(Node const& node) {
|
||||||
version = 0;
|
version = 0;
|
||||||
if (shortName.empty()) {
|
if (shortName.empty()) {
|
||||||
|
@ -98,12 +94,16 @@ struct HealthRecord {
|
||||||
if (node.has("AdvertisedEndpoint")) {
|
if (node.has("AdvertisedEndpoint")) {
|
||||||
version = 3;
|
version = 3;
|
||||||
advertisedEndpoint = node.hasAsString("AdvertisedEndpoint").first;
|
advertisedEndpoint = node.hasAsString("AdvertisedEndpoint").first;
|
||||||
|
} else {
|
||||||
if (node.has("Engine") && node.has("Version")) {
|
advertisedEndpoint.clear();
|
||||||
version = 4;
|
}
|
||||||
engine = node.hasAsString("Engine").first;
|
if (node.has("Engine") && node.has("Version")) {
|
||||||
serverVersion = node.hasAsString("Version").first;
|
version = 4;
|
||||||
}
|
engine = node.hasAsString("Engine").first;
|
||||||
|
serverVersion = node.hasAsString("Version").first;
|
||||||
|
} else {
|
||||||
|
engine.clear();
|
||||||
|
serverVersion.clear();
|
||||||
}
|
}
|
||||||
} else if (node.has("LastHeartbeatStatus")) {
|
} else if (node.has("LastHeartbeatStatus")) {
|
||||||
version = 1;
|
version = 1;
|
||||||
|
@ -119,29 +119,18 @@ struct HealthRecord {
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
HealthRecord& operator=(HealthRecord const& other) {
|
|
||||||
shortName = other.shortName;
|
|
||||||
syncStatus = other.syncStatus;
|
|
||||||
status = other.status;
|
|
||||||
advertisedEndpoint = other.advertisedEndpoint;
|
|
||||||
endpoint = other.endpoint;
|
|
||||||
hostId = other.hostId;
|
|
||||||
engine = other.engine;
|
|
||||||
serverVersion = other.serverVersion;
|
|
||||||
version = other.version;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
void toVelocyPack(VPackBuilder& obj) const {
|
void toVelocyPack(VPackBuilder& obj) const {
|
||||||
TRI_ASSERT(obj.isOpenObject());
|
TRI_ASSERT(obj.isOpenObject());
|
||||||
obj.add("ShortName", VPackValue(shortName));
|
obj.add("ShortName", VPackValue(shortName));
|
||||||
obj.add("Endpoint", VPackValue(endpoint));
|
obj.add("Endpoint", VPackValue(endpoint));
|
||||||
obj.add("AdvertisedEndpoint", VPackValue(advertisedEndpoint));
|
|
||||||
obj.add("Host", VPackValue(hostId));
|
obj.add("Host", VPackValue(hostId));
|
||||||
obj.add("SyncStatus", VPackValue(syncStatus));
|
obj.add("SyncStatus", VPackValue(syncStatus));
|
||||||
obj.add("Status", VPackValue(status));
|
obj.add("Status", VPackValue(status));
|
||||||
obj.add("Version", VPackValue(serverVersion));
|
obj.add("Version", VPackValue(serverVersion));
|
||||||
obj.add("Engine", VPackValue(engine));
|
obj.add("Engine", VPackValue(engine));
|
||||||
|
if (!advertisedEndpoint.empty()) {
|
||||||
|
obj.add("AdvertisedEndpoint", VPackValue(advertisedEndpoint));
|
||||||
|
}
|
||||||
if (syncTime.empty()) {
|
if (syncTime.empty()) {
|
||||||
obj.add("Timestamp",
|
obj.add("Timestamp",
|
||||||
VPackValue(timepointToString(std::chrono::system_clock::now())));
|
VPackValue(timepointToString(std::chrono::system_clock::now())));
|
||||||
|
@ -152,7 +141,13 @@ struct HealthRecord {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool statusDiff(HealthRecord const& other) {
|
bool statusDiff(HealthRecord const& other) {
|
||||||
return (status != other.status || syncStatus != other.syncStatus);
|
return status != other.status ||
|
||||||
|
syncStatus != other.syncStatus ||
|
||||||
|
advertisedEndpoint != other.advertisedEndpoint ||
|
||||||
|
serverVersion != other.serverVersion ||
|
||||||
|
engine != other.engine ||
|
||||||
|
hostId != other.hostId ||
|
||||||
|
endpoint != other.endpoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
friend std::ostream& operator<<(std::ostream& o, HealthRecord const& hr) {
|
friend std::ostream& operator<<(std::ostream& o, HealthRecord const& hr) {
|
||||||
|
@ -481,18 +476,18 @@ std::vector<check_t> Supervision::check(std::string const& type) {
|
||||||
if (serversRegistered.has(enPath)) {
|
if (serversRegistered.has(enPath)) {
|
||||||
engine = serversRegistered.hasAsString(enPath).first;
|
engine = serversRegistered.hasAsString(enPath).first;
|
||||||
}
|
}
|
||||||
|
//"/arango/Current/<serverId>/externalEndpoint"
|
||||||
|
std::string externalEndpoint;
|
||||||
// "/arango/Current/<serverId>/externalEndpoint"
|
std::string extEndPath = serverID + "/advertisedEndpoint";
|
||||||
/*std::string externalEndpoint;
|
|
||||||
std::string extEndPath = serverID + "/externalEndpoint";
|
|
||||||
if (serversRegistered.has(extEndPath)) {
|
if (serversRegistered.has(extEndPath)) {
|
||||||
externalEndpoint = serversRegistered.hasAsString(extEndPath).first;
|
externalEndpoint = serversRegistered.hasAsString(extEndPath).first;
|
||||||
}*/
|
}
|
||||||
|
|
||||||
// Health records from persistence, from transience and a new one
|
// Health records from persistence, from transience and a new one
|
||||||
HealthRecord transist(shortName, endpoint, hostId, engine, serverVersion);
|
HealthRecord transist(
|
||||||
HealthRecord persist(shortName, endpoint, hostId, engine, serverVersion);
|
shortName, endpoint, hostId, engine, serverVersion, externalEndpoint);
|
||||||
|
HealthRecord persist(
|
||||||
|
shortName, endpoint, hostId, engine, serverVersion, externalEndpoint);
|
||||||
|
|
||||||
// Get last health entries from transient and persistent key value stores
|
// Get last health entries from transient and persistent key value stores
|
||||||
if (_transient.has(healthPrefix + serverID)) {
|
if (_transient.has(healthPrefix + serverID)) {
|
||||||
|
@ -519,6 +514,13 @@ std::vector<check_t> Supervision::check(std::string const& type) {
|
||||||
transist.syncTime = syncTime;
|
transist.syncTime = syncTime;
|
||||||
transist.syncStatus = syncStatus;
|
transist.syncStatus = syncStatus;
|
||||||
|
|
||||||
|
// update volatile values that may change
|
||||||
|
transist.advertisedEndpoint = externalEndpoint;
|
||||||
|
transist.serverVersion = serverVersion;
|
||||||
|
transist.engine = engine;
|
||||||
|
transist.hostId = hostId;
|
||||||
|
transist.endpoint = endpoint;
|
||||||
|
|
||||||
// Calculate elapsed since lastAcked
|
// Calculate elapsed since lastAcked
|
||||||
auto elapsed = std::chrono::duration<double>(
|
auto elapsed = std::chrono::duration<double>(
|
||||||
std::chrono::system_clock::now() - lastAckedTime);
|
std::chrono::system_clock::now() - lastAckedTime);
|
||||||
|
|
|
@ -429,18 +429,13 @@ static void JS_Agency(v8::FunctionCallbackInfo<v8::Value> const& args) {
|
||||||
}
|
}
|
||||||
|
|
||||||
VPackBuilder builder;
|
VPackBuilder builder;
|
||||||
{ VPackArrayBuilder a(&builder);
|
|
||||||
{ VPackArrayBuilder b(&builder);
|
|
||||||
builder.add(VPackValue("/.agency"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
AgencyComm comm;
|
AgencyComm comm;
|
||||||
AgencyCommResult result =
|
AgencyCommResult result =
|
||||||
comm.sendWithFailover(
|
comm.sendWithFailover(
|
||||||
arangodb::rest::RequestType::POST,
|
arangodb::rest::RequestType::GET,
|
||||||
AgencyCommManager::CONNECTION_OPTIONS._requestTimeout,
|
AgencyCommManager::CONNECTION_OPTIONS._requestTimeout,
|
||||||
std::string("/_api/agency/read"), builder.slice());
|
std::string("/_api/agency/config"), builder.slice());
|
||||||
|
|
||||||
if (!result.successful()) {
|
if (!result.successful()) {
|
||||||
THROW_AGENCY_EXCEPTION(result);
|
THROW_AGENCY_EXCEPTION(result);
|
||||||
|
|
|
@ -188,7 +188,7 @@ actions.defineHttp({
|
||||||
while (true) {
|
while (true) {
|
||||||
var mode = global.ArangoAgency.read([["/arango/Supervision/State/Mode"]])[0].
|
var mode = global.ArangoAgency.read([["/arango/Supervision/State/Mode"]])[0].
|
||||||
arango.Supervision.State.Mode;
|
arango.Supervision.State.Mode;
|
||||||
|
|
||||||
if (body === "on" && mode === "Maintenance") {
|
if (body === "on" && mode === "Maintenance") {
|
||||||
res.body = JSON.stringify({
|
res.body = JSON.stringify({
|
||||||
error: false,
|
error: false,
|
||||||
|
@ -202,7 +202,7 @@ actions.defineHttp({
|
||||||
}
|
}
|
||||||
|
|
||||||
wait(0.1);
|
wait(0.1);
|
||||||
|
|
||||||
if (new Date().getTime() > waitUntil) {
|
if (new Date().getTime() > waitUntil) {
|
||||||
res.responseCode = actions.HTTP_GATEWAY_TIMEOUT;
|
res.responseCode = actions.HTTP_GATEWAY_TIMEOUT;
|
||||||
res.body = JSON.stringify({
|
res.body = JSON.stringify({
|
||||||
|
@ -212,10 +212,10 @@ actions.defineHttp({
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ;
|
return ;
|
||||||
|
|
||||||
}});
|
}});
|
||||||
// //////////////////////////////////////////////////////////////////////////////
|
// //////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -540,8 +540,50 @@ actions.defineHttp({
|
||||||
return Health;
|
return Health;
|
||||||
}, Health);
|
}, Health);
|
||||||
|
|
||||||
Object.entries(agency[0]['.agency'].pool).forEach(([key, value]) => {
|
Object.entries(agency.configuration.pool).forEach(([key, value]) => {
|
||||||
Health[key] = {Endpoint: value, Role: 'Agent', CanBeDeleted: false};
|
|
||||||
|
if (Health.hasOwnProperty(key)) {
|
||||||
|
Health[key].Endpoint = value;
|
||||||
|
Health[key].Role = 'Agent';
|
||||||
|
Health[key].CanBeDeleted = false;
|
||||||
|
} else {
|
||||||
|
Health[key] = {Endpoint: value, Role: 'Agent', CanBeDeleted: false};
|
||||||
|
}
|
||||||
|
|
||||||
|
var options = { timeout: 5 };
|
||||||
|
var op = ArangoClusterComm.asyncRequest(
|
||||||
|
'GET', value, req.database, '/_api/agency/config', '', {}, options);
|
||||||
|
var r = ArangoClusterComm.wait(op);
|
||||||
|
|
||||||
|
if (r.status === 'RECEIVED') {
|
||||||
|
var record = JSON.parse(r.body);
|
||||||
|
Health[key].Version = record.version;
|
||||||
|
Health[key].Engine = record.engine;
|
||||||
|
Health[key].Leader = record.leaderId;
|
||||||
|
if (record.hasOwnProperty("lastAcked")) {
|
||||||
|
Health[key].Leading = true;
|
||||||
|
Object.entries(record.lastAcked).forEach(([k,v]) => {
|
||||||
|
if (Health.hasOwnProperty(k)) {
|
||||||
|
Health[k].LastAckedTime = v.lastAckedTime;
|
||||||
|
} else {
|
||||||
|
Health[k] = {LastAckedTime: v.lastAckedTime};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Health[key].Status = "GOOD";
|
||||||
|
} else {
|
||||||
|
Health[key].Status = "BAD";
|
||||||
|
if (r.status === 'TIMEOUT') {
|
||||||
|
Health[key].Error = "TIMEOUT";
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
Health[key].Error = JSON.parse(r.body);
|
||||||
|
} catch (err) {
|
||||||
|
Health[key].Error = "UNKNOWN";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
actions.resultOk(req, res, actions.HTTP_OK, {Health, ClusterId: clusterId});
|
actions.resultOk(req, res, actions.HTTP_OK, {Health, ClusterId: clusterId});
|
||||||
|
|
Loading…
Reference in New Issue