1
0
Fork 0

Better shutdown handling

This commit is contained in:
Andreas Streichardt 2016-08-23 12:51:22 +02:00
parent a8780ff4bb
commit 47a0f8602a
3 changed files with 34 additions and 10 deletions

View File

@ -408,7 +408,7 @@ void Constituent::beginShutdown() {
/// Start operation
bool Constituent::start(TRI_vocbase_t* vocbase,
aql::QueryRegistry* queryRegistry) {
TRI_ASSERT(vocbase != nullptr);
_vocbase = vocbase;
_queryRegistry = queryRegistry;
@ -420,7 +420,7 @@ bool Constituent::start(TRI_vocbase_t* vocbase,
/// Get persisted information and run election process
void Constituent::run() {
LOG(WARN) << "Starting constituent";
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Starting Constituent";
_id = _agent->config().id();
TRI_ASSERT(_vocbase != nullptr);

View File

@ -132,7 +132,7 @@ std::vector<check_t> Supervision::checkDBServers() {
report->add("LastHeartbeatAcked",
VPackValue(
timepointToString(std::chrono::system_clock::now())));
report->add("Status", VPackValue("GOOD"));
report->add("Status", VPackValue(Supervision::HEALTH_STATUS_GOOD));
} else {
std::chrono::seconds t{0};
t = std::chrono::duration_cast<std::chrono::seconds>(
@ -254,17 +254,17 @@ std::vector<check_t> Supervision::checkCoordinators() {
report->add("LastHeartbeatAcked",
VPackValue(
timepointToString(std::chrono::system_clock::now())));
report->add("Status", VPackValue("GOOD"));
report->add("Status", VPackValue(Supervision::HEALTH_STATUS_GOOD));
} else {
std::chrono::seconds t{0};
t = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now()-stringToTimepoint(lastHeartbeatAcked));
if (t.count() > _gracePeriod) { // Failure
if (lastStatus == "BAD") {
report->add("Status", VPackValue("FAILED"));
if (lastStatus == Supervision::HEALTH_STATUS_BAD) {
report->add("Status", VPackValue(Supervision::HEALTH_STATUS_FAILED));
}
} else {
report->add("Status", VPackValue("BAD"));
report->add("Status", VPackValue(Supervision::HEALTH_STATUS_BAD));
}
}
@ -346,6 +346,11 @@ void Supervision::run() {
while (!this->isStopping()) {
updateSnapshot();
// mop: always run the health checks, so that shutdown handling can detect whether a server has failed
if (_agent->leading()) {
doChecks();
}
if (isShuttingDown()) {
handleShutdown();
} else if (_agent->leading()) {
@ -365,16 +370,32 @@ bool Supervision::isShuttingDown() {
}
}
/// @brief Tell whether the snapshot lists the given server's health as GOOD
///
/// Looks up "<healthPrefix>/<serverName>/Status" in the current snapshot and
/// compares the stored string against HEALTH_STATUS_GOOD.
///
/// @param serverName name of the server whose health entry is inspected
/// @return true only if the status could be read and equals
///         HEALTH_STATUS_GOOD; false for any other status or if reading
///         the entry threw
bool Supervision::serverGood(const std::string& serverName) {
  try {
    std::string const statusPath = healthPrefix + "/" + serverName + "/Status";
    std::string const reported = _snapshot(statusPath).getString();
    return reported == Supervision::HEALTH_STATUS_GOOD;
  } catch (...) {
    // Any exception raised while reading the entry is deliberately
    // swallowed: a status that cannot be read counts as "not good".
  }
  return false;
}
void Supervision::handleShutdown() {
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Initiating shutdown";
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Waiting for clients to shut down";
Node::Children const& serversRegistered = _snapshot(currentServersRegisteredPrefix).children();
bool serversCleared = true;
for (auto const& server : serversRegistered) {
if (server.first == "Version") {
continue;
}
LOG_TOPIC(DEBUG, Logger::AGENCY)
<< "Waiting for " << server.first << " to shutdown";
if (!serverGood(server.first)) {
LOG_TOPIC(WARN, Logger::AGENCY)
<< "Server " << server.first << " did not shutdown properly it seems!";
continue;
}
serversCleared = false;
}
@ -390,7 +411,6 @@ bool Supervision::handleJobs() {
}
// Do supervision
doChecks();
shrinkCluster();
workJobs();
@ -398,7 +418,6 @@ bool Supervision::handleJobs() {
}
void Supervision::workJobs() {
Node::Children const& todos = _snapshot(toDoPrefix).children();
Node::Children const& pends = _snapshot(pendingPrefix).children();

View File

@ -108,6 +108,9 @@ class Supervision : public arangodb::Thread {
void wakeUp();
private:
static constexpr const char* HEALTH_STATUS_GOOD = "GOOD";
static constexpr const char* HEALTH_STATUS_BAD = "BAD";
static constexpr const char* HEALTH_STATUS_FAILED = "FAILED";
/// @brief Update agency prefix from agency itself
bool updateAgencyPrefix (size_t nTries = 10, int intervalSec = 1);
@ -165,6 +168,8 @@ class Supervision : public arangodb::Thread {
uint64_t _jobId;
uint64_t _jobIdMax;
bool serverGood(const std::string&);
static std::string _agencyPrefix;
};