1
0
Fork 0

Fix agency shutdown bug. (#3683)

* Fix agency shutdown bug.
* Remove precondition that was not needed in AgencyComm::removeValues.
* Fail fatally if threads do not shut down.
This commit is contained in:
Max Neunhöffer 2017-11-14 16:33:46 +01:00 committed by GitHub
parent cc1fc716a4
commit 766ab7c8cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 34 additions and 12 deletions

View File

@ -913,8 +913,7 @@ AgencyCommResult AgencyComm::getValues(std::string const& key) {
AgencyCommResult AgencyComm::removeValues(std::string const& key,
bool recursive) {
AgencyWriteTransaction transaction(
AgencyOperation(key, AgencySimpleOperationType::DELETE_OP),
AgencyPrecondition(key, AgencyPrecondition::Type::EMPTY, false));
AgencyOperation(key, AgencySimpleOperationType::DELETE_OP));
return sendTransactionWithFailover(transaction);
}

View File

@ -333,11 +333,22 @@ void AgencyFeature::stop() {
}
}
// delete the Agent object here ensures it shuts down all of its threads
// this is a precondition that it must fulfill before we can go on with the
// shutdown
_agent.reset();
// Wait until all agency threads have been shut down. Note that the
// actual agent object is only destroyed in the destructor to allow
// server jobs from RestAgencyHandlers to complete without incident:
_agent->waitForThreadsStop();
}
AGENT = nullptr;
}
void AgencyFeature::unprepare() {
if (!isEnabled()) {
return;
}
// Deleting the Agent object here ensures that it shuts down all of its
// threads. This is a precondition that must be fulfilled before we can go
// on with the shutdown.
_agent.reset();
}

View File

@ -45,6 +45,7 @@ class AgencyFeature : virtual public application_features::ApplicationFeature {
void start() override final;
void beginShutdown() override final;
void stop() override final;
void unprepare() override final;
private:
bool _activated;

View File

@ -86,22 +86,31 @@ bool Agent::mergeConfiguration(VPackSlice const& persisted) {
/// Dtor shuts down thread
Agent::~Agent() {
waitForThreadsStop();
// Usually the threads have already been stopped by the call in
// AgencyFeature::stop, but since this only waits for the threads to stop,
// it can be done multiple times, and we do it here just in case the Agent
// object was created but never really started. We exit with a fatal error
// if the threads do not stop in time.
}
// Give up if some subthread breaks shutdown
/// Wait until threads are terminated:
void Agent::waitForThreadsStop() {
// It is allowed to call this multiple times; we do so from the destructor
// and from AgencyFeature::stop.
int counter = 0;
while (_constituent.isRunning() || _compactor.isRunning() ||
(_config.supervision() && _supervision.isRunning()) ||
(_inception != nullptr && _inception->isRunning())) {
usleep(100000);
// emit warning after 15 seconds
if (++counter == 10 * 15) {
// fail fatally after 5 mins:
if (++counter >= 10 * 60 * 5) {
LOG_TOPIC(FATAL, Logger::AGENCY) << "some agency thread did not finish";
FATAL_ERROR_EXIT();
}
}
shutdown();
shutdown(); // wait for the main Agent thread to terminate
}
/// State machine

View File

@ -51,6 +51,9 @@ class Agent : public arangodb::Thread,
/// @brief Clean up
~Agent();
/// @brief bring down threads, can be called multiple times.
void waitForThreadsStop();
/// @brief Get current term
term_t term() const;

View File

@ -344,7 +344,6 @@ function SynchronousReplicationSuite () {
tearDown : function () {
db._drop(cn);
global.ArangoAgency.remove('Target/FailedServers');
global.ArangoAgency.set('Target/FailedServers', {});
},