mirror of https://gitee.com/bigwinds/arangodb
Fix agency shutdown bug. (#3683)
* Fix agency shutdown bug. * Remove precondition that was not needed in AgencyComm::removeValues. * Fail fatally if threads do not shut down.
This commit is contained in:
parent
cc1fc716a4
commit
766ab7c8cf
|
@ -913,8 +913,7 @@ AgencyCommResult AgencyComm::getValues(std::string const& key) {
|
||||||
AgencyCommResult AgencyComm::removeValues(std::string const& key,
|
AgencyCommResult AgencyComm::removeValues(std::string const& key,
|
||||||
bool recursive) {
|
bool recursive) {
|
||||||
AgencyWriteTransaction transaction(
|
AgencyWriteTransaction transaction(
|
||||||
AgencyOperation(key, AgencySimpleOperationType::DELETE_OP),
|
AgencyOperation(key, AgencySimpleOperationType::DELETE_OP));
|
||||||
AgencyPrecondition(key, AgencyPrecondition::Type::EMPTY, false));
|
|
||||||
|
|
||||||
return sendTransactionWithFailover(transaction);
|
return sendTransactionWithFailover(transaction);
|
||||||
}
|
}
|
||||||
|
|
|
@ -333,11 +333,22 @@ void AgencyFeature::stop() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// delete the Agent object here ensures it shuts down all of its threads
|
// Wait until all agency threads have been shut down. Note that the
|
||||||
// this is a precondition that it must fulfill before we can go on with the
|
// actual agent object is only destroyed in the destructor to allow
|
||||||
// shutdown
|
// server jobs from RestAgencyHandlers to complete without incident:
|
||||||
_agent.reset();
|
_agent->waitForThreadsStop();
|
||||||
}
|
}
|
||||||
|
|
||||||
AGENT = nullptr;
|
AGENT = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AgencyFeature::unprepare() {
|
||||||
|
if (!isEnabled()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// deleting the Agent object here ensures it shuts down all of its threads
|
||||||
|
// this is a precondition that it must fulfill before we can go on with the
|
||||||
|
// shutdown
|
||||||
|
_agent.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,7 @@ class AgencyFeature : virtual public application_features::ApplicationFeature {
|
||||||
void start() override final;
|
void start() override final;
|
||||||
void beginShutdown() override final;
|
void beginShutdown() override final;
|
||||||
void stop() override final;
|
void stop() override final;
|
||||||
|
void unprepare() override final;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool _activated;
|
bool _activated;
|
||||||
|
|
|
@ -86,22 +86,31 @@ bool Agent::mergeConfiguration(VPackSlice const& persisted) {
|
||||||
|
|
||||||
/// Dtor shuts down thread
|
/// Dtor shuts down thread
|
||||||
Agent::~Agent() {
|
Agent::~Agent() {
|
||||||
|
waitForThreadsStop();
|
||||||
|
// This has usually already been done from AgencyFeature::stop,
|
||||||
|
// but since this only waits for the threads to stop, it can be done
|
||||||
|
// multiple times, and we do it just in case the Agent object was
|
||||||
|
// created but never really started. Here, we exit with a fatal error
|
||||||
|
// if the threads do not stop in time.
|
||||||
|
}
|
||||||
|
|
||||||
// Give up if some subthread breaks shutdown
|
/// Wait until threads are terminated:
|
||||||
|
void Agent::waitForThreadsStop() {
|
||||||
|
// It is allowed to call this multiple times, we do so from the destructor
|
||||||
|
// and from AgencyFeature::stop.
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
while (_constituent.isRunning() || _compactor.isRunning() ||
|
while (_constituent.isRunning() || _compactor.isRunning() ||
|
||||||
(_config.supervision() && _supervision.isRunning()) ||
|
(_config.supervision() && _supervision.isRunning()) ||
|
||||||
(_inception != nullptr && _inception->isRunning())) {
|
(_inception != nullptr && _inception->isRunning())) {
|
||||||
usleep(100000);
|
usleep(100000);
|
||||||
|
|
||||||
// emit warning after 15 seconds
|
// fail fatally after 5 mins:
|
||||||
if (++counter == 10 * 15) {
|
if (++counter >= 10 * 60 * 5) {
|
||||||
LOG_TOPIC(FATAL, Logger::AGENCY) << "some agency thread did not finish";
|
LOG_TOPIC(FATAL, Logger::AGENCY) << "some agency thread did not finish";
|
||||||
FATAL_ERROR_EXIT();
|
FATAL_ERROR_EXIT();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
shutdown(); // wait for the main Agent thread to terminate
|
||||||
shutdown();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// State machine
|
/// State machine
|
||||||
|
|
|
@ -51,6 +51,9 @@ class Agent : public arangodb::Thread,
|
||||||
/// @brief Clean up
|
/// @brief Clean up
|
||||||
~Agent();
|
~Agent();
|
||||||
|
|
||||||
|
/// @brief bring down threads, can be called multiple times.
|
||||||
|
void waitForThreadsStop();
|
||||||
|
|
||||||
/// @brief Get current term
|
/// @brief Get current term
|
||||||
term_t term() const;
|
term_t term() const;
|
||||||
|
|
||||||
|
|
|
@ -344,7 +344,6 @@ function SynchronousReplicationSuite () {
|
||||||
|
|
||||||
tearDown : function () {
|
tearDown : function () {
|
||||||
db._drop(cn);
|
db._drop(cn);
|
||||||
global.ArangoAgency.remove('Target/FailedServers');
|
|
||||||
global.ArangoAgency.set('Target/FailedServers', {});
|
global.ArangoAgency.set('Target/FailedServers', {});
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue