1
0
Fork 0

Fix agency election lockstep bug. (#9351)

* Fix agency election lockstep bug.

Reset the base point for the random election timeout to the current time whenever
we have cast a vote, be it for ourselves or for some other server.

* CHANGELOG.

* Fix compilation.
This commit is contained in:
Max Neunhöffer 2019-06-27 22:06:26 +02:00 committed by GitHub
parent 2c59a2716a
commit d6d362bd3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 1 deletions

View File

@ -40,6 +40,7 @@ devel
* fixed agency bug with TTL object writes discovered in 3.4.6
* fixed agency bug with election lockstep
v3.5.0-rc.3 (2019-05-31)
------------------------

View File

@ -391,6 +391,10 @@ bool Constituent::vote(term_t termOfPeer, std::string const& id,
if (_votedFor != NO_LEADER) { // already voted in this term
if (_votedFor == id) {
LOG_TOPIC("41c49", DEBUG, Logger::AGENCY) << "repeating vote for " << id;
// Set the last heartbeat seen to now, to grant the other server some time
// to establish itself as leader before we call for another election:
_lastHeartbeatSeen = TRI_microtime();
LOG_TOPIC("658ba", TRACE, Logger::AGENCY) << "setting last heartbeat time to now, since we repeated a vote grant: " << _lastHeartbeatSeen;
return true;
}
LOG_TOPIC("df508", DEBUG, Logger::AGENCY)
@ -406,6 +410,10 @@ bool Constituent::vote(term_t termOfPeer, std::string const& id,
if (prevLogTerm > myLastLogEntry.term ||
(prevLogTerm == myLastLogEntry.term && prevLogIndex >= myLastLogEntry.index)) {
LOG_TOPIC("8d8da", DEBUG, Logger::AGENCY) << "voting for " << id << " in term " << _term;
// Set the last heartbeat seen to now, to grant the other server some time
// to establish itself as leader before we call for another election:
_lastHeartbeatSeen = TRI_microtime();
LOG_TOPIC("ffaac", TRACE, Logger::AGENCY) << "setting last heartbeat time to now, since we granted a vote: " << _lastHeartbeatSeen;
termNoLock(_term, id);
return true;
}
@ -698,6 +706,12 @@ void Constituent::run() {
} else if (role == CANDIDATE) {
callElection(); // Run for office
// Now we take this point in time as the base point for the next
// potential random timeout, since we have just cast a vote for
// ourselves:
_lastHeartbeatSeen = TRI_microtime();
LOG_TOPIC("aeaef", TRACE, Logger::AGENCY) << "setting last heartbeat because we voted for us: " << _lastHeartbeatSeen;
} else {
double interval =
0.25 * _agent->config().minPing() * _agent->config().timeoutMult();

View File

@ -156,7 +156,10 @@ class Constituent : public Thread {
std::string _id; // My own id
// Last time an AppendEntriesRPC message arrived; this is used to
// organize out-of-patience in the follower. Note that this variable is
// also set to the current time when a vote is cast, either for ourselves
// or for somebody else. The constituent calls for an election if and only
// if the time since _lastHeartbeatSeen is greater than a random timeout:
std::atomic<double> _lastHeartbeatSeen;
role_t _role; // My role