mirror of https://gitee.com/bigwinds/arangodb
[devel] Bug fix/bad leader report current (#7585)
* Bug fix 3.4/bad leader report current (#7574)
* Initialize theLeader non-empty, thus not assuming leadership.
* Correct ClusterInfo to look into Target/CleanedServers.
* Prevent usage of to-be-cleaned-out servers in new collections.
* After a restart, do not assume to be leader for a shard.
* Do nothing in phaseTwo if the leader has not been touched. (#7579)
* Drop a follower if it refuses to cooperate. This is important since a dbserver
  that is a follower for a shard will, after a reboot, think that it is a leader,
  at least for a short amount of time. If it came back quickly enough, the leader
  might not have noticed that it was away.
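The central idea of the fix, as a minimal stand-alone sketch. The class below is modeled on the FollowerInfo changes in the diff further down, but it is an illustration, not the actual ArangoDB code: after a restart, an empty leader string alone must not be read as "I am the leader"; only an explicit assignment (the "touched" flag) makes the report trustworthy.

#include <iostream>
#include <string>

// Illustrative stand-in for the leader bookkeeping of a shard.
class LeaderState {
  std::string _theLeader;          // empty means "we lead" -- once touched
  bool _theLeaderTouched = false;  // set when the leader is assigned explicitly

 public:
  void setTheLeader(std::string const& who) {
    _theLeader = who;
    _theLeaderTouched = true;
  }

  // What would be reported to the agency: before the first explicit
  // assignment we refuse to claim leadership, even though _theLeader is empty.
  std::string reportedLeader() const {
    return _theLeaderTouched ? _theLeader : "NOT_YET_TOUCHED";
  }
};

int main() {
  LeaderState s;
  std::cout << s.reportedLeader() << "\n";  // NOT_YET_TOUCHED (fresh restart)
  s.setTheLeader("");                       // explicitly told that we lead
  std::cout << s.reportedLeader() << "\n";  // "" -> now we report leadership
}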
This commit is contained in:
parent 4eb37c348e
commit dd07d74d69
@@ -123,6 +123,12 @@ JOB_STATUS CleanOutServer::status() {
       reportTrx.add("op", VPackValue("push"));
       reportTrx.add("new", VPackValue(_server));
     }
+    reportTrx.add(VPackValue("/Target/ToBeCleanedServers"));
+    {
+      VPackObjectBuilder guard4(&reportTrx);
+      reportTrx.add("op", VPackValue("erase"));
+      reportTrx.add("val", VPackValue(_server));
+    }
     addRemoveJobFromSomewhere(reportTrx, "Pending", _jobId);
     Builder job;
     _snapshot.hasAsBuilder(pendingPrefix + _jobId, job);
@@ -312,6 +318,14 @@ bool CleanOutServer::start() {
     addBlockServer(*pending, _server, _jobId);
+
+    // Put ourselves in list of servers to be cleaned:
+    pending->add(VPackValue("/Target/ToBeCleanedServers"));
+    {
+      VPackObjectBuilder guard4(pending.get());
+      pending->add("op", VPackValue("push"));
+      pending->add("new", VPackValue(_server));
+    }

     // Schedule shard relocations
     if (!scheduleMoveShards(pending)) {
       finish("", "", false, "Could not schedule MoveShard.");
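As an aside, a rough sketch of the shape of the two agency operations used above on /Target/ToBeCleanedServers: a "push" when the clean-out job starts and an "erase" when it reports completion. Only the standalone velocypack library is assumed; the transaction envelope is omitted and the server id is made up for illustration.

#include <iostream>
#include <string>

#include <velocypack/Builder.h>
#include <velocypack/Value.h>
#include <velocypack/velocypack-aliases.h>

int main() {
  std::string const server = "PRMR-example";  // hypothetical server id

  // Operation written when the job starts: append the server to the list.
  VPackBuilder start;
  {
    VPackObjectBuilder guard(&start);
    start.add(VPackValue("/Target/ToBeCleanedServers"));
    {
      VPackObjectBuilder op(&start);
      start.add("op", VPackValue("push"));
      start.add("new", VPackValue(server));
    }
  }

  // Operation written when the job finishes: remove the server again.
  VPackBuilder done;
  {
    VPackObjectBuilder guard(&done);
    done.add(VPackValue("/Target/ToBeCleanedServers"));
    {
      VPackObjectBuilder op(&done);
      done.add("op", VPackValue("erase"));
      done.add("val", VPackValue(server));
    }
  }

  std::cout << start.slice().toJson() << "\n";
  std::cout << done.slice().toJson() << "\n";
}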
@@ -3368,6 +3368,7 @@ void ClusterInfo::loadCurrentDBServers() {
   velocypack::Slice currentDBServers;
   velocypack::Slice failedDBServers;
   velocypack::Slice cleanedDBServers;
+  velocypack::Slice toBeCleanedDBServers;

   if (result.slice().length() > 0) {
     currentDBServers = result.slice()[0].get(std::vector<std::string>(
@@ -3379,7 +3380,10 @@ void ClusterInfo::loadCurrentDBServers() {
             {AgencyCommManager::path(), "Target", "FailedServers"}));
     cleanedDBServers =
         target.slice()[0].get(std::vector<std::string>(
-            {AgencyCommManager::path(), "Target", "CleanedOutServers"}));
+            {AgencyCommManager::path(), "Target", "CleanedServers"}));
+    toBeCleanedDBServers =
+        target.slice()[0].get(std::vector<std::string>(
+            {AgencyCommManager::path(), "Target", "ToBeCleanedServers"}));
   }
   if (currentDBServers.isObject() && failedDBServers.isObject()) {
     decltype(_DBServers) newDBServers;
@@ -3405,9 +3409,23 @@ void ClusterInfo::loadCurrentDBServers() {
              VPackArrayIterator(cleanedDBServers)) {
           if (dbserver.key == cleanedServer) {
             found = true;
             break;
           }
         }
         if (found) {
           continue;
         }
       }
+
+      if (toBeCleanedDBServers.isArray()) {
+        bool found = false;
+        for (auto const& toBeCleanedServer :
+             VPackArrayIterator(toBeCleanedDBServers)) {
+          if (dbserver.key == toBeCleanedServer) {
+            found = true;
+            break;
+          }
+        }
+        if (found) {
+          continue;
+        }
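To make the effect of the new checks concrete, here is a simplified sketch of the filtering that loadCurrentDBServers() now performs; plain STL containers stand in for the VelocyPack slices the real code iterates, and the server ids are invented.

#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  // Registered DB servers (id -> endpoint) and the two agency lists.
  std::map<std::string, std::string> current = {
      {"PRMR-a", "endpoint-a"}, {"PRMR-b", "endpoint-b"}, {"PRMR-c", "endpoint-c"}};
  std::vector<std::string> cleaned = {"PRMR-b"};      // Target/CleanedServers
  std::vector<std::string> toBeCleaned = {"PRMR-c"};  // Target/ToBeCleanedServers

  std::map<std::string, std::string> usable;
  for (auto const& [id, endpoint] : current) {
    auto inList = [&](std::vector<std::string> const& l) {
      return std::find(l.begin(), l.end(), id) != l.end();
    };
    if (inList(cleaned) || inList(toBeCleaned)) {
      continue;  // do not hand out servers that are (being) cleaned out
    }
    usable.emplace(id, endpoint);
  }

  for (auto const& [id, endpoint] : usable) {
    std::cout << id << " -> " << endpoint << "\n";  // only PRMR-a remains
  }
}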
@@ -519,8 +519,8 @@ static std::shared_ptr<std::unordered_map<std::string, std::vector<std::string>>>
   auto shards = std::make_shared<std::unordered_map<std::string, std::vector<std::string>>>();

-  ci->loadCurrentDBServers();
   if (dbServers.size() == 0) {
+    ci->loadCurrentDBServers();
     dbServers = ci->getCurrentDBServers();
     if (dbServers.empty()) {
       return shards;
@@ -2565,6 +2565,7 @@ std::shared_ptr<LogicalCollection> ClusterMethods::persistCollectionInAgency(
   std::string distributeShardsLike = col->distributeShardsLike();
   std::vector<std::string> avoid = col->avoidServers();
   ClusterInfo* ci = ClusterInfo::instance();
+  ci->loadCurrentDBServers();
   std::vector<std::string> dbServers = ci->getCurrentDBServers();
   std::shared_ptr<std::unordered_map<std::string, std::vector<std::string>>> shards = nullptr;
@@ -90,20 +90,26 @@ Result DBServerAgencySync::getLocalCollections(VPackBuilder& collections) {
       std::string const colname = collection->name();

       collections.add(VPackValue(colname));

       VPackObjectBuilder col(&collections);

       collection->properties(collections,true,false);

       auto const& folls = collection->followers();
-      auto const theLeader = folls->getLeader();
+      std::string const theLeader = folls->getLeader();
+      bool theLeaderTouched = folls->getLeaderTouched();

-      collections.add("theLeader", VPackValue(theLeader));
+      // Note that whenever theLeader was set explicitly since the collection
+      // object was created, we believe it. Otherwise, we do not accept
+      // that we are the leader. This is to circumvent the problem that
+      // after a restart we would implicitly be assumed to be the leader.
+      collections.add("theLeader", VPackValue(theLeaderTouched ? theLeader : "NOT_YET_TOUCHED"));
+      collections.add("theLeaderTouched", VPackValue(theLeaderTouched));

-      if (theLeader.empty()) {  // we are the leader ourselves
+      if (theLeader.empty() && theLeaderTouched) {
+        // we are the leader ourselves
         // In this case we report our in-sync followers here in the format
         // of the agency: [ leader, follower1, follower2, ... ]
         collections.add(VPackValue("servers"));

         { VPackArrayBuilder guard(&collections);

           collections.add(VPackValue(arangodb::ServerState::instance()->getId()));
@@ -39,6 +39,7 @@ class FollowerInfo {
   arangodb::LogicalCollection* _docColl;
   std::string _theLeader;
   // if the latter is empty, then we are leading
+  bool _theLeaderTouched;

  public:
@@ -83,6 +84,7 @@ class FollowerInfo {
   void setTheLeader(std::string const& who) {
     MUTEX_LOCKER(locker, _mutex);
     _theLeader = who;
+    _theLeaderTouched = true;
   }

   //////////////////////////////////////////////////////////////////////////////
@@ -94,6 +96,15 @@ class FollowerInfo {
     return _theLeader;
   }

+  //////////////////////////////////////////////////////////////////////////////
+  /// @brief see if leader was explicitly set
+  //////////////////////////////////////////////////////////////////////////////
+
+  bool getLeaderTouched() const {
+    MUTEX_LOCKER(locker, _mutex);
+    return _theLeaderTouched;
+  }
+
 };
 }  // end namespace arangodb
@@ -860,6 +860,13 @@ arangodb::Result arangodb::maintenance::reportInCurrent(
   if (cur.hasKey(servers)) {
     auto s = cur.get(servers);
     if (s.isArray() && cur.get(servers)[0].copyString() == serverId) {

+      // We are in the situation after a restart, that we do not know
+      // who the leader is because FollowerInfo is not updated yet.
+      // Hence, in the case we are the Leader in Plan but do not
+      // know it yet, do nothing here.
+      if (shSlice.get("theLeaderTouched").isTrue()) {

         // we were previously leader and we are done resigning.
         // update current and let supervision handle the rest
         VPackBuilder ns;
@@ -884,6 +891,7 @@ arangodb::Result arangodb::maintenance::reportInCurrent(
         }
+      }
     }
   }

   // UpdateCurrentForDatabases
   auto cdbs = cur.get(DATABASES);
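A small sketch of the guard added to reportInCurrent(): even if Current lists this server as the shard leader, nothing is written until the locally reported shard entry confirms that the leader was set explicitly after the restart. The JSON document below is a made-up stand-in for that local report; only the velocypack library is assumed.

#include <iostream>

#include <velocypack/Parser.h>
#include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h>

int main() {
  // Hypothetical local report for one shard, as produced right after a restart.
  auto shard = VPackParser::fromJson(
      R"({"theLeader":"NOT_YET_TOUCHED","theLeaderTouched":false})");
  VPackSlice shSlice = shard->slice();

  if (shSlice.get("theLeaderTouched").isTrue()) {
    std::cout << "leader confirmed locally -> update Current\n";
  } else {
    std::cout << "leader not yet known locally -> do nothing in this phase\n";
  }
}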
@@ -3472,17 +3472,23 @@ Result Methods::replicateOperations(
   double const timeout = chooseTimeout(count, body->size() * followers->size());

   size_t nrDone = 0;
-  cc->performRequests(requests, timeout, nrDone, Logger::REPLICATION, false);
-  // If any would-be-follower refused to follow there must be a
-  // new leader in the meantime, in this case we must not allow
-  // this operation to succeed, we simply return with a refusal
-  // error (note that we use the follower version, since we have
-  // lost leadership):
-  if (findRefusal(requests)) {
-    return Result{TRI_ERROR_CLUSTER_SHARD_LEADER_RESIGNED};
-  }
-
-  // Otherwise we drop all followers that were not successful:
+  cc->performRequests(requests,
+                      timeout,
+                      nrDone, Logger::REPLICATION, false);
+  // If any would-be-follower refused to follow there are two possibilities:
+  // (1) there is a new leader in the meantime, or
+  // (2) the follower was restarted and forgot that it is a follower.
+  // Unfortunately, we cannot know which is the case.
+  // In case (1) we must not allow
+  // this operation to succeed, since the new leader is now responsible.
+  // In case (2) we at least have to drop the follower such that it
+  // resyncs and we can be sure that it is in sync again.
+  // Therefore, we drop the follower here (just in case), and refuse to
+  // return with a refusal error (note that we use the follower version,
+  // since we have lost leadership):
+
+  // We drop all followers that were not successful:
   for (size_t i = 0; i < followers->size(); ++i) {
     bool replicationWorked =
         requests[i].done &&
@@ -3510,6 +3516,9 @@ Result Methods::replicateOperations(
     }
   }

-  // we return "ok" here still.
+  if (findRefusal(requests)) {
+    return Result{TRI_ERROR_CLUSTER_SHARD_LEADER_RESIGNED};
+  }

   return Result{};
 }
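Finally, a simplified model of the new control flow in replicateOperations(): the leader first drops every follower whose replication request did not succeed (so a restarted follower is forced to resync), and only afterwards turns any refusal into a "leader resigned" error for the client. The types and helpers below are stand-ins for illustration, not ArangoDB APIs.

#include <iostream>
#include <string>
#include <vector>

// Outcome of one replication request sent to a follower.
struct FollowerResult {
  std::string id;
  bool done;     // request completed successfully
  bool refused;  // follower answered "you are not my leader"
};

int main() {
  std::vector<FollowerResult> requests = {
      {"follower-1", true, false},   // replicated fine
      {"follower-2", false, false},  // timed out -> drop, will resync
      {"follower-3", true, true}};   // refused -> we may have lost leadership

  // Step 1: drop all followers that were not successful (including refusers).
  for (auto const& r : requests) {
    if (!r.done || r.refused) {
      std::cout << "dropping follower " << r.id << "\n";
    }
  }

  // Step 2: any refusal still makes the whole operation fail for the client.
  bool refusal = false;
  for (auto const& r : requests) {
    refusal = refusal || r.refused;
  }
  std::cout << (refusal ? "error: shard leader resigned" : "ok") << "\n";
}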