mirror of https://gitee.com/bigwinds/arangodb
Various agency improvements. (#8380)
* Ignore satellite collections in shrinkCluster in agency.
* Abort RemoveFollower job if there are not enough in-sync followers or the leader has failed.
* Break the quick wait loop in supervision if leadership is lost.
* In case of a resigned leader, set isReady=false in clusterInventory.
* Fix catch tests.
This commit is contained in:
parent
30adf5e2d9
commit
2a4f606df2
|
@ -235,6 +235,7 @@ bool RemoveFollower::start(bool&) {
|
|||
<< " does not have a leader that has confirmed leadership, waiting, "
|
||||
"jobId="
|
||||
<< _jobId;
|
||||
finish("", "", false, "job no longer sensible, leader has gone bad");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -245,7 +246,8 @@ bool RemoveFollower::start(bool&) {
|
|||
<< " does not have enough in sync followers to remove one, waiting, "
|
||||
"jobId="
|
||||
<< _jobId;
|
||||
return false;
|
||||
finish("", "", false, "job no longer sensible, do not have few enough replicas");
|
||||
return true;
|
||||
}
|
||||
|
||||
// We now know actualReplFactor >= inSyncCount + noGoodCount and
|
||||
|
|
|
@ -862,8 +862,9 @@ void Supervision::run() {
|
|||
}
|
||||
|
||||
auto result = _agent->waitFor(leaderIndex);
|
||||
if (result == Agent::raft_commit_t::UNKNOWN ||
|
||||
result == Agent::raft_commit_t::TIMEOUT) { // Oh snap
|
||||
if (result == Agent::raft_commit_t::TIMEOUT) { // Oh snap
|
||||
// Note that we can get UNKNOWN if we have lost leadership or
|
||||
// if we are shutting down. In both cases we just leave the loop.
|
||||
LOG_TOPIC(WARN, Logger::SUPERVISION) << "Waiting for commits to be done ... ";
|
||||
continue;
|
||||
} else { // Good we can continue
|
||||
|
@ -1457,11 +1458,8 @@ void Supervision::shrinkCluster() {
|
|||
if (replFact > maxReplFact) {
|
||||
maxReplFact = replFact;
|
||||
}
|
||||
} else {
|
||||
LOG_TOPIC(WARN, Logger::SUPERVISION)
|
||||
<< "Cannot retrieve replication factor for collection " << collptr.first;
|
||||
return;
|
||||
}
|
||||
// Note that this could be a satellite collection, in any case, ignore:
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -699,7 +699,8 @@ void RestReplicationHandler::handleCommandClusterInventory() {
|
|||
for (auto const& p : *shardMap) {
|
||||
auto currentServerList = cic->servers(p.first /* shardId */);
|
||||
if (currentServerList.size() == 0 || p.second.size() == 0 ||
|
||||
currentServerList[0] != p.second[0]) {
|
||||
currentServerList[0] != p.second[0] ||
|
||||
(!p.second[0].empty() && p.second[0][0] == '_')) {
|
||||
isReady = false;
|
||||
}
|
||||
if (!ClusterHelpers::compareServerLists(p.second, currentServerList)) {
|
||||
|
|
|
@ -399,7 +399,7 @@ TEST_CASE("RemoveFollower", "[agency][supervision]") {
|
|||
REQUIRE(typeName(q->slice()) == "array");
|
||||
REQUIRE(q->slice().length() == 1);
|
||||
REQUIRE(typeName(q->slice()[0]) == "array");
|
||||
REQUIRE(q->slice()[0].length() == 1); // we always simply override! no preconditions...
|
||||
REQUIRE(q->slice()[0].length() == 2); // precondition
|
||||
REQUIRE(typeName(q->slice()[0][0]) == "object");
|
||||
|
||||
auto writes = q->slice()[0][0];
|
||||
|
@ -407,8 +407,12 @@ TEST_CASE("RemoveFollower", "[agency][supervision]") {
|
|||
REQUIRE(typeName(writes.get("/arango/Target/ToDo/1").get("op")) == "string");
|
||||
CHECK(writes.get("/arango/Target/ToDo/1").get("op").copyString() == "delete");
|
||||
CHECK(writes.get("/arango/Target/Finished/1").get("collection").copyString() == COLLECTION);
|
||||
CHECK(writes.get("/arango/Target/Pending/1").get("op").copyString() == "delete");
|
||||
CHECK(typeName(writes.get("/arango/Target/Failed/1")) == "none");
|
||||
|
||||
auto precond = q->slice()[0][1];
|
||||
REQUIRE(typeName(precond) == "object");
|
||||
REQUIRE(typeName(precond.get("/arango/Supervision/Health/follower1/Status")) == "object");
|
||||
|
||||
return fakeWriteResult;
|
||||
}
|
||||
);
|
||||
|
|
|
@ -11,7 +11,7 @@ R"=(
|
|||
},
|
||||
"collection2": {
|
||||
"s2": {
|
||||
"servers": ["leader"]
|
||||
"servers": ["leader", "follower1"]
|
||||
}
|
||||
},
|
||||
"collection3": {
|
||||
|
|
Loading…
Reference in New Issue