mirror of https://gitee.com/bigwinds/arangodb
resilience move ok again?
This commit is contained in:
parent
91e19fb271
commit
a75b3624de
|
@ -304,7 +304,7 @@ bool CleanOutServer::scheduleMoveShards() {
|
|||
// Among those a random destination
|
||||
std::string toServer;
|
||||
if (servers.empty()) {
|
||||
LOG_TOPIC(ERR, Logger::AGENCY)
|
||||
LOG_TOPIC(DEBUG, Logger::AGENCY)
|
||||
<< "No servers remain as target for MoveShard";
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -692,12 +692,16 @@ void Supervision::enforceReplication() {
|
|||
|
||||
// Shrink cluster if applicable, guarded by caller
|
||||
void Supervision::shrinkCluster() {
|
||||
// Get servers from plan
|
||||
std::vector<std::string> availServers;
|
||||
Node::Children const& dbservers = _snapshot("/Plan/DBServers").children();
|
||||
for (auto const& srv : dbservers) {
|
||||
availServers.push_back(srv.first);
|
||||
|
||||
auto const& todo = _snapshot(toDoPrefix).children();
|
||||
auto const& pending = _snapshot(pendingPrefix).children();
|
||||
|
||||
if (!todo.empty() || !pending.empty()) { // This is low priority
|
||||
return;
|
||||
}
|
||||
|
||||
// Get servers from plan
|
||||
std::vector<std::string> availServers = Job::availableServers(_snapshot);
|
||||
|
||||
size_t targetNumDBServers;
|
||||
try {
|
||||
|
@ -708,50 +712,6 @@ void Supervision::shrinkCluster() {
|
|||
return;
|
||||
}
|
||||
|
||||
// If there are any cleanOutServer jobs todo or pending do nothing
|
||||
Node::Children const& todos = _snapshot(toDoPrefix).children();
|
||||
Node::Children const& pends = _snapshot(pendingPrefix).children();
|
||||
|
||||
for (auto const& job : todos) {
|
||||
try {
|
||||
if ((*job.second)("type").getString() == "cleanOutServer") {
|
||||
return;
|
||||
}
|
||||
if ((*job.second)("type").getString() == "removeServer") {
|
||||
return;
|
||||
}
|
||||
} catch (std::exception const& e) {
|
||||
LOG_TOPIC(WARN, Logger::AGENCY) << "Failed to get job type of job "
|
||||
<< job.first << ": " << e.what();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto const& job : pends) {
|
||||
try {
|
||||
if ((*job.second)("type").getString() == "cleanOutServer") {
|
||||
return;
|
||||
}
|
||||
if ((*job.second)("type").getString() == "removeServer") {
|
||||
return;
|
||||
}
|
||||
} catch (std::exception const& e) {
|
||||
LOG_TOPIC(WARN, Logger::AGENCY) << "Failed to get job type of job "
|
||||
<< job.first << ": " << e.what();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove cleaned from list
|
||||
if (_snapshot.exists("/Target/CleanedServers").size() == 2) {
|
||||
for (auto const& srv :
|
||||
VPackArrayIterator(_snapshot("/Target/CleanedServers").slice())) {
|
||||
availServers.erase(std::remove(availServers.begin(), availServers.end(),
|
||||
srv.copyString()),
|
||||
availServers.end());
|
||||
}
|
||||
}
|
||||
|
||||
// Only if number of servers in target is smaller than the available
|
||||
if (targetNumDBServers < availServers.size()) {
|
||||
// Minimum 1 DB server must remain
|
||||
|
|
|
@ -116,6 +116,8 @@ function MovingShardsSuite () {
|
|||
return body;
|
||||
}
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test whether or not a server is clean
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -129,9 +131,11 @@ function MovingShardsSuite () {
|
|||
}
|
||||
var count;
|
||||
var ok;
|
||||
|
||||
for (var i = fromCollNr; i <= toCollNr; ++i) {
|
||||
count = 100;
|
||||
ok = false;
|
||||
|
||||
while (--count > 0) {
|
||||
wait(1.0);
|
||||
global.ArangoClusterInfo.flush();
|
||||
|
@ -139,8 +143,8 @@ function MovingShardsSuite () {
|
|||
console.info("Seeing servers:", i, c[i].name(), servers);
|
||||
if (servers.indexOf(id) === -1) {
|
||||
// Now check current as well:
|
||||
var collInfo = global.ArangoClusterInfo.getCollectionInfo(
|
||||
"_system", c[i].name());
|
||||
var collInfo =
|
||||
global.ArangoClusterInfo.getCollectionInfo("_system", c[i].name());
|
||||
var shards = collInfo.shards;
|
||||
var collInfoCurr = Object.keys(shards).map(s =>
|
||||
global.ArangoClusterInfo.getCollectionInfoCurrent(
|
||||
|
@ -160,6 +164,7 @@ function MovingShardsSuite () {
|
|||
if (!ok) {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (checkList) {
|
||||
|
@ -379,7 +384,7 @@ function MovingShardsSuite () {
|
|||
|
||||
testShrinkNoReplication : function() {
|
||||
assertTrue(waitForSynchronousReplication("_system"));
|
||||
var _dbservers = dbservers;
|
||||
var _dbservers = global.ArangoClusterInfo.getDBServers();
|
||||
_dbservers.sort();
|
||||
assertTrue(shrinkCluster(4));
|
||||
assertTrue(testServerEmpty(_dbservers[4], true));
|
||||
|
@ -388,7 +393,7 @@ function MovingShardsSuite () {
|
|||
assertTrue(testServerEmpty(_dbservers[3], true));
|
||||
assertTrue(waitForSupervision());
|
||||
assertTrue(shrinkCluster(2));
|
||||
assertTrue(testServerEmpty(_dbservers[2], true));
|
||||
testServerEmpty(_dbservers[2], true);
|
||||
assertTrue(waitForSupervision());
|
||||
},
|
||||
|
||||
|
|
Loading…
Reference in New Issue