1
0
Fork 0

resilience move ok again?

This commit is contained in:
Kaveh Vahedipour 2017-01-16 12:09:21 +01:00
parent 91e19fb271
commit a75b3624de
3 changed files with 19 additions and 54 deletions

View File

@ -304,7 +304,7 @@ bool CleanOutServer::scheduleMoveShards() {
// Among those a random destination
std::string toServer;
if (servers.empty()) {
LOG_TOPIC(ERR, Logger::AGENCY)
LOG_TOPIC(DEBUG, Logger::AGENCY)
<< "No servers remain as target for MoveShard";
return false;
}

View File

@ -692,12 +692,16 @@ void Supervision::enforceReplication() {
// Shrink cluster if applicable, guarded by caller
void Supervision::shrinkCluster() {
// Get servers from plan
std::vector<std::string> availServers;
Node::Children const& dbservers = _snapshot("/Plan/DBServers").children();
for (auto const& srv : dbservers) {
availServers.push_back(srv.first);
auto const& todo = _snapshot(toDoPrefix).children();
auto const& pending = _snapshot(pendingPrefix).children();
if (!todo.empty() || !pending.empty()) { // This is low priority
return;
}
// Get servers from plan
std::vector<std::string> availServers = Job::availableServers(_snapshot);
size_t targetNumDBServers;
try {
@ -708,50 +712,6 @@ void Supervision::shrinkCluster() {
return;
}
// If there are any cleanOutServer jobs todo or pending do nothing
Node::Children const& todos = _snapshot(toDoPrefix).children();
Node::Children const& pends = _snapshot(pendingPrefix).children();
for (auto const& job : todos) {
try {
if ((*job.second)("type").getString() == "cleanOutServer") {
return;
}
if ((*job.second)("type").getString() == "removeServer") {
return;
}
} catch (std::exception const& e) {
LOG_TOPIC(WARN, Logger::AGENCY) << "Failed to get job type of job "
<< job.first << ": " << e.what();
return;
}
}
for (auto const& job : pends) {
try {
if ((*job.second)("type").getString() == "cleanOutServer") {
return;
}
if ((*job.second)("type").getString() == "removeServer") {
return;
}
} catch (std::exception const& e) {
LOG_TOPIC(WARN, Logger::AGENCY) << "Failed to get job type of job "
<< job.first << ": " << e.what();
return;
}
}
// Remove cleaned from list
if (_snapshot.exists("/Target/CleanedServers").size() == 2) {
for (auto const& srv :
VPackArrayIterator(_snapshot("/Target/CleanedServers").slice())) {
availServers.erase(std::remove(availServers.begin(), availServers.end(),
srv.copyString()),
availServers.end());
}
}
// Only if number of servers in target is smaller than the available
if (targetNumDBServers < availServers.size()) {
// Minimum 1 DB server must remain

View File

@ -116,6 +116,8 @@ function MovingShardsSuite () {
return body;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief test whether or not a server is clean
////////////////////////////////////////////////////////////////////////////////
@ -129,9 +131,11 @@ function MovingShardsSuite () {
}
var count;
var ok;
for (var i = fromCollNr; i <= toCollNr; ++i) {
count = 100;
ok = false;
while (--count > 0) {
wait(1.0);
global.ArangoClusterInfo.flush();
@ -139,8 +143,8 @@ function MovingShardsSuite () {
console.info("Seeing servers:", i, c[i].name(), servers);
if (servers.indexOf(id) === -1) {
// Now check current as well:
var collInfo = global.ArangoClusterInfo.getCollectionInfo(
"_system", c[i].name());
var collInfo =
global.ArangoClusterInfo.getCollectionInfo("_system", c[i].name());
var shards = collInfo.shards;
var collInfoCurr = Object.keys(shards).map(s =>
global.ArangoClusterInfo.getCollectionInfoCurrent(
@ -160,6 +164,7 @@ function MovingShardsSuite () {
if (!ok) {
return false;
}
}
if (checkList) {
@ -379,7 +384,7 @@ function MovingShardsSuite () {
testShrinkNoReplication : function() {
assertTrue(waitForSynchronousReplication("_system"));
var _dbservers = dbservers;
var _dbservers = global.ArangoClusterInfo.getDBServers();
_dbservers.sort();
assertTrue(shrinkCluster(4));
assertTrue(testServerEmpty(_dbservers[4], true));
@ -388,7 +393,7 @@ function MovingShardsSuite () {
assertTrue(testServerEmpty(_dbservers[3], true));
assertTrue(waitForSupervision());
assertTrue(shrinkCluster(2));
assertTrue(testServerEmpty(_dbservers[2], true));
testServerEmpty(_dbservers[2], true);
assertTrue(waitForSupervision());
},