1
0
Fork 0

Bug fix/supervision fixes4 (#9016)

* Try to fix agency problems with snapshots.

* Abort MoveShards jobs that have the failed server as fromServer.

* Report aborts.

* Update CHANGELOG.
This commit is contained in:
Lars Maier 2019-05-31 17:20:06 +02:00 committed by Max Neunhöffer
parent 44a413a9af
commit 1e94ecf414
3 changed files with 11 additions and 11 deletions

View File

@ -30,6 +30,7 @@ devel
* fixed internal issue #3919: The web UI is now using precompiled ejs templates.
* fixed agency issue in abort of cleanOutServer job
v3.5.0-rc.2 (2019-05-23)
------------------------

View File

@ -179,7 +179,7 @@ bool FailedServer::start(bool& aborts) {
for (auto const& it : VPackArrayIterator(shard.second->slice())) {
auto dbs = it.copyString();
if (dbs == _server) {
if (dbs == _server || dbs == "_" + _server) {
if (pos == 0) {
FailedLeader(
_snapshot, _agent, _jobId + "-" + std::to_string(sub++),

View File

@ -684,8 +684,8 @@ bool Supervision::updateSnapshot() {
}
_agent->executeLockedRead([&]() {
if (_agent->readDB().has(_agencyPrefix)) {
_snapshot = _agent->readDB().get(_agencyPrefix);
if (_agent->spearhead().has(_agencyPrefix)) {
_snapshot = _agent->spearhead().get(_agencyPrefix);
}
if (_agent->transient().has(_agencyPrefix)) {
_transient = _agent->transient().get(_agencyPrefix);
@ -1222,14 +1222,14 @@ void Supervision::workJobs() {
LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*";
bool doneFailedJob = false;
while (it != todos.end()) {
auto const& jobNode = *(it->second);
if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) {
if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) {
LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job";
++it;
continue;
}
auto const& jobNode = *(it->second);
if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) {
LOG_TOPIC("87812", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()";
JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent)
.run(_haveAborts);
@ -1258,7 +1258,6 @@ void Supervision::workJobs() {
}
}
}
LOG_TOPIC("a55ce", DEBUG, Logger::SUPERVISION) << "Updating snapshot after ToDo";
updateSnapshot();