diff --git a/CHANGELOG b/CHANGELOG index 914f53ea13..f091a27cca 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -30,6 +30,7 @@ devel * fixed internal issue #3919: The web UI is now using precompiled ejs templates. +* fixed agency issue in abort of cleanOutServer job v3.5.0-rc.2 (2019-05-23) ------------------------ diff --git a/arangod/Agency/FailedServer.cpp b/arangod/Agency/FailedServer.cpp index 83267e9c0d..60119a04b3 100644 --- a/arangod/Agency/FailedServer.cpp +++ b/arangod/Agency/FailedServer.cpp @@ -179,7 +179,7 @@ bool FailedServer::start(bool& aborts) { for (auto const& it : VPackArrayIterator(shard.second->slice())) { auto dbs = it.copyString(); - if (dbs == _server) { + if (dbs == _server || dbs == "_" + _server) { if (pos == 0) { FailedLeader( _snapshot, _agent, _jobId + "-" + std::to_string(sub++), diff --git a/arangod/Agency/Supervision.cpp b/arangod/Agency/Supervision.cpp index 71a1ff1ec6..eee11d7c6d 100644 --- a/arangod/Agency/Supervision.cpp +++ b/arangod/Agency/Supervision.cpp @@ -684,8 +684,8 @@ bool Supervision::updateSnapshot() { } _agent->executeLockedRead([&]() { - if (_agent->readDB().has(_agencyPrefix)) { - _snapshot = _agent->readDB().get(_agencyPrefix); + if (_agent->spearhead().has(_agencyPrefix)) { + _snapshot = _agent->spearhead().get(_agencyPrefix); } if (_agent->transient().has(_agencyPrefix)) { _transient = _agent->transient().get(_agencyPrefix); @@ -1211,7 +1211,7 @@ void Supervision::workJobs() { // per second. Therefore, we have - for now - chosen to limit the number of // jobs actually worked on to 1000 in ToDo and 1000 in Pending. However, // since some jobs are just waiting, we cannot work on the same 1000 - // jobs in each round. This is where the randomization comes in. We work + // jobs in each round. This is where the randomization comes in. We work // on up to 1000 *random* jobs. This will eventually cover everything with // very high probability. Note that the snapshot does not change, so // `todos.size()` is constant for the loop, even though we do agency @@ -1222,14 +1222,14 @@ void Supervision::workJobs() { LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*"; bool doneFailedJob = false; while (it != todos.end()) { - if (selectRandom && RandomGenerator::interval(static_cast(todos.size())) > maximalJobsPerRound) { - LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job"; - ++it; - continue; - } - auto const& jobNode = *(it->second); if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) { + if (selectRandom && RandomGenerator::interval(static_cast(todos.size())) > maximalJobsPerRound) { + LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job"; + ++it; + continue; + } + LOG_TOPIC("87812", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()"; JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent) .run(_haveAborts); @@ -1258,7 +1258,6 @@ void Supervision::workJobs() { } } } - LOG_TOPIC("a55ce", DEBUG, Logger::SUPERVISION) << "Updating snapshot after ToDo"; updateSnapshot();