1
0
Fork 0

Bug fix/supervision fixes4 (#9016)

* Try to fix agency problems with snapshots.

* Abort MoveShards jobs that have the failed server as fromServer.

* Report aborts.

* CHANGELOG.

This commit is contained in:
Lars Maier 2019-05-31 17:20:06 +02:00 committed by Max Neunhöffer
parent 44a413a9af
commit 1e94ecf414
3 changed files with 11 additions and 11 deletions

View File

@ -30,6 +30,7 @@ devel
* fixed internal issue #3919: The web UI is now using precompiled ejs templates. * fixed internal issue #3919: The web UI is now using precompiled ejs templates.
* fixed agency issue in abort of cleanOutServer job
v3.5.0-rc.2 (2019-05-23) v3.5.0-rc.2 (2019-05-23)
------------------------ ------------------------

View File

@ -179,7 +179,7 @@ bool FailedServer::start(bool& aborts) {
for (auto const& it : VPackArrayIterator(shard.second->slice())) { for (auto const& it : VPackArrayIterator(shard.second->slice())) {
auto dbs = it.copyString(); auto dbs = it.copyString();
if (dbs == _server) { if (dbs == _server || dbs == "_" + _server) {
if (pos == 0) { if (pos == 0) {
FailedLeader( FailedLeader(
_snapshot, _agent, _jobId + "-" + std::to_string(sub++), _snapshot, _agent, _jobId + "-" + std::to_string(sub++),

View File

@ -684,8 +684,8 @@ bool Supervision::updateSnapshot() {
} }
_agent->executeLockedRead([&]() { _agent->executeLockedRead([&]() {
if (_agent->readDB().has(_agencyPrefix)) { if (_agent->spearhead().has(_agencyPrefix)) {
_snapshot = _agent->readDB().get(_agencyPrefix); _snapshot = _agent->spearhead().get(_agencyPrefix);
} }
if (_agent->transient().has(_agencyPrefix)) { if (_agent->transient().has(_agencyPrefix)) {
_transient = _agent->transient().get(_agencyPrefix); _transient = _agent->transient().get(_agencyPrefix);
@ -1222,14 +1222,14 @@ void Supervision::workJobs() {
LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*"; LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*";
bool doneFailedJob = false; bool doneFailedJob = false;
while (it != todos.end()) { while (it != todos.end()) {
auto const& jobNode = *(it->second);
if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) {
if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) { if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) {
LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job"; LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job";
++it; ++it;
continue; continue;
} }
auto const& jobNode = *(it->second);
if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) {
LOG_TOPIC("87812", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()"; LOG_TOPIC("87812", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()";
JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent) JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent)
.run(_haveAborts); .run(_haveAborts);
@ -1258,7 +1258,6 @@ void Supervision::workJobs() {
} }
} }
} }
LOG_TOPIC("a55ce", DEBUG, Logger::SUPERVISION) << "Updating snapshot after ToDo"; LOG_TOPIC("a55ce", DEBUG, Logger::SUPERVISION) << "Updating snapshot after ToDo";
updateSnapshot(); updateSnapshot();