mirror of https://gitee.com/bigwinds/arangodb
Bug fix/supervision fixes4 (#9016)
* Try to fix agency problems with snapshots.
* Abort MoveShards jobs that have the failed server as fromServer.
* Report aborts.
* CHANGELOG.
This commit is contained in:
parent
44a413a9af
commit
1e94ecf414
|
@@ -30,6 +30,7 @@ devel
|
||||||
|
|
||||||
* fixed internal issue #3919: The web UI is now using precompiled ejs templates.
|
* fixed internal issue #3919: The web UI is now using precompiled ejs templates.
|
||||||
|
|
||||||
|
* fixed agency issue in abort of cleanOutServer job
|
||||||
|
|
||||||
v3.5.0-rc.2 (2019-05-23)
|
v3.5.0-rc.2 (2019-05-23)
|
||||||
------------------------
|
------------------------
|
||||||
|
|
|
@@ -179,7 +179,7 @@ bool FailedServer::start(bool& aborts) {
|
||||||
for (auto const& it : VPackArrayIterator(shard.second->slice())) {
|
for (auto const& it : VPackArrayIterator(shard.second->slice())) {
|
||||||
auto dbs = it.copyString();
|
auto dbs = it.copyString();
|
||||||
|
|
||||||
if (dbs == _server) {
|
if (dbs == _server || dbs == "_" + _server) {
|
||||||
if (pos == 0) {
|
if (pos == 0) {
|
||||||
FailedLeader(
|
FailedLeader(
|
||||||
_snapshot, _agent, _jobId + "-" + std::to_string(sub++),
|
_snapshot, _agent, _jobId + "-" + std::to_string(sub++),
|
||||||
|
|
|
@@ -684,8 +684,8 @@ bool Supervision::updateSnapshot() {
|
||||||
}
|
}
|
||||||
|
|
||||||
_agent->executeLockedRead([&]() {
|
_agent->executeLockedRead([&]() {
|
||||||
if (_agent->readDB().has(_agencyPrefix)) {
|
if (_agent->spearhead().has(_agencyPrefix)) {
|
||||||
_snapshot = _agent->readDB().get(_agencyPrefix);
|
_snapshot = _agent->spearhead().get(_agencyPrefix);
|
||||||
}
|
}
|
||||||
if (_agent->transient().has(_agencyPrefix)) {
|
if (_agent->transient().has(_agencyPrefix)) {
|
||||||
_transient = _agent->transient().get(_agencyPrefix);
|
_transient = _agent->transient().get(_agencyPrefix);
|
||||||
|
@@ -1222,14 +1222,14 @@ void Supervision::workJobs() {
|
||||||
LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*";
|
LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*";
|
||||||
bool doneFailedJob = false;
|
bool doneFailedJob = false;
|
||||||
while (it != todos.end()) {
|
while (it != todos.end()) {
|
||||||
|
auto const& jobNode = *(it->second);
|
||||||
|
if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) {
|
||||||
if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) {
|
if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) {
|
||||||
LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job";
|
LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job";
|
||||||
++it;
|
++it;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto const& jobNode = *(it->second);
|
|
||||||
if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) {
|
|
||||||
LOG_TOPIC("87812", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()";
|
LOG_TOPIC("87812", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()";
|
||||||
JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent)
|
JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent)
|
||||||
.run(_haveAborts);
|
.run(_haveAborts);
|
||||||
|
@@ -1258,7 +1258,6 @@ void Supervision::workJobs() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TOPIC("a55ce", DEBUG, Logger::SUPERVISION) << "Updating snapshot after ToDo";
|
LOG_TOPIC("a55ce", DEBUG, Logger::SUPERVISION) << "Updating snapshot after ToDo";
|
||||||
updateSnapshot();
|
updateSnapshot();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue