mirror of https://gitee.com/bigwinds/arangodb
Bug fix/supervision fixes4 (#9016)
* Try to fix agency problems with snapshots.
* Abort MoveShards jobs that have the failed server as fromServer.
* Report aborts.
* CHANGELOG.
This commit is contained in:
parent
44a413a9af
commit
1e94ecf414
|
@@ -30,6 +30,7 @@ devel
|
|||
|
||||
* fixed internal issue #3919: The web UI is now using precompiled ejs templates.
|
||||
|
||||
* fixed agency issue in abort of cleanOutServer job
|
||||
|
||||
v3.5.0-rc.2 (2019-05-23)
|
||||
------------------------
|
||||
|
|
|
@@ -179,7 +179,7 @@ bool FailedServer::start(bool& aborts) {
|
|||
for (auto const& it : VPackArrayIterator(shard.second->slice())) {
|
||||
auto dbs = it.copyString();
|
||||
|
||||
if (dbs == _server) {
|
||||
if (dbs == _server || dbs == "_" + _server) {
|
||||
if (pos == 0) {
|
||||
FailedLeader(
|
||||
_snapshot, _agent, _jobId + "-" + std::to_string(sub++),
|
||||
|
|
|
@@ -684,8 +684,8 @@ bool Supervision::updateSnapshot() {
|
|||
}
|
||||
|
||||
_agent->executeLockedRead([&]() {
|
||||
if (_agent->readDB().has(_agencyPrefix)) {
|
||||
_snapshot = _agent->readDB().get(_agencyPrefix);
|
||||
if (_agent->spearhead().has(_agencyPrefix)) {
|
||||
_snapshot = _agent->spearhead().get(_agencyPrefix);
|
||||
}
|
||||
if (_agent->transient().has(_agencyPrefix)) {
|
||||
_transient = _agent->transient().get(_agencyPrefix);
|
||||
|
@@ -1222,14 +1222,14 @@ void Supervision::workJobs() {
|
|||
LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*";
|
||||
bool doneFailedJob = false;
|
||||
while (it != todos.end()) {
|
||||
auto const& jobNode = *(it->second);
|
||||
if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) {
|
||||
if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) {
|
||||
LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job";
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto const& jobNode = *(it->second);
|
||||
if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) == 0) {
|
||||
LOG_TOPIC("87812", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()";
|
||||
JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent)
|
||||
.run(_haveAborts);
|
||||
|
@@ -1258,7 +1258,6 @@ void Supervision::workJobs() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
LOG_TOPIC("a55ce", DEBUG, Logger::SUPERVISION) << "Updating snapshot after ToDo";
|
||||
updateSnapshot();
|
||||
|
||||
|
|
Loading…
Reference in New Issue