From cf09546d939e5a5240784b7ae5b4b4b7cb49a7ad Mon Sep 17 00:00:00 2001 From: Kaveh Vahedipour Date: Fri, 7 Oct 2016 11:01:45 +0200 Subject: [PATCH] fixed erroneous break of supervision agency updates --- arangod/Agency/FailedServer.cpp | 10 +++++----- arangod/Agency/Supervision.cpp | 2 +- .../resilience/resilience-synchronous-repl-cluster.js | 5 ++--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arangod/Agency/FailedServer.cpp b/arangod/Agency/FailedServer.cpp index 3469f386cc..d149ba82b6 100644 --- a/arangod/Agency/FailedServer.cpp +++ b/arangod/Agency/FailedServer.cpp @@ -54,7 +54,7 @@ FailedServer::~FailedServer() {} bool FailedServer::start() { LOG_TOPIC(INFO, Logger::AGENCY) - << "Trying to start FailedLeader job" + _jobId + " for server " + _server; + << "Trying to start FailedServer job" + _jobId + " for server " + _server; // Copy todo to pending Builder todo, pending; @@ -118,7 +118,7 @@ bool FailedServer::start() { if (res.accepted && res.indices.size() == 1 && res.indices[0]) { LOG_TOPIC(INFO, Logger::AGENCY) - << "Pending: DB Server " + _server + " failed."; + << "Pending job for failed DB Server " << _server; auto const& databases = _snapshot("/Plan/Collections").children(); auto const& current = _snapshot("/Current/Collections").children(); @@ -130,19 +130,19 @@ bool FailedServer::start() { for (auto const& collptr : database.second->children()) { Node const& collection = *(collptr.second); - + if (!cdatabase.find(collptr.first)->second->children().empty()) { Node const& collection = *(collptr.second); Node const& replicationFactor = collection("replicationFactor"); if (replicationFactor.slice().getUInt() > 1) { for (auto const& shard : collection("shards").children()) { VPackArrayIterator dbsit(shard.second->slice()); - + // Only proceed if leader and create job if ((*dbsit.begin()).copyString() != _server) { continue; } - + FailedLeader( _snapshot, _agent, _jobId + "-" + std::to_string(sub++), _jobId, _agencyPrefix, database.first, collptr.first, diff --git a/arangod/Agency/Supervision.cpp b/arangod/Agency/Supervision.cpp index 0f7547c050..9796239921 100644 --- a/arangod/Agency/Supervision.cpp +++ b/arangod/Agency/Supervision.cpp @@ -50,7 +50,7 @@ Supervision::Supervision() _agent(nullptr), _snapshot("Supervision"), _frequency(5), - _gracePeriod(120), + _gracePeriod(15), _jobId(0), _jobIdMax(0), _selfShutdown(false) {} diff --git a/js/server/tests/resilience/resilience-synchronous-repl-cluster.js b/js/server/tests/resilience/resilience-synchronous-repl-cluster.js index caa710cb82..1f4600a3c9 100644 --- a/js/server/tests/resilience/resilience-synchronous-repl-cluster.js +++ b/js/server/tests/resilience/resilience-synchronous-repl-cluster.js @@ -81,7 +81,6 @@ function SynchronousReplicationSuite () { console.info("Replication up and running!"); return true; } - require('internal').print("Plan:", cinfo.shards, "Current:", ccinfo.map(s => s.servers)); console.info("Plan:", cinfo.shards, "Current:", ccinfo.map(s => s.servers)); wait(0.5); global.ArangoClusterInfo.flush(); @@ -299,14 +298,14 @@ function SynchronousReplicationSuite () { c.remove([ids[0]._key, ids[1]._key]); if (healing.place === 17) { healFailure(healing); } -/* if (failure.place === 18) { makeFailure(failure); } + if (failure.place === 18) { makeFailure(failure); } docs = c.document([ids[0]._key, ids[1]._key]); assertEqual(2, docs.length); assertTrue(docs[0].error); assertTrue(docs[1].error); - if (healing.place === 18) { healFailure(healing); }*/ + if (healing.place === 18) { healFailure(healing); } } ////////////////////////////////////////////////////////////////////////////////