From 48be25bdde451e65c9a535d8fd450aa73f705e2f Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Wed, 1 Feb 2017 09:35:16 +0100 Subject: [PATCH] Add diagnostics to see background jobs that do not run. If a job is only executed more than 5 seconds later than it was scheduled, an error is logged. --- arangod/Cluster/HeartbeatThread.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arangod/Cluster/HeartbeatThread.cpp b/arangod/Cluster/HeartbeatThread.cpp index 381f3a32d1..414482062d 100644 --- a/arangod/Cluster/HeartbeatThread.cpp +++ b/arangod/Cluster/HeartbeatThread.cpp @@ -105,11 +105,24 @@ HeartbeatThread::~HeartbeatThread() { shutdown(); } class HeartbeatBackgroundJob { std::shared_ptr _heartbeatThread; + double _startTime; + std::string _schedulerInfo; public: - explicit HeartbeatBackgroundJob(std::shared_ptr hbt) - : _heartbeatThread(hbt) {} + explicit HeartbeatBackgroundJob(std::shared_ptr hbt, + double startTime) + : _heartbeatThread(hbt), _startTime(startTime) { + _schedulerInfo = SchedulerFeature::SCHEDULER->infoStatus(); + } void operator()() { + double now = TRI_microtime(); + if (now > _startTime + 5.0) { + LOG_TOPIC(ERR, Logger::HEARTBEAT) << "ALARM: Scheduling background job " + "took " << now - _startTime + << " seconds, scheduler info at schedule time: " << _schedulerInfo + << ", scheduler info now: " + << SchedulerFeature::SCHEDULER->infoStatus(); + } _heartbeatThread->runBackgroundJob(); } }; @@ -138,7 +151,8 @@ void HeartbeatThread::runBackgroundJob() { jobNr = ++_backgroundJobsPosted; LOG_TOPIC(DEBUG, Logger::HEARTBEAT) << "dispatching sync tail " << jobNr; _launchAnotherBackgroundJob = false; - _ioService->post(HeartbeatBackgroundJob(shared_from_this())); + _ioService->post(HeartbeatBackgroundJob(shared_from_this(), + TRI_microtime())); } else { _backgroundJobScheduledOrRunning = false; _launchAnotherBackgroundJob = false; @@ -790,7 +804,7 @@ void HeartbeatThread::syncDBServerStatusQuo() { uint64_t jobNr = ++_backgroundJobsPosted; LOG_TOPIC(DEBUG, Logger::HEARTBEAT) << "dispatching sync " << jobNr; _backgroundJobScheduledOrRunning = true; - _ioService->post(HeartbeatBackgroundJob(shared_from_this())); + _ioService->post(HeartbeatBackgroundJob(shared_from_this(), TRI_microtime())); } ////////////////////////////////////////////////////////////////////////////////