mirror of https://gitee.com/bigwinds/arangodb
Catch all exceptions to prevent maintenance workers from crashing. (#6645)
* Catch all exceptions to prevent maintenance workers from crashing.
* Please don't free this.
* Unified code paths.
* Remove dub comment.
* Removed debug output.
* Deleted unneeded constructors.
* Assignment operator deleted.
This commit is contained in:
parent
02d94ea602
commit
14d1487710
|
@ -974,8 +974,6 @@ void Supervision::cleanupLostCollections(Node const& snapshot, AgentInterface *a
|
||||||
|
|
||||||
auto const& trx = builder->slice();
|
auto const& trx = builder->slice();
|
||||||
|
|
||||||
LOG_TOPIC(ERR, Logger::FIXME) << "Trx: " << trx.toJson();
|
|
||||||
|
|
||||||
if(trx.length() > 0) {
|
if(trx.length() > 0) {
|
||||||
// do it! fire and forget!
|
// do it! fire and forget!
|
||||||
agent->write(builder);
|
agent->write(builder);
|
||||||
|
|
|
@ -51,6 +51,11 @@ public:
|
||||||
/// @brief construct with description
|
/// @brief construct with description
|
||||||
Action(MaintenanceFeature&, std::shared_ptr<ActionDescription> const&);
|
Action(MaintenanceFeature&, std::shared_ptr<ActionDescription> const&);
|
||||||
|
|
||||||
|
Action(Action const&) = delete;
|
||||||
|
Action(Action &&) = delete;
|
||||||
|
Action() = delete;
|
||||||
|
Action& operator=(Action const&) = delete;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief construct with concrete action base
|
* @brief construct with concrete action base
|
||||||
* @param feature Maintenance feature
|
* @param feature Maintenance feature
|
||||||
|
|
|
@ -57,6 +57,7 @@ void MaintenanceWorker::run() {
|
||||||
|
|
||||||
while(eSTOP != _loopState && !_feature.isShuttingDown()){
|
while(eSTOP != _loopState && !_feature.isShuttingDown()){
|
||||||
|
|
||||||
|
try {
|
||||||
switch(_loopState) {
|
switch(_loopState) {
|
||||||
case eFIND_ACTION:
|
case eFIND_ACTION:
|
||||||
_curAction = _feature.findReadyAction(_labels);
|
_curAction = _feature.findReadyAction(_labels);
|
||||||
|
@ -79,9 +80,36 @@ void MaintenanceWorker::run() {
|
||||||
|
|
||||||
} // switch
|
} // switch
|
||||||
|
|
||||||
|
} catch(std::exception const& ex) {
|
||||||
|
if (_curAction) {
|
||||||
|
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||||
|
<< "MaintenanceWorkerRun: caught exception (" << ex.what() << ")"
|
||||||
|
<< " state:" << _loopState
|
||||||
|
<< " action:" << *_curAction;
|
||||||
|
|
||||||
|
_curAction->setState(FAILED);
|
||||||
|
} else {
|
||||||
|
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||||
|
<< "MaintenanceWorkerRun: caught exception (" << ex.what() << ")"
|
||||||
|
<< " state:" << _loopState;
|
||||||
|
}
|
||||||
|
} catch(...) {
|
||||||
|
if (_curAction) {
|
||||||
|
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||||
|
<< "MaintenanceWorkerRun: caught error, state: " << _loopState
|
||||||
|
<< " state:" << _loopState
|
||||||
|
<< " action:" << *_curAction;
|
||||||
|
|
||||||
|
_curAction->setState(FAILED);
|
||||||
|
} else {
|
||||||
|
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||||
|
<< "MaintenanceWorkerRun: caught error, state: " << _loopState
|
||||||
|
<< " state:" << _loopState;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// determine next loop state
|
// determine next loop state
|
||||||
nextState(more);
|
nextState(more);
|
||||||
|
|
||||||
} // while
|
} // while
|
||||||
|
|
||||||
} // MaintenanceWorker::run
|
} // MaintenanceWorker::run
|
||||||
|
@ -145,7 +173,6 @@ void MaintenanceWorker::nextState(bool actionMore) {
|
||||||
} // else
|
} // else
|
||||||
} else {
|
} else {
|
||||||
std::shared_ptr<Action> failAction(_curAction);
|
std::shared_ptr<Action> failAction(_curAction);
|
||||||
|
|
||||||
// fail all actions that would follow
|
// fail all actions that would follow
|
||||||
do {
|
do {
|
||||||
failAction->setState(FAILED);
|
failAction->setState(FAILED);
|
||||||
|
|
Loading…
Reference in New Issue