1
0
Fork 0

Catch all exceptions to prevent maintenance workers from crashing. (#6645)

* Catch all exceptions to prevent maintenance workers from crashing.
* Please don't free this.
* Unified code paths.
* Remove dub comment.
* Removed debug output.
* Deleted unneeded constructors.
* Assignment operator deleted.
This commit is contained in:
Lars Maier 2018-09-28 17:10:44 +02:00 committed by Max Neunhöffer
parent 02d94ea602
commit 14d1487710
4 changed files with 57 additions and 27 deletions

View File

@ -974,8 +974,6 @@ void Supervision::cleanupLostCollections(Node const& snapshot, AgentInterface *a
auto const& trx = builder->slice();
LOG_TOPIC(ERR, Logger::FIXME) << "Trx: " << trx.toJson();
if(trx.length() > 0) {
// do it! fire and forget!
agent->write(builder);

View File

@ -65,7 +65,7 @@ static factories_t const factories = factories_t {
{DROP_INDEX,
[](MaintenanceFeature& f,ActionDescription const& a) {
return std::unique_ptr<ActionBase>(new DropIndex(f,a)); }},
return std::unique_ptr<ActionBase>(new DropIndex(f,a)); }},
{ENSURE_INDEX,
[](MaintenanceFeature& f,ActionDescription const& a) {
@ -77,12 +77,12 @@ static factories_t const factories = factories_t {
{SYNCHRONIZE_SHARD,
[](MaintenanceFeature& f,ActionDescription const& a) {
return std::unique_ptr<ActionBase>(new SynchronizeShard(f,a)); }},
return std::unique_ptr<ActionBase>(new SynchronizeShard(f,a)); }},
{UPDATE_COLLECTION,
[](MaintenanceFeature& f,ActionDescription const& a) {
return std::unique_ptr<ActionBase>(new UpdateCollection(f,a)); }},
};
Action::Action(
@ -120,8 +120,8 @@ void Action::create(
_action = (factory != factories.end()) ?
factory->second(feature, description) :
std::unique_ptr<ActionBase>(new NonAction(feature, description));
}
ActionDescription const& Action::describe() const {

View File

@ -51,6 +51,11 @@ public:
/// @brief construct with description
Action(MaintenanceFeature&, std::shared_ptr<ActionDescription> const&);
Action(Action const&) = delete;
Action(Action &&) = delete;
Action() = delete;
Action& operator=(Action const&) = delete;
/**
* @brief construct with concrete action base
* @param feature Maintenance feature

View File

@ -32,12 +32,12 @@ namespace maintenance {
/// @brief construct a maintenance worker
/// @param feature maintenance feature, used to initialize _feature
/// @param labels  set of label strings, used to initialize _labels
// NOTE(review): this text is a rendered diff without +/- markers, so the
// `labels` parameter line below appears twice -- presumably the removed and
// the added side of a whitespace-only change. Confirm against the real file.
MaintenanceWorker::MaintenanceWorker(
arangodb::MaintenanceFeature& feature,
std::unordered_set<std::string> const& labels)
std::unordered_set<std::string> const& labels)
// passes the name "MaintenanceWorker" to Thread; no current action yet
: Thread("MaintenanceWorker"), _feature(feature), _curAction(nullptr),
// initial loop state is eFIND_ACTION; _directAction starts out false
_loopState(eFIND_ACTION), _directAction(false), _labels(labels) {
return;
} // MaintenanceWorker::MaintenanceWorker
@ -57,31 +57,59 @@ void MaintenanceWorker::run() {
while(eSTOP != _loopState && !_feature.isShuttingDown()){
switch(_loopState) {
case eFIND_ACTION:
_curAction = _feature.findReadyAction(_labels);
more = (bool)_curAction;
break;
try {
switch(_loopState) {
case eFIND_ACTION:
_curAction = _feature.findReadyAction(_labels);
more = (bool)_curAction;
break;
case eRUN_FIRST:
_curAction->startStats();
more = _curAction->first();
break;
case eRUN_FIRST:
_curAction->startStats();
more = _curAction->first();
break;
case eRUN_NEXT:
more = _curAction->next();
break;
case eRUN_NEXT:
more = _curAction->next();
break;
default:
_loopState = eSTOP;
default:
_loopState = eSTOP;
LOG_TOPIC(ERR, Logger::CLUSTER)
<< "MaintenanceWorkerRun: unexpected state (" << _loopState << ")";
} // switch
} catch(std::exception const& ex) {
if (_curAction) {
LOG_TOPIC(ERR, Logger::CLUSTER)
<< "MaintenanceWorkerRun: unexpected state (" << _loopState << ")";
<< "MaintenanceWorkerRun: caught exception (" << ex.what() << ")"
<< " state:" << _loopState
<< " action:" << *_curAction;
} // switch
_curAction->setState(FAILED);
} else {
LOG_TOPIC(ERR, Logger::CLUSTER)
<< "MaintenanceWorkerRun: caught exception (" << ex.what() << ")"
<< " state:" << _loopState;
}
} catch(...) {
if (_curAction) {
LOG_TOPIC(ERR, Logger::CLUSTER)
<< "MaintenanceWorkerRun: caught error, state: " << _loopState
<< " state:" << _loopState
<< " action:" << *_curAction;
_curAction->setState(FAILED);
} else {
LOG_TOPIC(ERR, Logger::CLUSTER)
<< "MaintenanceWorkerRun: caught error, state: " << _loopState
<< " state:" << _loopState;
}
}
// determine next loop state
nextState(more);
} // while
} // MaintenanceWorker::run
@ -145,7 +173,6 @@ void MaintenanceWorker::nextState(bool actionMore) {
} // else
} else {
std::shared_ptr<Action> failAction(_curAction);
// fail all actions that would follow
do {
failAction->setState(FAILED);