mirror of https://gitee.com/bigwinds/arangodb
Catch all exceptions to prevent maintenance workers from crashing. (#6645)
* Catch all exceptions to prevent maintenance workers from crashing.
* Please don't free this.
* Unified code paths.
* Remove duplicate comment.
* Removed debug output.
* Deleted unneeded constructors.
* Assignment operator deleted.
This commit is contained in:
parent
02d94ea602
commit
14d1487710
|
@ -974,8 +974,6 @@ void Supervision::cleanupLostCollections(Node const& snapshot, AgentInterface *a
|
|||
|
||||
auto const& trx = builder->slice();
|
||||
|
||||
LOG_TOPIC(ERR, Logger::FIXME) << "Trx: " << trx.toJson();
|
||||
|
||||
if(trx.length() > 0) {
|
||||
// do it! fire and forget!
|
||||
agent->write(builder);
|
||||
|
|
|
@ -65,7 +65,7 @@ static factories_t const factories = factories_t {
|
|||
|
||||
{DROP_INDEX,
|
||||
[](MaintenanceFeature& f,ActionDescription const& a) {
|
||||
return std::unique_ptr<ActionBase>(new DropIndex(f,a)); }},
|
||||
return std::unique_ptr<ActionBase>(new DropIndex(f,a)); }},
|
||||
|
||||
{ENSURE_INDEX,
|
||||
[](MaintenanceFeature& f,ActionDescription const& a) {
|
||||
|
@ -77,12 +77,12 @@ static factories_t const factories = factories_t {
|
|||
|
||||
{SYNCHRONIZE_SHARD,
|
||||
[](MaintenanceFeature& f,ActionDescription const& a) {
|
||||
return std::unique_ptr<ActionBase>(new SynchronizeShard(f,a)); }},
|
||||
return std::unique_ptr<ActionBase>(new SynchronizeShard(f,a)); }},
|
||||
|
||||
{UPDATE_COLLECTION,
|
||||
[](MaintenanceFeature& f,ActionDescription const& a) {
|
||||
return std::unique_ptr<ActionBase>(new UpdateCollection(f,a)); }},
|
||||
|
||||
|
||||
};
|
||||
|
||||
Action::Action(
|
||||
|
@ -120,8 +120,8 @@ void Action::create(
|
|||
_action = (factory != factories.end()) ?
|
||||
factory->second(feature, description) :
|
||||
std::unique_ptr<ActionBase>(new NonAction(feature, description));
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
ActionDescription const& Action::describe() const {
|
||||
|
|
|
@ -51,6 +51,11 @@ public:
|
|||
/// @brief construct with description
|
||||
Action(MaintenanceFeature&, std::shared_ptr<ActionDescription> const&);
|
||||
|
||||
Action(Action const&) = delete;
|
||||
Action(Action &&) = delete;
|
||||
Action() = delete;
|
||||
Action& operator=(Action const&) = delete;
|
||||
|
||||
/**
|
||||
* @brief construct with concrete action base
|
||||
* @param feature Maintenance feature
|
||||
|
|
|
@ -32,12 +32,12 @@ namespace maintenance {
|
|||
|
||||
MaintenanceWorker::MaintenanceWorker(
|
||||
arangodb::MaintenanceFeature& feature,
|
||||
std::unordered_set<std::string> const& labels)
|
||||
std::unordered_set<std::string> const& labels)
|
||||
: Thread("MaintenanceWorker"), _feature(feature), _curAction(nullptr),
|
||||
_loopState(eFIND_ACTION), _directAction(false), _labels(labels) {
|
||||
|
||||
return;
|
||||
|
||||
|
||||
} // MaintenanceWorker::MaintenanceWorker
|
||||
|
||||
|
||||
|
@ -57,31 +57,59 @@ void MaintenanceWorker::run() {
|
|||
|
||||
while(eSTOP != _loopState && !_feature.isShuttingDown()){
|
||||
|
||||
switch(_loopState) {
|
||||
case eFIND_ACTION:
|
||||
_curAction = _feature.findReadyAction(_labels);
|
||||
more = (bool)_curAction;
|
||||
break;
|
||||
try {
|
||||
switch(_loopState) {
|
||||
case eFIND_ACTION:
|
||||
_curAction = _feature.findReadyAction(_labels);
|
||||
more = (bool)_curAction;
|
||||
break;
|
||||
|
||||
case eRUN_FIRST:
|
||||
_curAction->startStats();
|
||||
more = _curAction->first();
|
||||
break;
|
||||
case eRUN_FIRST:
|
||||
_curAction->startStats();
|
||||
more = _curAction->first();
|
||||
break;
|
||||
|
||||
case eRUN_NEXT:
|
||||
more = _curAction->next();
|
||||
break;
|
||||
case eRUN_NEXT:
|
||||
more = _curAction->next();
|
||||
break;
|
||||
|
||||
default:
|
||||
_loopState = eSTOP;
|
||||
default:
|
||||
_loopState = eSTOP;
|
||||
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||
<< "MaintenanceWorkerRun: unexpected state (" << _loopState << ")";
|
||||
|
||||
} // switch
|
||||
|
||||
} catch(std::exception const& ex) {
|
||||
if (_curAction) {
|
||||
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||
<< "MaintenanceWorkerRun: unexpected state (" << _loopState << ")";
|
||||
<< "MaintenanceWorkerRun: caught exception (" << ex.what() << ")"
|
||||
<< " state:" << _loopState
|
||||
<< " action:" << *_curAction;
|
||||
|
||||
} // switch
|
||||
_curAction->setState(FAILED);
|
||||
} else {
|
||||
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||
<< "MaintenanceWorkerRun: caught exception (" << ex.what() << ")"
|
||||
<< " state:" << _loopState;
|
||||
}
|
||||
} catch(...) {
|
||||
if (_curAction) {
|
||||
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||
<< "MaintenanceWorkerRun: caught error, state: " << _loopState
|
||||
<< " state:" << _loopState
|
||||
<< " action:" << *_curAction;
|
||||
|
||||
_curAction->setState(FAILED);
|
||||
} else {
|
||||
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||
<< "MaintenanceWorkerRun: caught error, state: " << _loopState
|
||||
<< " state:" << _loopState;
|
||||
}
|
||||
}
|
||||
|
||||
// determine next loop state
|
||||
nextState(more);
|
||||
|
||||
} // while
|
||||
|
||||
} // MaintenanceWorker::run
|
||||
|
@ -145,7 +173,6 @@ void MaintenanceWorker::nextState(bool actionMore) {
|
|||
} // else
|
||||
} else {
|
||||
std::shared_ptr<Action> failAction(_curAction);
|
||||
|
||||
// fail all actions that would follow
|
||||
do {
|
||||
failAction->setState(FAILED);
|
||||
|
|
Loading…
Reference in New Issue