mirror of https://gitee.com/bigwinds/arangodb
[3.5] Check scheduler queue return value (#9759)
* Add to cmake.
* Backport changes from devel.
* added CHANGELOG entry, port adjustments from devel
This commit is contained in:
parent
3dcc293224
commit
663212ba19
CHANGELOG | 11

@@ -1,6 +1,17 @@
 v3.5.1 (XXXX-XX-XX)
 -------------------

+* Consistently honor the return value of all attempts to queue tasks in the
+  internal scheduler.
+
+  Previously some call sites did not check the return value of internal queueing
+  operations, and if the scheduler queue was full, operations that were thought
+  to be requeued were silently dropped. Now, there will be reactions on such
+  failures. Requeuing an important task with a time offset (Scheduler::queueDelay)
+  is now also retried on failure (queue full) up to at most five minutes. If after
+  five minutes such a task still cannot be queued, a fatal error will be logged
+  and the server process will be aborted.
+
 * Made index selection much more deterministic in case there are
   multiple competing indexes.

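The retry behaviour described in the CHANGELOG entry above boils down to polling the queueing call until it succeeds or a timeout expires. The following standalone sketch is illustrative only: tryQueue is a hypothetical stand-in for the scheduler's queue() call, not ArangoDB code, and the timeouts are shortened for demonstration. The real implementation is the basics::function_utils::retryUntilTimeout helper added further down in this commit.

// Minimal, self-contained sketch of the retry-until-timeout idea (illustrative only).
#include <chrono>
#include <functional>
#include <iostream>
#include <thread>

// Hypothetical stand-in for Scheduler::queue(): returns false when the queue is full.
static bool tryQueue(std::function<void()> const& fn) {
  (void)fn;  // a real scheduler would run fn() later
  return false;
}

// Retry fn() until it returns true or the timeout expires.
static bool retryUntilTimeout(std::function<bool()> fn,
                              std::chrono::nanoseconds retryInterval,
                              std::chrono::nanoseconds timeout) {
  auto start = std::chrono::steady_clock::now();
  while (std::chrono::steady_clock::now() - start < timeout) {
    if (fn()) {
      return true;
    }
    std::this_thread::sleep_for(retryInterval);
  }
  return false;
}

int main() {
  bool queued = retryUntilTimeout([] { return tryQueue([] { /* work */ }); },
                                  std::chrono::milliseconds(10),
                                  std::chrono::milliseconds(50));
  std::cout << (queued ? "queued" : "queue full, giving up") << std::endl;
}
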
@@ -70,6 +70,5 @@ bool SharedQueryState::executeContinueCallback() const {
   }
   // do NOT use scheduler->post(), can have high latency that
   // then backs up libcurl callbacks to other objects
-  scheduler->queue(RequestLane::CLIENT_AQL, _continueCallback);
-  return true;
+  return scheduler->queue(RequestLane::CLIENT_AQL, _continueCallback);
 }

@@ -101,8 +101,11 @@ void CacheManagerFeature::start() {

   auto scheduler = SchedulerFeature::SCHEDULER;
   auto postFn = [scheduler](std::function<void()> fn) -> bool {
-    scheduler->queue(RequestLane::INTERNAL_LOW, fn);
-    return true;
+    try {
+      return scheduler->queue(RequestLane::INTERNAL_LOW, fn);
+    } catch (...) {
+      return false;
+    }
   };
   _manager.reset(new Manager(postFn, _cacheSize));
   MANAGER = _manager.get();

@@ -20,6 +20,9 @@
 /// @author Simon Grätzer
 ////////////////////////////////////////////////////////////////////////////////

+#include <chrono>
+#include <thread>
+
 #include "Conductor.h"

 #include "Pregel/Aggregator.h"
@@ -30,6 +33,7 @@
 #include "Pregel/Recovery.h"
 #include "Pregel/Utils.h"

+#include "Basics/FunctionUtils.h"
 #include "Basics/MutexLocker.h"
 #include "Basics/StringUtils.h"
 #include "Basics/VelocyPackHelper.h"
@@ -305,7 +309,7 @@ VPackBuilder Conductor::finishedWorkerStep(VPackSlice const& data) {
   Scheduler* scheduler = SchedulerFeature::SCHEDULER;
   // don't block the response for workers waiting on this callback
   // this should allow workers to go into the IDLE state
-  scheduler->queue(RequestLane::INTERNAL_LOW, [this] {
+  bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, [this] {
     MUTEX_LOCKER(guard, _callbackMutex);

     if (_state == ExecutionState::RUNNING) {
@@ -319,6 +323,11 @@ VPackBuilder Conductor::finishedWorkerStep(VPackSlice const& data) {
          << "No further action taken after receiving all responses";
     }
   });
+  if (!queued) {
+    LOG_TOPIC("038db", ERR, Logger::PREGEL)
+        << "No thread available to queue response, canceling execution";
+    cancel();
+  }
   return VPackBuilder();
 }

@@ -389,7 +398,18 @@ void Conductor::cancel() {
 void Conductor::cancelNoLock() {
   _callbackMutex.assertLockedByCurrentThread();
   _state = ExecutionState::CANCELED;
-  _finalizeWorkers();
+  bool ok;
+  int res;
+  std::tie(ok, res) = basics::function_utils::retryUntilTimeout<int>(
+      [this]() -> std::pair<bool, int> {
+        int res = _finalizeWorkers();
+        return std::make_pair(res != TRI_ERROR_QUEUE_FULL, res);
+      },
+      Logger::PREGEL, "cancel worker execution");
+  if (!ok) {
+    LOG_TOPIC("f8b3c", ERR, Logger::PREGEL)
+        << "Failed to cancel worker execution for five minutes, giving up.";
+  }
   _workHandle.reset();
 }

@@ -412,7 +432,8 @@ void Conductor::startRecovery() {
   TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);

   // let's wait for a final state in the cluster
-  _workHandle = SchedulerFeature::SCHEDULER->queueDelay(
+  bool queued = false;
+  std::tie(queued, _workHandle) = SchedulerFeature::SCHEDULER->queueDelay(
       RequestLane::CLUSTER_AQL, std::chrono::seconds(2), [this](bool cancelled) {
         if (cancelled || _state != ExecutionState::RECOVERING) {
           return;  // seems like we are canceled
@@ -460,6 +481,10 @@ void Conductor::startRecovery() {
           LOG_TOPIC("fefc6", ERR, Logger::PREGEL) << "Compensation failed";
         }
       });
+  if (!queued) {
+    LOG_TOPIC("92a8d", ERR, Logger::PREGEL)
+        << "No thread available to queue recovery, may be in dirty state.";
+  }
 }

 // resolves into an ordered list of shards for each collection on each server
@@ -691,12 +716,17 @@ void Conductor::finishedWorkerFinalize(VPackSlice data) {
     auto* scheduler = SchedulerFeature::SCHEDULER;
     if (scheduler) {
       uint64_t exe = _executionNumber;
-      scheduler->queue(RequestLane::CLUSTER_AQL, [exe] {
+      bool queued = scheduler->queue(RequestLane::CLUSTER_AQL, [exe] {
         auto pf = PregelFeature::instance();
         if (pf) {
           pf->cleanupConductor(exe);
         }
       });
+      if (!queued) {
+        LOG_TOPIC("038da", ERR, Logger::PREGEL)
+            << "No thread available to queue cleanup, canceling execution";
+        cancel();
+      }
     }
   }
 }
@@ -766,7 +796,7 @@ int Conductor::_sendToAllDBServers(std::string const& path, VPackBuilder const&
     TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
     uint64_t exe = _executionNumber;
     Scheduler* scheduler = SchedulerFeature::SCHEDULER;
-    scheduler->queue(RequestLane::INTERNAL_LOW, [path, message, exe] {
+    bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, [path, message, exe] {
      auto pf = PregelFeature::instance();
      if (!pf) {
        return;
@@ -778,6 +808,9 @@ int Conductor::_sendToAllDBServers(std::string const& path, VPackBuilder const&
      PregelFeature::handleWorkerRequest(vocbase, path, message.slice(), response);
     }
    });
+    if (!queued) {
+      return TRI_ERROR_QUEUE_FULL;
+    }
   }
   return TRI_ERROR_NO_ERROR;
 }

@@ -129,16 +129,25 @@ void GraphStore<V, E>::loadShards(WorkerConfig* config,
       _runningThreads++;
       Scheduler* scheduler = SchedulerFeature::SCHEDULER;
       TRI_ASSERT(scheduler);
-      scheduler->queue(RequestLane::INTERNAL_LOW,
-                       [this, vertexShard, edges] {
+      bool queued =
+          scheduler->queue(RequestLane::INTERNAL_LOW, [this, vertexShard, edges] {
            TRI_DEFER(_runningThreads--);  // exception safe
            try {
              _loadVertices(vertexShard, edges);
            } catch (std::exception const& ex) {
-             LOG_TOPIC("c87c9", WARN, Logger::PREGEL) << "caught exception while "
+             LOG_TOPIC("c87c9", WARN, Logger::PREGEL)
+                 << "caught exception while "
                  << "loading pregel graph: " << ex.what();
            }
          });
+      if (!queued) {
+        LOG_TOPIC("38da2", WARN, Logger::PREGEL)
+            << "No thread available to queue vertex loading";
+      }
+    } catch (basics::Exception const& ex) {
+      LOG_TOPIC("3f283", WARN, Logger::PREGEL)
+          << "unhandled exception while "
+          << "loading pregel graph: " << ex.what();
     } catch (...) {
       LOG_TOPIC("3f282", WARN, Logger::PREGEL) << "unhandled exception while "
                                                << "loading pregel graph";
@@ -151,7 +160,12 @@ void GraphStore<V, E>::loadShards(WorkerConfig* config,
   }

   Scheduler* scheduler = SchedulerFeature::SCHEDULER;
-  scheduler->queue(RequestLane::INTERNAL_LOW, cb);
+  bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, cb);
+  if (!queued) {
+    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
+                                   "No thread available to queue callback, "
+                                   "canceling execution");
+  }
 }

 template <typename V, typename E>
@@ -563,7 +577,7 @@ void GraphStore<V, E>::storeResults(WorkerConfig* config,
       numT << " threads";

   for (size_t i = 0; i < numT; i++) {
-    SchedulerFeature::SCHEDULER->queue(RequestLane::INTERNAL_LOW, [=]{
+    bool queued = SchedulerFeature::SCHEDULER->queue(RequestLane::INTERNAL_LOW, [=] {
       size_t startI = i * (numSegments / numT);
       size_t endI = (i + 1) * (numSegments / numT);
       TRI_ASSERT(endI <= numSegments);
@@ -584,6 +598,11 @@ void GraphStore<V, E>::storeResults(WorkerConfig* config,
         cb();
       }
     });
+    if (!queued) {
+      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
+                                     "No thread available to queue vertex "
+                                     "storage, canceling execution");
+    }
   }
 }

@@ -308,10 +308,14 @@ void PregelFeature::cleanupWorker(uint64_t executionNumber) {
   // unmapping etc might need a few seconds
   TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
   Scheduler* scheduler = SchedulerFeature::SCHEDULER;
-  scheduler->queue(RequestLane::INTERNAL_LOW, [this, executionNumber, instance] {
+  bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, [this, executionNumber, instance] {
    MUTEX_LOCKER(guard, _mutex);
    _workers.erase(executionNumber);
   });
+  if (!queued) {
+    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
+                                   "No thread available to queue cleanup.");
+  }
 }

 void PregelFeature::cleanupAll() {

@@ -143,8 +143,13 @@ void RecoveryManager::updatedFailedServers(std::vector<ServerID> const& failed)

       TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
       Scheduler* scheduler = SchedulerFeature::SCHEDULER;
-      scheduler->queue(RequestLane::INTERNAL_LOW,
-                       [this, shard] { _renewPrimaryServer(shard); });
+      bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, [this, shard] {
+        _renewPrimaryServer(shard);
+      });
+      if (!queued) {
+        LOG_TOPIC("038de", ERR, Logger::PREGEL)
+            << "No thread available to queue pregel recovery manager request";
+      }
     }
   }
 }

@@ -166,9 +166,13 @@ void Worker<V, E, M>::setupWorker() {
     TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
     Scheduler* scheduler = SchedulerFeature::SCHEDULER;
     auto self = shared_from_this();
-    scheduler->queue(RequestLane::INTERNAL_LOW, [self, this, cb] {
+    bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, [self, this, cb] {
      _graphStore->loadShards(&_config, cb);
    });
+    if (!queued) {
+      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
+                                     "No available thread to load shards");
+    }
   }
 }

@@ -329,7 +333,7 @@ void Worker<V, E, M>::_startProcessing() {

   auto self = shared_from_this();
   for (size_t i = 0; i < numT; i++) {
-    scheduler->queue(RequestLane::INTERNAL_LOW, [self, this, i, numT, numSegments] {
+    bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, [self, this, i, numT, numSegments] {
       if (_state != WorkerState::COMPUTING) {
         LOG_TOPIC("f0e3d", WARN, Logger::PREGEL) << "Execution aborted prematurely.";
         return;
@@ -344,6 +348,10 @@ void Worker<V, E, M>::_startProcessing() {
         _finishedProcessing();  // last thread turns the lights out
       }
     });
+    if (!queued) {
+      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
+                                     "No thread available to start processing");
+    }
   }

   // TRI_ASSERT(_runningThreads == i);
@@ -565,7 +573,8 @@ void Worker<V, E, M>::_continueAsync() {
   // wait for new messages before beginning to process
   int64_t milli = _writeCache->containedMessageCount() < _messageBatchSize ? 50 : 5;
   // start next iteration in $milli mseconds.
-  _workHandle = SchedulerFeature::SCHEDULER->queueDelay(
+  bool queued = false;
+  std::tie(queued, _workHandle) = SchedulerFeature::SCHEDULER->queueDelay(
       RequestLane::INTERNAL_LOW, std::chrono::milliseconds(milli), [this](bool cancelled) {
         if (!cancelled) {
           {  // swap these pointers atomically
@@ -583,6 +592,10 @@ void Worker<V, E, M>::_continueAsync() {
           _startProcessing();
         }
       });
+  if (!queued) {
+    THROW_ARANGO_EXCEPTION_MESSAGE(
+        TRI_ERROR_QUEUE_FULL, "No thread available to continue execution.");
+  }
 }

 template <typename V, typename E, typename M>
@@ -703,7 +716,7 @@ void Worker<V, E, M>::compensateStep(VPackSlice const& data) {
   TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
   Scheduler* scheduler = SchedulerFeature::SCHEDULER;
   auto self = shared_from_this();
-  scheduler->queue(RequestLane::INTERNAL_LOW, [self, this] {
+  bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, [self, this] {
    if (_state != WorkerState::RECOVERING) {
      LOG_TOPIC("554e2", WARN, Logger::PREGEL) << "Compensation aborted prematurely.";
      return;
@@ -740,6 +753,10 @@ void Worker<V, E, M>::compensateStep(VPackSlice const& data) {
    package.close();
    _callConductor(Utils::finishedRecoveryPath, package);
   });
+  if (!queued) {
+    THROW_ARANGO_EXCEPTION_MESSAGE(
+        TRI_ERROR_QUEUE_FULL, "No thread available to queue compensation.");
+  }
 }

 template <typename V, typename E, typename M>
@@ -768,10 +785,14 @@ void Worker<V, E, M>::_callConductor(std::string const& path, VPackBuilder const
     TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
     Scheduler* scheduler = SchedulerFeature::SCHEDULER;
     auto self = shared_from_this();
-    scheduler->queue(RequestLane::INTERNAL_LOW, [self, path, message] {
+    bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, [self, path, message] {
      VPackBuilder response;
      PregelFeature::handleConductorRequest(path, message.slice(), response);
    });
+    if (!queued) {
+      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
+                                     "No thread available to call conductor");
+    }
   } else {
     std::shared_ptr<ClusterComm> cc = ClusterComm::instance();
     std::string baseUrl = Utils::baseUrl(_config.database(), Utils::conductorPrefix);

@@ -22,6 +22,10 @@
 ////////////////////////////////////////////////////////////////////////////////

 #include "InitialSyncer.h"

+#include "Basics/FunctionUtils.h"
+#include "Basics/application-exit.h"
+#include "Logger/LogMacros.h"
 #include "Scheduler/Scheduler.h"
 #include "Scheduler/SchedulerFeature.h"

@@ -56,19 +60,35 @@ void InitialSyncer::startRecurringBatchExtension() {
   }

   std::weak_ptr<Syncer> self(shared_from_this());
-  _batchPingTimer = SchedulerFeature::SCHEDULER->queueDelay(
-      RequestLane::SERVER_REPLICATION, std::chrono::seconds(secs), [self](bool cancelled) {
+  bool queued = false;
+  std::tie(queued, _batchPingTimer) =
+      basics::function_utils::retryUntilTimeout<Scheduler::WorkHandle>(
+          [secs, self]() -> std::pair<bool, Scheduler::WorkHandle> {
+            return SchedulerFeature::SCHEDULER->queueDelay(
+                RequestLane::SERVER_REPLICATION, std::chrono::seconds(secs),
+                [self](bool cancelled) {
                   if (!cancelled) {
                     auto syncer = self.lock();
                     if (syncer) {
                       auto* s = static_cast<InitialSyncer*>(syncer.get());
                       if (s->_batch.id != 0 && !s->isAborted()) {
-                        s->_batch.extend(s->_state.connection, s->_progress, s->_state.syncerId);
+                        s->_batch.extend(s->_state.connection, s->_progress,
+                                         s->_state.syncerId);
                         s->startRecurringBatchExtension();
                       }
                     }
                   }
                 });
+          },
+          Logger::REPLICATION, "queue batch extension");
+  if (!queued) {
+    LOG_TOPIC("f8b3e", ERR, Logger::REPLICATION)
+        << "Failed to queue replication batch extension for 5 minutes, exiting.";
+    // don't abort, as this is not a critical error
+    // if requeueing has failed here, the replication can still go on, but
+    // it _may_ fail later because the batch has expired on the leader.
+    // but there are still chances it can continue successfully
+  }
 }

 }  // namespace arangodb

@@ -91,23 +91,23 @@ RestStatus RestShutdownHandler::execute() {
     clusterFeature->setUnregisterOnShutdown(true);
   }

-  try {
-    VPackBuilder result;
-    result.add(VPackValue("OK"));
-    generateResult(rest::ResponseCode::OK, result.slice());
-  } catch (...) {
-    // Ignore the error
-  }
   auto self = shared_from_this();
   Scheduler* scheduler = SchedulerFeature::SCHEDULER;
   // don't block the response for workers waiting on this callback
   // this should allow workers to go into the IDLE state
-  scheduler->queue(RequestLane::CLUSTER_INTERNAL, [self] {
+  bool queued = scheduler->queue(RequestLane::CLUSTER_INTERNAL, [self] {
    // Give the server 2 seconds to send the reply:
    std::this_thread::sleep_for(std::chrono::seconds(2));
    // Go down:
    ApplicationServer::server->beginShutdown();
   });
+  if (queued) {
+    VPackBuilder result;
+    result.add(VPackValue("OK"));
+    generateResult(rest::ResponseCode::OK, result.slice());
+  } else {
+    generateError(rest::ResponseCode::SERVER_ERROR, TRI_ERROR_QUEUE_FULL);
+  }

   return RestStatus::DONE;
 }

@@ -143,14 +143,15 @@ void Scheduler::runCronThread() {
     }
   }
 }

-Scheduler::WorkHandle Scheduler::queueDelay(RequestLane lane, clock::duration delay,
-                                            std::function<void(bool cancelled)> handler) {
+std::pair<bool, Scheduler::WorkHandle> Scheduler::queueDelay(
+    RequestLane lane, clock::duration delay, std::function<void(bool cancelled)> handler) {
   TRI_ASSERT(!isStopping());

   if (delay < std::chrono::milliseconds(1)) {
     // execute directly
+    bool queued =
         queue(lane, [handler = std::move(handler)]() { handler(false); });
-    return nullptr;
+    return std::make_pair(queued, nullptr);
   }

   auto item = std::make_shared<WorkItem>(std::move(handler), lane, this);
@@ -164,8 +165,9 @@ Scheduler::WorkHandle Scheduler::queueDelay(RequestLane lane, clock::duration de
     }
   }

-  return item;
+  return std::make_pair(true, item);
 }

 /*
 void Scheduler::cancelAllTasks() {
   //std::unique_lock<std::mutex> guard(_cronQueueMutex);

@@ -25,10 +25,13 @@
 #ifndef ARANGOD_SCHEDULER_SCHEDULER_H
 #define ARANGOD_SCHEDULER_SCHEDULER_H 1

+#include <chrono>
 #include <condition_variable>
 #include <mutex>
 #include <queue>

+#include "Basics/Exceptions.h"
+#include "Basics/system-compiler.h"
 #include "GeneralServer/RequestLane.h"

 namespace arangodb {
@@ -37,6 +40,7 @@ namespace velocypack {
 class Builder;
 }

+class LogTopic;
 class SchedulerThread;
 class SchedulerCronThread;

@@ -54,12 +58,15 @@ class Scheduler {
   typedef std::shared_ptr<WorkItem> WorkHandle;

   // Enqueues a task - this is implemented on the specific scheduler
-  virtual bool queue(RequestLane lane, std::function<void()>, bool allowDirectHandling = false) = 0;
+  // May throw.
+  virtual bool queue(RequestLane lane, std::function<void()>,
+                     bool allowDirectHandling = false) ADB_WARN_UNUSED_RESULT = 0;

   // Enqueues a task after delay - this uses the queue functions above.
   // WorkHandle is a shared_ptr to a WorkItem. If all references the WorkItem
-  // are dropped, the task is canceled.
-  virtual WorkHandle queueDelay(RequestLane lane, clock::duration delay,
+  // are dropped, the task is canceled. It will return true if queued, false
+  // otherwise.
+  virtual std::pair<bool, WorkHandle> queueDelay(RequestLane lane, clock::duration delay,
                                 std::function<void(bool canceled)> handler);

   class WorkItem final {
@@ -96,9 +103,11 @@ class Scheduler {
      // The following code moves the _handler into the Scheduler.
      // Thus any reference to class to self in the _handler will be released
      // as soon as the scheduler executed the _handler lambda.
-     _scheduler->queue(_lane, [handler = std::move(_handler), arg]() {
-       handler(arg);
-     });
+     bool queued = _scheduler->queue(_lane, [handler = std::move(_handler),
+                                             arg]() { handler(arg); });
+     if (!queued) {
+       THROW_ARANGO_EXCEPTION(TRI_ERROR_QUEUE_FULL);
+     }
     }
   }
 #ifdef ARANGODB_ENABLE_MAINTAINER_MODE

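With queue() now marked ADB_WARN_UNUSED_RESULT and queueDelay() returning a std::pair<bool, WorkHandle>, every call site has to consume the flag and decide what a full queue means for it. The snippet below is a condensed restatement of the call-site pattern used throughout the hunks in this commit; it assumes the surrounding ArangoDB headers and the arangodb namespace and is not compilable on its own.

// Condensed call-site pattern from this commit (assumes ArangoDB headers).
bool queued = false;
Scheduler::WorkHandle handle;
std::tie(queued, handle) = SchedulerFeature::SCHEDULER->queueDelay(
    RequestLane::INTERNAL_LOW, std::chrono::seconds(1), [](bool cancelled) {
      if (!cancelled) {
        // do the delayed work
      }
    });
if (!queued) {
  // react explicitly instead of silently dropping the task
  THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
                                 "No thread available to queue task");
}
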
@@ -23,12 +23,39 @@
 #include "ManagerFeature.h"

 #include "ApplicationFeatures/ApplicationServer.h"
+#include "Basics/FunctionUtils.h"
 #include "Basics/MutexLocker.h"
+#include "Basics/application-exit.h"
 #include "Scheduler/SchedulerFeature.h"
 #include "StorageEngine/EngineSelectorFeature.h"
 #include "StorageEngine/StorageEngine.h"
 #include "Transaction/Manager.h"

+namespace {
+void queueGarbageCollection(std::mutex& mutex, arangodb::Scheduler::WorkHandle& workItem,
+                            std::function<void(bool)>& gcfunc) {
+  bool queued = false;
+  {
+    std::lock_guard<std::mutex> guard(mutex);
+    std::tie(queued, workItem) =
+        arangodb::basics::function_utils::retryUntilTimeout<arangodb::Scheduler::WorkHandle>(
+            [&gcfunc]() -> std::pair<bool, arangodb::Scheduler::WorkHandle> {
+              auto off = std::chrono::seconds(1);
+              return arangodb::SchedulerFeature::SCHEDULER->queueDelay(arangodb::RequestLane::INTERNAL_LOW,
+                                                                       off, gcfunc);
+            },
+            arangodb::Logger::TRANSACTIONS,
+            "queue transaction garbage collection");
+  }
+  if (!queued) {
+    LOG_TOPIC("f8b3d", FATAL, arangodb::Logger::TRANSACTIONS)
+        << "Failed to queue transaction garbage collection, for 5 minutes, "
+           "exiting.";
+    FATAL_ERROR_EXIT();
+  }
+}
+}  // namespace
+
 using namespace arangodb::application_features;
 using namespace arangodb::basics;
 using namespace arangodb::options;
@@ -53,11 +80,8 @@ ManagerFeature::ManagerFeature(application_features::ApplicationServer& server)

     MANAGER->garbageCollect(/*abortAll*/false);

-    auto off = std::chrono::seconds(1);
-
-    std::lock_guard<std::mutex> guard(_workItemMutex);
     if (!ApplicationServer::isStopping()) {
-      _workItem = SchedulerFeature::SCHEDULER->queueDelay(RequestLane::INTERNAL_LOW, off, _gcfunc);
+      ::queueGarbageCollection(_workItemMutex, _workItem, _gcfunc);
     }
   };
 }
@@ -71,9 +95,7 @@ void ManagerFeature::prepare() {
 void ManagerFeature::start() {
   Scheduler* scheduler = SchedulerFeature::SCHEDULER;
   if (scheduler != nullptr) {  // is nullptr in catch tests
-    auto off = std::chrono::seconds(1);
-    std::lock_guard<std::mutex> guard(_workItemMutex);
-    _workItem = scheduler->queueDelay(RequestLane::INTERNAL_LOW, off, _gcfunc);
+    ::queueGarbageCollection(_workItemMutex, _workItem, _gcfunc);
   }
 }

@@ -29,6 +29,7 @@
 #include <velocypack/Builder.h>
 #include <velocypack/velocypack-aliases.h>

+#include "Basics/FunctionUtils.h"
 #include "Basics/StringUtils.h"
 #include "Basics/tri-strings.h"
 #include "Cluster/ServerState.h"
@@ -296,20 +297,35 @@ std::function<void(bool cancelled)> Task::callbackFunction() {
     }

     // now do the work:
-    SchedulerFeature::SCHEDULER->queue(RequestLane::INTERNAL_LOW, [self, this, execContext] {
+    bool queued = basics::function_utils::retryUntilTimeout(
+        [this, self, execContext]() -> bool {
+          return SchedulerFeature::SCHEDULER->queue(RequestLane::INTERNAL_LOW, [self, this, execContext] {
            ExecContextScope scope(_user.empty() ? ExecContext::superuser()
                                                 : execContext.get());
            work(execContext.get());

            if (_periodic.load() && !application_features::ApplicationServer::isStopping()) {
              // requeue the task
-             queue(_interval);
+             bool queued = basics::function_utils::retryUntilTimeout(
+                 [this]() -> bool { return queue(_interval); }, Logger::FIXME,
+                 "queue task");
+             if (!queued) {
+               THROW_ARANGO_EXCEPTION_MESSAGE(
+                   TRI_ERROR_QUEUE_FULL,
+                   "Failed to queue task for 5 minutes, gave up.");
+             }
            } else {
              // in case of one-off tasks or in case of a shutdown, simply
              // remove the task from the list
              Task::unregisterTask(_id, true);
            }
          });
+        },
+        Logger::FIXME, "queue task");
+    if (!queued) {
+      THROW_ARANGO_EXCEPTION_MESSAGE(
+          TRI_ERROR_QUEUE_FULL, "Failed to queue task for 5 minutes, gave up.");
+    }
   };
 }

@@ -327,13 +343,21 @@ void Task::start() {
   }

   // initially queue the task
-  queue(_offset);
+  bool queued = basics::function_utils::retryUntilTimeout(
+      [this]() -> bool { return queue(_offset); }, Logger::FIXME, "queue task");
+  if (!queued) {
+    THROW_ARANGO_EXCEPTION_MESSAGE(
+        TRI_ERROR_QUEUE_FULL, "Failed to queue task for 5 minutes, gave up.");
+  }
 }

-void Task::queue(std::chrono::microseconds offset) {
+bool Task::queue(std::chrono::microseconds offset) {
   MUTEX_LOCKER(lock, _taskHandleMutex);
-  _taskHandle = SchedulerFeature::SCHEDULER->queueDelay(RequestLane::INTERNAL_LOW,
-                                                        offset, callbackFunction());
+  bool queued = false;
+  std::tie(queued, _taskHandle) =
+      SchedulerFeature::SCHEDULER->queueDelay(RequestLane::INTERNAL_LOW, offset,
+                                              callbackFunction());
+  return queued;
 }

 void Task::cancel() {
@@ -82,7 +82,7 @@ class Task : public std::enable_shared_from_this<Task> {
  private:
   void toVelocyPack(velocypack::Builder&) const;
   void work(ExecContext const*);
-  void queue(std::chrono::microseconds offset);
+  bool queue(std::chrono::microseconds offset) ADB_WARN_UNUSED_RESULT;
   std::function<void(bool cancelled)> callbackFunction();
   std::string const& name() const { return _name; }

@@ -0,0 +1,59 @@
+////////////////////////////////////////////////////////////////////////////////
+/// DISCLAIMER
+///
+/// Copyright 2014-2019 ArangoDB GmbH, Cologne, Germany
+/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
+///
+/// Licensed under the Apache License, Version 2.0 (the "License");
+/// you may not use this file except in compliance with the License.
+/// You may obtain a copy of the License at
+///
+///     http://www.apache.org/licenses/LICENSE-2.0
+///
+/// Unless required by applicable law or agreed to in writing, software
+/// distributed under the License is distributed on an "AS IS" BASIS,
+/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+/// See the License for the specific language governing permissions and
+/// limitations under the License.
+///
+/// Copyright holder is ArangoDB GmbH, Cologne, Germany
+///
+/// @author Dan Larkin-York
+////////////////////////////////////////////////////////////////////////////////
+
+#include "FunctionUtils.h"
+
+namespace arangodb {
+namespace basics {
+namespace function_utils {
+
+/**
+ * @brief Execute a lambda, retrying periodically until it succeeds or times out
+ * @param fn Lambda to run
+ * @param topic Log topic for any messages
+ * @param message Description for log messages (format below)
+ * @param retryInterval Period to wait between attempts
+ * @param timeout Total time to wait before timing out and returning
+ *
+ * If a given attempt fails, a log message will be made in the following form:
+ * "Failed to " + message + ", waiting to retry..."
+ */
+bool retryUntilTimeout(std::function<bool()> fn, LogTopic& topic,
+                       std::string const& message, std::chrono::nanoseconds retryInterval,
+                       std::chrono::nanoseconds timeout) {
+  auto start = std::chrono::steady_clock::now();
+  bool success = false;
+  while ((std::chrono::steady_clock::now() - start) < timeout) {
+    success = fn();
+    if (success) {
+      break;
+    }
+    LOG_TOPIC("18d0b", INFO, topic) << "Failed to " << message << ", waiting to retry...";
+    std::this_thread::sleep_for(retryInterval);
+  }
+  return success;
+}
+
+}  // namespace function_utils
+}  // namespace basics
+}  // namespace arangodb

@@ -0,0 +1,88 @@
+////////////////////////////////////////////////////////////////////////////////
+/// DISCLAIMER
+///
+/// Copyright 2014-2019 ArangoDB GmbH, Cologne, Germany
+/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
+///
+/// Licensed under the Apache License, Version 2.0 (the "License");
+/// you may not use this file except in compliance with the License.
+/// You may obtain a copy of the License at
+///
+///     http://www.apache.org/licenses/LICENSE-2.0
+///
+/// Unless required by applicable law or agreed to in writing, software
+/// distributed under the License is distributed on an "AS IS" BASIS,
+/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+/// See the License for the specific language governing permissions and
+/// limitations under the License.
+///
+/// Copyright holder is ArangoDB GmbH, Cologne, Germany
+///
+/// @author Dan Larkin-York
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef ARANGODB_BASICS_FUNCTION_UTILS_H
+#define ARANGODB_BASICS_FUNCTION_UTILS_H 1
+
+#include <algorithm>
+#include <chrono>
+#include <functional>
+#include <thread>
+#include <tuple>
+
+#include "Logger/LogMacros.h"
+
+namespace arangodb {
+namespace basics {
+namespace function_utils {
+
+/**
+ * @brief Execute a lambda, retrying periodically until it succeeds or times out
+ * @param fn Lambda to run
+ * @param topic Log topic for any messages
+ * @param message Description for log messages (format below)
+ * @param retryInterval Period to wait between attempts
+ * @param timeout Total time to wait before timing out and returning
+ *
+ * If a given attempt fails, a log message will be made in the following form:
+ * "Failed to " + message + ", waiting to retry..."
+ */
+template <typename R>
+std::pair<bool, R> retryUntilTimeout(
+    std::function<std::pair<bool, R>()> fn, LogTopic& topic, std::string const& message,
+    std::chrono::nanoseconds retryInterval = std::chrono::seconds(1),
+    std::chrono::nanoseconds timeout = std::chrono::minutes(5)) {
+  auto start = std::chrono::steady_clock::now();
+  bool success = false;
+  R value{};
+  while ((std::chrono::steady_clock::now() - start) < timeout) {
+    std::tie(success, value) = fn();
+    if (success) {
+      break;
+    }
+    LOG_TOPIC("18d0a", INFO, topic) << "Failed to " << message << ", waiting to retry...";
+    std::this_thread::sleep_for(retryInterval);
+  }
+  return std::make_pair(success, value);
+}
+
+/**
+ * @brief Execute a lambda, retrying periodically until it succeeds or times out
+ * @param fn Lambda to run
+ * @param topic Log topic for any messages
+ * @param message Description for log messages (format below)
+ * @param retryInterval Period to wait between attempts
+ * @param timeout Total time to wait before timing out and returning
+ *
+ * If a given attempt fails, a log message will be made in the following form:
+ * "Failed to " + message + ", waiting to retry..."
+ */
+bool retryUntilTimeout(std::function<bool()> fn, LogTopic& topic, std::string const& message,
+                       std::chrono::nanoseconds retryInterval = std::chrono::seconds(1),
+                       std::chrono::nanoseconds timeout = std::chrono::minutes(5));
+
+}  // namespace function_utils
+}  // namespace basics
+}  // namespace arangodb
+
+#endif

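A condensed usage sketch of the templated retryUntilTimeout(), modeled on the transaction garbage-collection and replication call sites earlier in this commit. The lane, delay, and log message here are illustrative; the snippet assumes the ArangoDB headers above and is not compilable on its own.

// Condensed usage sketch (assumes ArangoDB headers; names partly illustrative).
bool ok = false;
arangodb::Scheduler::WorkHandle item;
std::tie(ok, item) =
    arangodb::basics::function_utils::retryUntilTimeout<arangodb::Scheduler::WorkHandle>(
        []() -> std::pair<bool, arangodb::Scheduler::WorkHandle> {
          // each attempt tries to queue the delayed work; queueDelay reports success
          return arangodb::SchedulerFeature::SCHEDULER->queueDelay(
              arangodb::RequestLane::INTERNAL_LOW, std::chrono::seconds(1),
              [](bool /*cancelled*/) { /* delayed work */ });
        },
        arangodb::Logger::FIXME, "queue example task");
if (!ok) {
  // after five minutes of retries the task still could not be queued
}
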
@@ -155,6 +155,7 @@ add_library(${LIB_ARANGO} STATIC
   Basics/DataProtector.cpp
   Basics/Exceptions.cpp
   Basics/FileUtils.cpp
+  Basics/FunctionUtils.cpp
   Basics/HybridLogicalClock.cpp
   Basics/LdapUrlParser.cpp
   Basics/LocalTaskQueue.cpp