1
0
Fork 0

Bugfix: More 3.4 scheduler changes backported (#7091)

This commit is contained in:
Matthew Von-Maszewski 2018-10-26 11:09:20 -04:00 committed by Jan
parent 7113e779fb
commit 97ba8ca2be
29 changed files with 509 additions and 458 deletions

View File

@ -105,7 +105,7 @@ void CacheManagerFeature::start() {
auto scheduler = SchedulerFeature::SCHEDULER;
auto postFn = [scheduler](std::function<void()> fn) -> bool {
scheduler->post(fn, false);
scheduler->queue(RequestPriority::LOW, fn);
return true;
};
_manager.reset(new Manager(postFn, _cacheSize));

View File

@ -230,7 +230,7 @@ char const* ClusterCommResult::stringifyStatus(ClusterCommOpStatus status) {
////////////////////////////////////////////////////////////////////////////////
ClusterComm::ClusterComm()
: _backgroundThread(nullptr),
: _roundRobin(0),
_logConnectionErrors(false),
_authenticationEnabled(false),
_jwtAuthorization("") {
@ -243,18 +243,15 @@ ClusterComm::ClusterComm()
_jwtAuthorization = "bearer " + token;
}
_communicator = std::make_shared<communicator::Communicator>();
}
/// @brief Unit test constructor
ClusterComm::ClusterComm(bool ignored)
: _backgroundThread(nullptr),
: _roundRobin(0),
_logConnectionErrors(false),
_authenticationEnabled(false),
_jwtAuthorization("") {
//_communicator = std::make_shared<communicator::Communicator>();
} // ClusterComm::ClusterComm(bool)
////////////////////////////////////////////////////////////////////////////////
@ -262,12 +259,7 @@ ClusterComm::ClusterComm(bool ignored)
////////////////////////////////////////////////////////////////////////////////
ClusterComm::~ClusterComm() {
if (_backgroundThread != nullptr) {
_backgroundThread->beginShutdown();
delete _backgroundThread;
_backgroundThread = nullptr;
}
stopBackgroundThreads();
cleanupAllQueues();
}
@ -315,7 +307,7 @@ std::shared_ptr<ClusterComm> ClusterComm::instance() {
void ClusterComm::initialize() {
auto i = instance(); // this will create the static instance
i->startBackgroundThread();
i->startBackgroundThreads();
}
////////////////////////////////////////////////////////////////////////////////
@ -323,22 +315,51 @@ void ClusterComm::initialize() {
////////////////////////////////////////////////////////////////////////////////
void ClusterComm::cleanup() {
if (!_theInstance) {
return ;
}
_theInstance.reset(); // no more operations will be started, but running
// ones have their copy of the shared_ptr
}
////////////////////////////////////////////////////////////////////////////////
/// @brief start the communication background thread
/// @brief start the communication background threads
////////////////////////////////////////////////////////////////////////////////
void ClusterComm::startBackgroundThread() {
_backgroundThread = new ClusterCommThread();
void ClusterComm::startBackgroundThreads() {
if (!_backgroundThread->start()) {
LOG_TOPIC(FATAL, Logger::CLUSTER)
<< "ClusterComm background thread does not work";
FATAL_ERROR_EXIT();
for(unsigned loop=0; loop<(TRI_numberProcessors()/8+1); ++loop) {
ClusterCommThread * thread = new ClusterCommThread();
if (thread->start()) {
_backgroundThreads.push_back(thread);
} else {
LOG_TOPIC(FATAL, Logger::CLUSTER)
<< "ClusterComm background thread does not work";
FATAL_ERROR_EXIT();
} // else
} // for
}
void ClusterComm::stopBackgroundThreads() {
for (ClusterCommThread * thread: _backgroundThreads) {
thread->beginShutdown();
delete thread;
}
_backgroundThreads.clear();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief choose next communicator via round robin
////////////////////////////////////////////////////////////////////////////////
std::shared_ptr<communicator::Communicator> ClusterComm::communicator() {
unsigned index;
index = (++_roundRobin) % _backgroundThreads.size();
return _backgroundThreads[index]->communicator();
}
////////////////////////////////////////////////////////////////////////////////
@ -386,7 +407,13 @@ OperationID ClusterComm::getOperationID() { return TRI_NewTickServer(); }
///
/// There are two timeout arguments. `timeout` is the globale timeout
/// specifying after how many seconds the complete operation must be
/// completed. `connectTimeout` is the Timeout for initially opening a connection
/// completed. `initTimeout` is a second timeout, which is used to
/// limit the time to send the initial request away. If `initTimeout`
/// is negative (as for example in the default value), then `initTimeout`
/// is taken to be the same as `timeout`. The idea behind the two timeouts
/// is to be able to specify correct behaviour for automatic failover.
/// The idea is that if the initial request cannot be sent within
/// `initTimeout`, one can retry after a potential failover.
////////////////////////////////////////////////////////////////////////////////
OperationID ClusterComm::asyncRequest(
@ -395,7 +422,7 @@ OperationID ClusterComm::asyncRequest(
std::shared_ptr<std::string const> body,
std::unordered_map<std::string, std::string> const& headerFields,
std::shared_ptr<ClusterCommCallback> callback, ClusterCommTimeout timeout,
bool singleRequest, ClusterCommTimeout connectTimeout) {
bool singleRequest, ClusterCommTimeout initTimeout) {
auto prepared = prepareRequest(destination, reqtype, body.get(), headerFields);
std::shared_ptr<ClusterCommResult> result(prepared.first);
result->coordTransactionID = coordTransactionID;
@ -410,21 +437,22 @@ OperationID ClusterComm::asyncRequest(
}
communicator::Options opt;
opt.connectionTimeout = connectTimeout;
opt.connectionTimeout = initTimeout;
opt.requestTimeout = timeout;
Callbacks callbacks;
bool doLogConnectionErrors = logConnectionErrors();
callbacks._scheduleMe = scheduleMe;
if (callback) {
callbacks._onError = [callback, result, doLogConnectionErrors, this, connectTimeout](int errorCode, std::unique_ptr<GeneralResponse> response) {
callbacks._onError = [callback, result, doLogConnectionErrors, this, initTimeout](int errorCode, std::unique_ptr<GeneralResponse> response) {
{
CONDITION_LOCKER(locker, somethingReceived);
responses.erase(result->operationID);
}
result->fromError(errorCode, std::move(response));
if (result->status == CL_COMM_BACKEND_UNAVAILABLE) {
logConnectionError(doLogConnectionErrors, result.get(), connectTimeout, __LINE__);
logConnectionError(doLogConnectionErrors, result.get(), initTimeout, __LINE__);
}
/*bool ret =*/ ((*callback.get())(result.get()));
// TRI_ASSERT(ret == true);
@ -440,11 +468,11 @@ OperationID ClusterComm::asyncRequest(
//TRI_ASSERT(ret == true);
};
} else {
callbacks._onError = [result, doLogConnectionErrors, this, connectTimeout](int errorCode, std::unique_ptr<GeneralResponse> response) {
callbacks._onError = [result, doLogConnectionErrors, this, initTimeout](int errorCode, std::unique_ptr<GeneralResponse> response) {
CONDITION_LOCKER(locker, somethingReceived);
result->fromError(errorCode, std::move(response));
if (result->status == CL_COMM_BACKEND_UNAVAILABLE) {
logConnectionError(doLogConnectionErrors, result.get(), connectTimeout, __LINE__);
logConnectionError(doLogConnectionErrors, result.get(), initTimeout, __LINE__);
}
somethingReceived.broadcast();
};
@ -458,7 +486,7 @@ OperationID ClusterComm::asyncRequest(
TRI_ASSERT(request != nullptr);
CONDITION_LOCKER(locker, somethingReceived);
auto ticketId = _communicator->addRequest(createCommunicatorDestination(result->endpoint, path),
auto ticketId = communicator()->addRequest(createCommunicatorDestination(result->endpoint, path),
std::move(request), callbacks, opt);
result->operationID = ticketId;
@ -521,13 +549,14 @@ std::unique_ptr<ClusterCommResult> ClusterComm::syncRequest(
wasSignaled = true;
cv.signal();
});
callbacks._scheduleMe = scheduleMe;
communicator::Options opt;
opt.requestTimeout = timeout;
TRI_ASSERT(request != nullptr);
result->status = CL_COMM_SENDING;
CONDITION_LOCKER(isen, cv);
_communicator->addRequest(createCommunicatorDestination(result->endpoint, path),
communicator()->addRequest(createCommunicatorDestination(result->endpoint, path),
std::move(request), callbacks, opt);
while (!wasSignaled) {
@ -776,6 +805,7 @@ void ClusterComm::cleanupAllQueues() {
ClusterCommThread::ClusterCommThread() : Thread("ClusterComm"), _cc(nullptr) {
_cc = ClusterComm::instance().get();
_communicator = std::make_shared<communicator::Communicator>();
}
ClusterCommThread::~ClusterCommThread() { shutdown(); }
@ -815,7 +845,7 @@ size_t ClusterComm::performRequests(std::vector<ClusterCommRequest>& requests,
ClusterCommTimeout timeout, size_t& nrDone,
arangodb::LogTopic const& logTopic,
bool retryOnCollNotFound) {
if (requests.empty()) {
if (requests.size() == 0) {
nrDone = 0;
return 0;
}
@ -1293,8 +1323,14 @@ std::vector<communicator::Ticket> ClusterComm::activeServerTickets(std::vector<s
}
void ClusterComm::disable() {
_communicator->disable();
_communicator->abortRequests();
for (ClusterCommThread * thread: _backgroundThreads) {
thread->communicator()->disable();
thread->communicator()->abortRequests();
}
}
void ClusterComm::scheduleMe(std::function<void()> task) {
arangodb::SchedulerFeature::SCHEDULER->queue(RequestPriority::HIGH, task);
}
////////////////////////////////////////////////////////////////////////////////
@ -1307,14 +1343,14 @@ void ClusterCommThread::abortRequestsToFailedServers() {
if (failedServers.size() > 0) {
auto ticketIds = _cc->activeServerTickets(failedServers);
for (auto const& ticketId: ticketIds) {
_cc->communicator()->abortRequest(ticketId);
_communicator->abortRequest(ticketId);
}
}
}
void ClusterCommThread::run() {
TRI_ASSERT(_communicator != nullptr);
LOG_TOPIC(DEBUG, Logger::CLUSTER) << "starting ClusterComm thread";
auto lastAbortCheck = std::chrono::steady_clock::now();
while (!isStopping()) {
try {
@ -1322,8 +1358,8 @@ void ClusterCommThread::run() {
abortRequestsToFailedServers();
lastAbortCheck = std::chrono::steady_clock::now();
}
_cc->communicator()->work_once();
_cc->communicator()->wait();
_communicator->work_once();
_communicator->wait();
LOG_TOPIC(TRACE, Logger::CLUSTER) << "done waiting in ClusterCommThread";
} catch (std::exception const& ex) {
LOG_TOPIC(ERR, arangodb::Logger::CLUSTER)
@ -1333,15 +1369,15 @@ void ClusterCommThread::run() {
<< "caught unknown exception in ClusterCommThread";
}
}
_cc->communicator()->abortRequests();
_communicator->abortRequests();
LOG_TOPIC(DEBUG, Logger::CLUSTER) << "waiting for curl to stop remaining handles";
while (_cc->communicator()->work_once() > 0) {
while (_communicator->work_once() > 0) {
std::this_thread::sleep_for(std::chrono::microseconds(10));
}
LOG_TOPIC(DEBUG, Logger::CLUSTER) << "stopped ClusterComm thread";
}
/// @brief logs a connection error (backend unavailable)
void ClusterComm::logConnectionError(bool useErrorLogLevel, ClusterCommResult const* result, double timeout, int /*line*/) {
std::string msg = "cannot create connection to server";
@ -1349,7 +1385,7 @@ void ClusterComm::logConnectionError(bool useErrorLogLevel, ClusterCommResult co
msg += ": '" + result->serverID + '\'';
}
msg += " at endpoint " + result->endpoint + "', timeout: " + std::to_string(timeout);
if (useErrorLogLevel) {
LOG_TOPIC(ERR, Logger::CLUSTER) << msg;
} else {

View File

@ -24,6 +24,9 @@
#ifndef ARANGOD_CLUSTER_CLUSTER_COMM_H
#define ARANGOD_CLUSTER_CLUSTER_COMM_H 1
#include <atomic>
#include <vector>
#include "Basics/Common.h"
#include "Agency/AgencyComm.h"
@ -396,21 +399,21 @@ struct ClusterCommRequest {
}
return *headerFields;
}
void setHeaders(
std::unique_ptr<std::unordered_map<std::string, std::string>> headers) {
headerFields = std::move(headers);
}
/// @brief "safe" accessor for body
/// @brief "safe" accessor for body
std::string const& getBody() const {
if (body == nullptr) {
return noBody;
}
return *body;
}
/// @brief "safe" accessor for body
/// @brief "safe" accessor for body
std::shared_ptr<std::string const> getBodyShared() const {
if (body == nullptr) {
return sharedNoBody;
@ -486,7 +489,8 @@ class ClusterComm {
/// @brief start the communication background thread
//////////////////////////////////////////////////////////////////////////////
void startBackgroundThread();
void startBackgroundThreads();
void stopBackgroundThreads();
//////////////////////////////////////////////////////////////////////////////
/// @brief submit an HTTP request to a shard asynchronously.
@ -498,7 +502,7 @@ class ClusterComm {
std::string const& path, std::shared_ptr<std::string const> body,
std::unordered_map<std::string, std::string> const& headerFields,
std::shared_ptr<ClusterCommCallback> callback, ClusterCommTimeout timeout,
bool singleRequest = false, ClusterCommTimeout connectTimeout = -1.0);
bool singleRequest = false, ClusterCommTimeout initTimeout = -1.0);
//////////////////////////////////////////////////////////////////////////////
/// @brief submit a single HTTP request to a shard synchronously.
@ -563,7 +567,7 @@ class ClusterComm {
////////////////////////////////////////////////////////////////////////////////
/// @brief this method performs the given requests described by the vector
/// of ClusterCommRequest structs in the following way:
/// of ClusterCommRequest structs in the following way:
/// Each request is done with asyncRequest.
/// After each request is successfully send out we drop all requests.
/// Hence it is guaranteed that all requests are send, but
@ -574,14 +578,10 @@ class ClusterComm {
/// instead.
////////////////////////////////////////////////////////////////////////////////
void fireAndForgetRequests(std::vector<ClusterCommRequest> const& requests);
typedef std::function<void(std::vector<ClusterCommRequest> const&, size_t, size_t)> AsyncCallback;
void performAsyncRequests(std::vector<ClusterCommRequest>&&, ClusterCommTimeout timeout,
bool retryOnCollNotFound, AsyncCallback const&);
std::shared_ptr<communicator::Communicator> communicator() {
return _communicator;
}
void addAuthorization(std::unordered_map<std::string, std::string>* headers);
@ -591,6 +591,13 @@ class ClusterComm {
void disable();
//////////////////////////////////////////////////////////////////////////////
/// @brief push all libcurl callback work to Scheduler threads. It is a
/// public static function that any object can use.
//////////////////////////////////////////////////////////////////////////////
static void scheduleMe(std::function<void()> task);
protected: // protected members are for unit test purposes
/// @brief Constructor for test cases.
@ -697,7 +704,10 @@ class ClusterComm {
/// @brief our background communications thread
//////////////////////////////////////////////////////////////////////////////
ClusterCommThread* _backgroundThread;
std::atomic_uint _roundRobin;
std::vector<ClusterCommThread*> _backgroundThreads;
std::shared_ptr<communicator::Communicator> communicator();
//////////////////////////////////////////////////////////////////////////////
/// @brief whether or not connection errors should be logged as errors
@ -705,7 +715,6 @@ class ClusterComm {
bool _logConnectionErrors;
std::shared_ptr<communicator::Communicator> _communicator;
bool _authenticationEnabled;
std::string _jwtAuthorization;
@ -728,11 +737,16 @@ class ClusterCommThread : public Thread {
void beginShutdown() override;
bool isSystem() override final { return true; }
std::shared_ptr<communicator::Communicator> communicator() {
return _communicator;
}
private:
void abortRequestsToFailedServers();
protected:
void run() override final;
std::shared_ptr<communicator::Communicator> _communicator;
private:
ClusterComm* _cc;

View File

@ -125,7 +125,7 @@ bool resolveRequestContext(GeneralRequest& req) {
if (!guard) {
return false;
}
// the vocbase context is now responsible for releasing the vocbase
req.setRequestContext(guard.get(), true);
guard.release();
@ -439,10 +439,10 @@ void GeneralCommTask::addErrorResponse(rest::ResponseCode code,
// thread. Depending on the number of running threads requests may be queued
// and scheduled later when the number of used threads decreases
bool GeneralCommTask::handleRequestSync(std::shared_ptr<RestHandler> handler) {
auto const lane = handler->lane();
auto const prio = handler->priority();
auto self = shared_from_this();
bool ok = SchedulerFeature::SCHEDULER->queue(PriorityRequestLane(lane), [self, this, handler]() {
bool ok = SchedulerFeature::SCHEDULER->queue(prio, [self, this, handler]() {
handleRequestDirectly(basics::ConditionalLocking::DoLock,
std::move(handler));
});
@ -487,7 +487,7 @@ bool GeneralCommTask::handleRequestAsync(std::shared_ptr<RestHandler> handler,
// callback will persist the response with the AsyncJobManager
return SchedulerFeature::SCHEDULER->queue(
PriorityRequestLane(handler->lane()), [self, handler] {
handler->priority(), [self, handler] {
handler->runHandler([](RestHandler* h) {
GeneralServerFeature::JOB_MANAGER->finishAsyncJob(h);
});
@ -495,7 +495,7 @@ bool GeneralCommTask::handleRequestAsync(std::shared_ptr<RestHandler> handler,
} else {
// here the response will just be ignored
return SchedulerFeature::SCHEDULER->queue(
PriorityRequestLane(handler->lane()),
handler->priority(),
[self, handler] { handler->runHandler([](RestHandler*) {}); });
}
}

View File

@ -26,6 +26,8 @@
#include "Basics/Common.h"
namespace arangodb {
class GeneralRequest;
enum class RequestLane {
// For requests that do not block or wait for something.
// This ignores blocks that can occur when delivering
@ -37,7 +39,7 @@ enum class RequestLane {
// that do AQL requests, user administrator that
// internally uses AQL.
CLIENT_AQL,
// For requests that are executed within an V8 context,
// but not for requests that might use a V8 context for
// user defined functions.
@ -50,29 +52,29 @@ enum class RequestLane {
// For requests between agents. These are basically the
// requests used to implement RAFT.
AGENCY_INTERNAL,
// For requests from the DBserver or Coordinator accessing
// the agency.
AGENCY_CLUSTER,
// For requests from the DBserver to the Coordinator or
// from the Coordinator to the DBserver. But not using
// V8 or having high priority.
CLUSTER_INTERNAL,
// For requests from the from the Coordinator to the
// DBserver using V8.
CLUSTER_V8,
// For requests from the DBserver to the Coordinator or
// from the Coordinator to the DBserver for administration
// or diagnostic purpose. Should not block.
CLUSTER_ADMIN,
// For requests used between leader and follower for
// replication.
SERVER_REPLICATION,
// For periodic or one-off V8-based tasks executed by the
// Scheduler.
TASK_V8
@ -84,35 +86,8 @@ enum class RequestLane {
// AGENCY_CALLBACK`
};
enum class RequestPriority { HIGH, LOW, V8 };
enum class RequestPriority { HIGH, MED, LOW };
inline RequestPriority PriorityRequestLane(RequestLane lane) {
switch (lane) {
case RequestLane::CLIENT_FAST:
return RequestPriority::HIGH;
case RequestLane::CLIENT_AQL:
return RequestPriority::LOW;
case RequestLane::CLIENT_V8:
return RequestPriority::V8;
case RequestLane::CLIENT_SLOW:
return RequestPriority::LOW;
case RequestLane::AGENCY_INTERNAL:
return RequestPriority::HIGH;
case RequestLane::AGENCY_CLUSTER:
return RequestPriority::LOW;
case RequestLane::CLUSTER_INTERNAL:
return RequestPriority::HIGH;
case RequestLane::CLUSTER_V8:
return RequestPriority::V8;
case RequestLane::CLUSTER_ADMIN:
return RequestPriority::LOW;
case RequestLane::SERVER_REPLICATION:
return RequestPriority::LOW;
case RequestLane::TASK_V8:
return RequestPriority::V8;
}
return RequestPriority::LOW;
}
}
#endif

View File

@ -318,6 +318,42 @@ void RestHandler::runHandlerStateMachine() {
}
}
RequestPriority RestHandler::priority(RequestLane l) const {
RequestPriority p = RequestPriority::LOW;
switch (l) {
case RequestLane::AGENCY_INTERNAL:
case RequestLane::CLIENT_FAST:
case RequestLane::CLUSTER_INTERNAL:
p = RequestPriority::HIGH;
break;
case RequestLane::CLIENT_AQL:
case RequestLane::CLIENT_SLOW:
case RequestLane::AGENCY_CLUSTER:
case RequestLane::CLUSTER_ADMIN:
case RequestLane::SERVER_REPLICATION:
case RequestLane::CLIENT_V8:
case RequestLane::CLUSTER_V8:
case RequestLane::TASK_V8:
p = RequestPriority::LOW;
break;
}
if (p == RequestPriority::HIGH) {
return p;
}
bool found;
_request->header(StaticStrings::XArangoFrontend, found);
if (!found) {
return p;
}
return RequestPriority::MED;
}
// -----------------------------------------------------------------------------
// --SECTION-- private methods
// -----------------------------------------------------------------------------

View File

@ -83,6 +83,14 @@ class RestHandler : public std::enable_shared_from_this<RestHandler> {
/// @brief forwards the request to the appropriate server
bool forwardRequest();
// The priority is derived from the lane.
// Header fields might influence the priority.
// In order to change the priority of a handler
// adjust the lane, do not overwrite the priority
// function!
RequestPriority priority(RequestLane) const;
RequestPriority priority() const {return priority(lane());}
public:
// rest handler name for debugging and logging
virtual char const* name() const = 0;

View File

@ -1640,7 +1640,7 @@ int MMFilesCollection::fillIndexes(
" }, indexes: " + std::to_string(n - 1));
auto poster = [](std::function<void()> fn) -> void {
SchedulerFeature::SCHEDULER->post(fn, false);
SchedulerFeature::SCHEDULER->queue(RequestPriority::LOW, fn);
};
auto queue = std::make_shared<arangodb::basics::LocalTaskQueue>(poster);

View File

@ -312,7 +312,7 @@ VPackBuilder Conductor::finishedWorkerStep(VPackSlice const& data) {
rest::Scheduler* scheduler = SchedulerFeature::SCHEDULER;
// don't block the response for workers waiting on this callback
// this should allow workers to go into the IDLE state
scheduler->post([this] {
scheduler->queue(RequestPriority::LOW, [this] {
MUTEX_LOCKER(guard, _callbackMutex);
if (_state == ExecutionState::RUNNING) {
@ -325,7 +325,7 @@ VPackBuilder Conductor::finishedWorkerStep(VPackSlice const& data) {
LOG_TOPIC(WARN, Logger::PREGEL)
<< "No further action taken after receiving all responses";
}
}, false);
});
return VPackBuilder();
}
@ -716,7 +716,7 @@ void Conductor::collectAQLResults(VPackBuilder& outBuilder) {
b.openObject();
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
b.close();
// merge results from DBServers
outBuilder.openArray();
int res = _sendToAllDBServers(Utils::aqlResultsPath, b,
@ -770,13 +770,13 @@ int Conductor::_sendToAllDBServers(std::string const& path,
} else {
TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
rest::Scheduler* scheduler = SchedulerFeature::SCHEDULER;
scheduler->post([this, path, message] {
scheduler->queue(RequestPriority::LOW, [this, path, message] {
VPackBuilder response;
PregelFeature::handleWorkerRequest(
_vocbaseGuard.database(), path, message.slice(), response
);
}, false);
});
}
return TRI_ERROR_NO_ERROR;
}

View File

@ -97,10 +97,10 @@ std::map<CollectionID, std::vector<VertexShardInfo>>
}
std::map<CollectionID, std::vector<VertexShardInfo>> result;
LOG_TOPIC(DEBUG, Logger::PREGEL) << "Allocating memory";
uint64_t totalMemory = TRI_totalSystemMemory();
// Contains the shards located on this db server in the right order
// assuming edges are sharded after _from, vertices after _key
// then every ith vertex shard has the corresponding edges in
@ -110,7 +110,7 @@ std::map<CollectionID, std::vector<VertexShardInfo>>
std::map<CollectionID, std::vector<ShardID>> const& edgeCollMap =
_config->edgeCollectionShards();
size_t numShards = SIZE_MAX;
// Allocating some memory
uint64_t vCount = 0;
uint64_t eCount = 0;
@ -121,16 +121,16 @@ std::map<CollectionID, std::vector<VertexShardInfo>>
} else if (numShards != vertexShards.size()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, shardError);
}
for (size_t i = 0; i < vertexShards.size(); i++) {
VertexShardInfo info;
info.vertexShard = vertexShards[i];
info.trx = _createTransaction();
TRI_voc_cid_t cid = info.trx->addCollectionAtRuntime(info.vertexShard);
info.trx->pinData(cid); // will throw when it fails
OperationResult opResult = info.trx->count(info.vertexShard,
transaction::CountType::Normal);
if (opResult.fail() || _destroyed) {
@ -141,7 +141,7 @@ std::map<CollectionID, std::vector<VertexShardInfo>>
if (info.numVertices == 0) {
continue;
}
// distributeshardslike should cause the edges for a vertex to be
// in the same shard index. x in vertexShard2 => E(x) in edgeShard2
for (auto const& pair2 : edgeCollMap) {
@ -149,13 +149,13 @@ std::map<CollectionID, std::vector<VertexShardInfo>>
if (vertexShards.size() != edgeShards.size()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, shardError);
}
ShardID const& eShard = edgeShards[i];
info.edgeShards.push_back(eShard);
cid = info.trx->addCollectionAtRuntime(eShard);
info.trx->pinData(cid); // will throw when it fails
OperationResult opResult = info.trx->count(eShard, transaction::CountType::Normal);
if (opResult.fail() || _destroyed) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_BAD_PARAMETER);
@ -163,12 +163,12 @@ std::map<CollectionID, std::vector<VertexShardInfo>>
info.numEdges += opResult.slice().getUInt();
}
eCount += info.numEdges;
result[pair.first].push_back(std::move(info));
}
}
_index.resize(vCount);
size_t requiredMem = vCount * _graphFormat->estimatedVertexSize() +
eCount * _graphFormat->estimatedEdgeSize();
@ -183,7 +183,7 @@ std::map<CollectionID, std::vector<VertexShardInfo>>
}
_edges = new VectorTypedBuffer<Edge<E>>(eCount);
}
return result;
}
@ -198,14 +198,14 @@ void GraphStore<V, E>::loadShards(WorkerConfig* config,
TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
rest::Scheduler* scheduler = SchedulerFeature::SCHEDULER;
scheduler->post([this, scheduler, callback] {
scheduler->queue(RequestPriority::LOW, [this, scheduler, callback] {
// hold the current position where the ith vertex shard can
// start to write its data. At the end the offset should equal the
// sum of the counts of all ith edge shards
auto collectionShards = _allocateSpace();
uint64_t vertexOff = 0;
std::vector<size_t> edgeDataOffsets; // will contain # off edges in ith shard
for (auto& collection : collectionShards) {
@ -219,12 +219,12 @@ void GraphStore<V, E>::loadShards(WorkerConfig* config,
edgeDataOffsets[++shardIdx] += info.numEdges;
}
}
for (auto& collection : collectionShards) {
size_t shardIdx = 0;
for (VertexShardInfo& info : collection.second) {
try {
// we might have already loaded these shards
if (_loadedShards.find(info.vertexShard) != _loadedShards.end()) {
@ -235,30 +235,30 @@ void GraphStore<V, E>::loadShards(WorkerConfig* config,
TRI_ASSERT(info.numVertices > 0);
TRI_ASSERT(vertexOff < _index.size());
TRI_ASSERT(info.numEdges == 0 || edgeDataOffsets[shardIdx] < _edges->size());
scheduler->post([this, &info, &edgeDataOffsets, vertexOff, shardIdx] {
scheduler->queue(RequestPriority::LOW, [this, &info, &edgeDataOffsets, vertexOff, shardIdx] {
TRI_DEFER(_runningThreads--);// exception safe
_loadVertices(*info.trx, info.vertexShard, info.edgeShards,
vertexOff, edgeDataOffsets[shardIdx]);
}, false);
});
// update to next offset
vertexOff += info.numVertices;
} catch(...) {
LOG_TOPIC(WARN, Logger::PREGEL) << "unhandled exception while "
<<"loading pregel graph";
}
shardIdx++;
}
// we can only load one vertex collection at a time
while (_runningThreads > 0) {
std::this_thread::sleep_for(std::chrono::microseconds(5000));
}
}
scheduler->post(callback, false);
}, false);
scheduler->queue(RequestPriority::LOW, callback);
});
}
template <typename V, typename E>
@ -440,7 +440,7 @@ void GraphStore<V, E>::_loadVertices(transaction::Methods& trx,
break;
}
}
// Add all new vertices
_localVerticeCount += (vertexOffset - originalVertexOffset);
@ -486,7 +486,7 @@ void GraphStore<V, E>::_loadEdges(transaction::Methods& trx,
// lazy loading always uses vector backed storage
((VectorTypedBuffer<Edge<E>>*)_edges)->appendEmptyElement();
}
std::string toValue = slice.get(StaticStrings::ToString).copyString();
std::size_t pos = toValue.find('/');
std::string collectionName = toValue.substr(0, pos);
@ -632,7 +632,7 @@ void GraphStore<V, E>::storeResults(WorkerConfig* config,
TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
do {
_runningThreads++;
SchedulerFeature::SCHEDULER->post([this, start, end, now, cb] {
SchedulerFeature::SCHEDULER->queue(RequestPriority::LOW, [this, start, end, now, cb] {
try {
RangeIterator<VertexEntry> it = vertexIterator(start, end);
_storeVertices(_config->globalShardIDs(), it);
@ -646,7 +646,7 @@ void GraphStore<V, E>::storeResults(WorkerConfig* config,
<< (TRI_microtime() - now) << "s";
cb();
}
}, false);
});
start = end;
end = end + delta;
if (total < end + delta) { // swallow the rest
@ -667,4 +667,3 @@ template class arangodb::pregel::GraphStore<HITSValue, int8_t>;
template class arangodb::pregel::GraphStore<DMIDValue, float>;
template class arangodb::pregel::GraphStore<LPValue, int8_t>;
template class arangodb::pregel::GraphStore<SLPAValue, int8_t>;

View File

@ -287,14 +287,14 @@ void PregelFeature::cleanupWorker(uint64_t executionNumber) {
// unmapping etc might need a few seconds
TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
rest::Scheduler* scheduler = SchedulerFeature::SCHEDULER;
scheduler->post([this, executionNumber] {
scheduler->queue(RequestPriority::LOW, [this, executionNumber] {
MUTEX_LOCKER(guard, _mutex);
auto wit = _workers.find(executionNumber);
if (wit != _workers.end()) {
_workers.erase(executionNumber);
}
}, false);
});
}
void PregelFeature::cleanupAll() {

View File

@ -148,7 +148,7 @@ void RecoveryManager::updatedFailedServers(std::vector<ServerID> const& failed)
TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
rest::Scheduler* scheduler = SchedulerFeature::SCHEDULER;
scheduler->post([this, shard] { _renewPrimaryServer(shard); }, false);
scheduler->queue(RequestPriority::LOW, [this, shard] { _renewPrimaryServer(shard); });
}
}
}

View File

@ -156,7 +156,7 @@ void Worker<V, E, M>::setupWorker() {
package.close();
_callConductor(Utils::finishedStartupPath, package);
};
if (_config.lazyLoading()) {
// TODO maybe lazy loading needs to be performed on another thread too
std::set<std::string> activeSet = _algorithm->initialActiveSet();
@ -173,9 +173,8 @@ void Worker<V, E, M>::setupWorker() {
// of time. Therefore this is performed asynchronous
TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
rest::Scheduler* scheduler = SchedulerFeature::SCHEDULER;
scheduler->post(
[this, callback] { _graphStore->loadShards(&_config, callback); },
false);
scheduler->queue(RequestPriority::LOW,
[this, callback] { _graphStore->loadShards(&_config, callback); });
}
}
@ -335,7 +334,7 @@ void Worker<V, E, M>::_startProcessing() {
}
size_t i = 0;
do {
scheduler->post([this, start, end, i] {
scheduler->queue(RequestPriority::LOW, [this, start, end, i] {
if (_state != WorkerState::COMPUTING) {
LOG_TOPIC(WARN, Logger::PREGEL) << "Execution aborted prematurely.";
return;
@ -345,7 +344,7 @@ void Worker<V, E, M>::_startProcessing() {
if (_processVertices(i, vertices) && _state == WorkerState::COMPUTING) {
_finishedProcessing(); // last thread turns the lights out
}
}, false);
});
start = end;
end = end + delta;
if (total < end + delta) { // swallow the rest
@ -685,7 +684,7 @@ void Worker<V, E, M>::compensateStep(VPackSlice const& data) {
TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
rest::Scheduler* scheduler = SchedulerFeature::SCHEDULER;
scheduler->post([this] {
scheduler->queue(RequestPriority::LOW, [this] {
if (_state != WorkerState::RECOVERING) {
LOG_TOPIC(WARN, Logger::PREGEL) << "Compensation aborted prematurely.";
return;
@ -722,7 +721,7 @@ void Worker<V, E, M>::compensateStep(VPackSlice const& data) {
_workerAggregators->serializeValues(package);
package.close();
_callConductor(Utils::finishedRecoveryPath, package);
}, false);
});
}
template <typename V, typename E, typename M>
@ -745,10 +744,10 @@ void Worker<V, E, M>::_callConductor(std::string const& path,
if (ServerState::instance()->isRunningInCluster() == false) {
TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);
rest::Scheduler* scheduler = SchedulerFeature::SCHEDULER;
scheduler->post([path, message] {
scheduler->queue(RequestPriority::LOW, [path, message] {
VPackBuilder response;
PregelFeature::handleConductorRequest(path, message.slice(), response);
}, false);
});
} else {
std::shared_ptr<ClusterComm> cc = ClusterComm::instance();
std::string baseUrl =

View File

@ -223,11 +223,11 @@ arangodb::Result applyCollectionDumpMarkerInternal(
namespace arangodb {
Syncer::JobSynchronizer::JobSynchronizer(std::shared_ptr<Syncer const> const& syncer)
: _syncer(syncer),
Syncer::JobSynchronizer::JobSynchronizer(std::shared_ptr<Syncer const> const& syncer)
: _syncer(syncer),
_gotResponse(false),
_jobsInFlight(0) {}
Syncer::JobSynchronizer::~JobSynchronizer() {
// signal that we have got something
@ -238,9 +238,9 @@ Syncer::JobSynchronizer::~JobSynchronizer() {
}
// wait until all posted jobs have been completed/canceled
while (hasJobInFlight()) {
while (hasJobInFlight()) {
std::this_thread::sleep_for(std::chrono::microseconds(20000));
std::this_thread::yield();
std::this_thread::yield();
}
}
@ -273,7 +273,7 @@ Result Syncer::JobSynchronizer::waitForResponse(std::unique_ptr<arangodb::httpcl
while (true) {
{
CONDITION_LOCKER(guard, _condition);
if (!_gotResponse) {
guard.wait(1 * 1000 * 1000);
}
@ -299,7 +299,7 @@ Result Syncer::JobSynchronizer::waitForResponse(std::unique_ptr<arangodb::httpcl
break;
}
}
return Result(TRI_ERROR_REPLICATION_APPLIER_STOPPED);
}
@ -312,7 +312,7 @@ void Syncer::JobSynchronizer::request(std::function<void()> const& cb) {
try {
auto self = shared_from_this();
SchedulerFeature::SCHEDULER->post([this, self, cb]() {
SchedulerFeature::SCHEDULER->queue(RequestPriority::LOW, [this, self, cb]() {
// whatever happens next, when we leave this here, we need to indicate
// that there is no more posted job.
// otherwise the calling thread may block forever waiting on the posted jobs
@ -322,26 +322,26 @@ void Syncer::JobSynchronizer::request(std::function<void()> const& cb) {
});
cb();
}, false);
});
} catch (...) {
// will get here only if Scheduler::post threw
jobDone();
}
}
/// @brief notifies that a job was posted
/// returns false if job counter could not be increased (e.g. because
/// the syncer was stopped/aborted already)
bool Syncer::JobSynchronizer::jobPosted() {
while (true) {
CONDITION_LOCKER(guard, _condition);
// _jobsInFlight should be 0 in almost all cases, however, there
// is a small window in which the request has been processed already
// (i.e. after waitForResponse() has returned and before jobDone()
// has been called and has decreased _jobsInFlight). For this
// particular case, we simply wait for _jobsInFlight to become 0 again
if (_jobsInFlight == 0) {
// particular case, we simply wait for _jobsInFlight to become 0 again
if (_jobsInFlight == 0) {
++_jobsInFlight;
return true;
}
@ -801,7 +801,7 @@ Result Syncer::dropIndex(arangodb::velocypack::Slice const& slice) {
return r;
}
/// @brief creates a view, based on the VelocyPack provided
Result Syncer::createView(TRI_vocbase_t& vocbase,
arangodb::velocypack::Slice const& slice) {
@ -809,7 +809,7 @@ Result Syncer::createView(TRI_vocbase_t& vocbase,
return Result(TRI_ERROR_REPLICATION_INVALID_RESPONSE,
"collection slice is no object");
}
VPackSlice nameSlice = slice.get(StaticStrings::DataSourceName);
if (!nameSlice.isString() || nameSlice.getStringLength() == 0) {
return Result(TRI_ERROR_REPLICATION_INVALID_RESPONSE,
@ -825,7 +825,7 @@ Result Syncer::createView(TRI_vocbase_t& vocbase,
return Result(TRI_ERROR_REPLICATION_INVALID_RESPONSE,
"no type specified for view");
}
auto view = vocbase.lookupView(guidSlice.copyString());
if (view) { // identical view already exists
VPackSlice nameSlice = slice.get(StaticStrings::DataSourceName);
@ -835,11 +835,11 @@ Result Syncer::createView(TRI_vocbase_t& vocbase,
return res;
}
}
bool doSync = DatabaseFeature::DATABASE->forceSyncProperties();
return view->updateProperties(slice, false, doSync);
}
view = vocbase.lookupView(nameSlice.copyString());
if (view) { // resolve name conflict by deleting existing
Result res = vocbase.dropView(view->id(), /*dropSytem*/false);
@ -847,16 +847,16 @@ Result Syncer::createView(TRI_vocbase_t& vocbase,
return res;
}
}
VPackBuilder s;
s.openObject();
s.add("id", VPackSlice::nullSlice());
s.close();
VPackBuilder merged =
VPackCollection::merge(slice, s.slice(), /*mergeValues*/ true,
/*nullMeansRemove*/ true);
try {
vocbase.createView(merged.slice());
} catch (basics::Exception const& ex) {
@ -866,7 +866,7 @@ Result Syncer::createView(TRI_vocbase_t& vocbase,
} catch (...) {
return Result(TRI_ERROR_INTERNAL);
}
return Result();
}
@ -877,7 +877,7 @@ Result Syncer::dropView(arangodb::velocypack::Slice const& slice,
if (vocbase == nullptr) {
return Result(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND);
}
VPackSlice guidSlice = slice.get("globallyUniqueId");
if (guidSlice.isNone()) {
guidSlice = slice.get("cuid");
@ -886,7 +886,7 @@ Result Syncer::dropView(arangodb::velocypack::Slice const& slice,
return Result(TRI_ERROR_REPLICATION_INVALID_RESPONSE,
"no guid specified for view");
}
try {
auto view = vocbase->lookupView(guidSlice.copyString());
if (view != nullptr) { // ignore non-existing
@ -899,7 +899,7 @@ Result Syncer::dropView(arangodb::velocypack::Slice const& slice,
} catch (...) {
return Result(TRI_ERROR_INTERNAL);
}
return Result();
}

View File

@ -224,7 +224,7 @@ bool RestBatchHandler::executeNextHandler() {
// now scheduler the real handler
bool ok = SchedulerFeature::SCHEDULER->queue(
PriorityRequestLane(handler->lane()), [this, self, handler]() {
handler->priority(), [this, self, handler]() {
// start to work for this handler
// ignore any errors here, will be handled later by inspecting the response

View File

@ -134,7 +134,7 @@ RestStatus RestTestHandler::execute() {
auto self(shared_from_this());
bool ok = SchedulerFeature::SCHEDULER->queue(
PriorityRequestLane(res.get()),
priority(res.get()),
[this, self, duration]() {
auto stop = clock::now() + duration;
@ -166,5 +166,3 @@ RestStatus RestTestHandler::execute() {
generateError(rest::ResponseCode::SERVICE_UNAVAILABLE, TRI_ERROR_QUEUE_FULL);
return RestStatus::DONE;
}

View File

@ -46,7 +46,7 @@ using namespace arangodb::basics;
using namespace arangodb::rest;
namespace {
constexpr double MIN_SECONDS = 30.0;
constexpr double MIN_SECONDS = 60.0;
}
// -----------------------------------------------------------------------------
@ -86,8 +86,7 @@ class SchedulerManagerThread final : public Thread {
// --SECTION-- SchedulerThread
// -----------------------------------------------------------------------------
namespace {
class SchedulerThread : public Thread {
class arangodb::SchedulerThread : public Thread {
public:
SchedulerThread(std::shared_ptr<Scheduler> const& scheduler, asio_ns::io_context* service)
: Thread("Scheduler", true), _scheduler(scheduler), _service(service) {}
@ -164,30 +163,26 @@ class SchedulerThread : public Thread {
std::shared_ptr<Scheduler> _scheduler;
asio_ns::io_context* _service;
};
} // namespace
// -----------------------------------------------------------------------------
// --SECTION-- Scheduler
// -----------------------------------------------------------------------------
Scheduler::Scheduler(uint64_t nrMinimum, uint64_t nrMaximum,
uint64_t maxQueueSize, uint64_t fifo1Size,
uint64_t fifo2Size)
: _queuedV8(0),
_maxQueuedV8(std::max(static_cast<uint64_t>(1), nrMaximum - nrMinimum)),
_maxQueueSize(maxQueueSize),
_counters(0),
uint64_t fifo1Size, uint64_t fifo2Size)
: _counters(0),
_maxFifoSize{fifo1Size, fifo2Size, fifo2Size},
_fifo1(_maxFifoSize[FIFO1]),
_fifo2(_maxFifoSize[FIFO2]),
_fifo8(_maxFifoSize[FIFO8]),
_fifos{&_fifo1, &_fifo2, &_fifo8},
_fifo3(_maxFifoSize[FIFO3]),
_fifos{&_fifo1, &_fifo2, &_fifo3},
_minThreads(nrMinimum),
_maxThreads(nrMaximum),
_lastAllBusyStamp(0.0) {
LOG_TOPIC(DEBUG, Logger::THREADS) << "Scheduler configuration min: " << nrMinimum << " max: " << nrMaximum;
_fifoSize[FIFO1] = 0;
_fifoSize[FIFO2] = 0;
_fifoSize[FIFO8] = 0;
_fifoSize[FIFO3] = 0;
// setup signal handlers
initializeSignalHandlers();
@ -199,9 +194,6 @@ Scheduler::~Scheduler() {
_managerGuard.reset();
_managerContext.reset();
_serviceGuard.reset();
_ioContext.reset();
FifoJob* job = nullptr;
for (int i = 0; i < NUMBER_FIFOS; ++i) {
@ -214,98 +206,46 @@ Scheduler::~Scheduler() {
}
}
void Scheduler::post(std::function<void()> const& cb, bool isV8,
uint64_t timeout) {
// do not pass callback by reference, might get deleted before execution
void Scheduler::post(std::function<void()> const callback) {
// increment number of queued and guard against exceptions
incQueued();
auto guardQueue = scopeGuard([this]() { decQueued(); });
// increment number of queued V8 jobs and guard against exceptions
if (isV8) {
++_queuedV8;
}
auto guardV8 = scopeGuard([this, isV8]() {
if (isV8) {
--_queuedV8;
}
});
// capture without self, ioContext will not live longer than scheduler
// do not pass callback by reference, might get deleted before execution
//asio_ns::post([this, cb, isV8, timeout]() { <-- breaks tests in jenkins
_ioContext->post([this, cb, isV8, timeout]() {
// at the end (either success or exception),
// reduce number of queued V8
auto guard = scopeGuard([this, isV8]() {
if (isV8) {
--_queuedV8;
}
});
// reduce number of queued now
decQueued();
_ioContext->post([this, callback]() {
// start working
JobGuard jobGuard(this);
jobGuard.work();
if (isV8 && _queuedV8 > _maxQueuedV8 &&
numWorking(getCounters()) >= static_cast<uint64_t>(_maxQueuedV8)) {
// this must be done before requeuing the job
guard.fire();
// reduce number of queued now
decQueued();
// in case we queued more V8 jobs in the scheduler than desired this
// job is put back into the scheduler queue. An exponential backoff is
// used with a maximum of 256ms. Initial the timeout will be zero.
auto t = timeout;
if (t == 0) {
t = 1;
} else if (t <= 200) {
t *= 2;
}
std::shared_ptr<asio_ns::deadline_timer> timer(
newDeadlineTimer(boost::posix_time::millisec(timeout)));
timer->async_wait([this, cb = std::move(cb), isV8, t, timer]
(const asio::error_code& error) {
if (error != asio::error::operation_aborted) {
post(cb, isV8, t);
}
});
return;
}
cb();
callback();
});
// no exception happened, cancel guards
guardV8.cancel();
// no exception happened, cancel guard
guardQueue.cancel();
}
// do not pass callback by reference, might get deleted before execution
void Scheduler::post(asio_ns::io_context::strand& strand,
std::function<void()> const& cb) {
std::function<void()> const callback) {
incQueued();
try {
// capture without self, ioContext will not live longer than scheduler
// do not pass callback by reference, might get deleted before execution
//asio_ns::post(strand, [this, cb, isV8, timeout]() { <-- breaks tests in jenkins
strand.post([this, cb]() {
decQueued();
JobGuard guard(this);
guard.work();
auto guardQueue = scopeGuard([this]() { decQueued(); });
strand.post([this, callback]() {
JobGuard guard(this);
guard.work();
cb();
});
} catch (...) {
decQueued();
throw;
}
callback();
});
// no exception happened, cancel guard
guardQueue.cancel();
}
bool Scheduler::queue(RequestPriority prio,
@ -320,10 +260,10 @@ bool Scheduler::queue(RequestPriority prio,
// This does not care if there is anything in fifo2 or
// fifo8 because these queue have lower priority.
case RequestPriority::HIGH:
if (0 < _fifoSize[FIFO1] || !canPostDirectly()) {
ok = pushToFifo(FIFO1, callback, false);
if (0 < _fifoSize[FIFO1] || !canPostDirectly(prio)) {
ok = pushToFifo(FIFO1, callback);
} else {
post(callback, false);
post(callback);
}
break;
@ -331,19 +271,24 @@ bool Scheduler::queue(RequestPriority prio,
// or if the scheduler queue is already full, then
// append it to the fifo2. Otherewise directly queue
// it.
case RequestPriority::LOW:
if (0 < _fifoSize[FIFO1] || 0 < _fifoSize[FIFO8] ||
0 < _fifoSize[FIFO2] || !canPostDirectly()) {
ok = pushToFifo(FIFO2, callback, false);
case RequestPriority::MED:
if (0 < _fifoSize[FIFO1] || 0 < _fifoSize[FIFO2] || !canPostDirectly(prio)) {
ok = pushToFifo(FIFO2, callback);
} else {
post(callback, false);
post(callback);
}
break;
// Also push V8 requests to the fifo2. Even if we could
// queue directly.
case RequestPriority::V8:
ok = pushToFifo(FIFO2, callback, true);
// If there is anything in the fifo1, fifo2, fifo3
// or if the scheduler queue is already full, then
// append it to the fifo2. Otherwise directly queue
// it.
case RequestPriority::LOW:
if (0 < _fifoSize[FIFO1] || 0 < _fifoSize[FIFO2] || 0 < _fifoSize[FIFO3] || !canPostDirectly(prio)) {
ok = pushToFifo(FIFO3, callback);
} else {
post(callback);
}
break;
default:
@ -361,23 +306,23 @@ bool Scheduler::queue(RequestPriority prio,
}
void Scheduler::drain() {
while (canPostDirectly()) {
bool found = popFifo(FIFO1);
bool found = true;
while (found && canPostDirectly(RequestPriority::HIGH)) {
found = popFifo(FIFO1);
}
found = true;
while (found && canPostDirectly(RequestPriority::LOW)) {
found = popFifo(FIFO1);
if (!found) {
found = popFifo(FIFO8);
if (!found) {
found = popFifo(FIFO2);
} else if (canPostDirectly()) {
// There is still enough space in the scheduler queue. Queue
// one more.
popFifo(FIFO2);
}
found = popFifo(FIFO2);
}
if (!found) {
break;
found = popFifo(FIFO3);
}
}
}
@ -390,13 +335,12 @@ void Scheduler::addQueueStatistics(velocypack::Builder& b) const {
b.add("scheduler-threads", VPackValue(numRunning(counters)));
b.add("in-progress", VPackValue(numWorking(counters)));
b.add("queued", VPackValue(numQueued(counters)));
b.add("queue-size", VPackValue(_maxQueueSize));
b.add("current-fifo1", VPackValue(_fifoSize[FIFO1]));
b.add("fifo1-size", VPackValue(_maxFifoSize[FIFO1]));
b.add("current-fifo2", VPackValue(_fifoSize[FIFO2]));
b.add("fifo2-size", VPackValue(_maxFifoSize[FIFO2]));
b.add("current-fifo8", VPackValue(_fifoSize[FIFO8]));
b.add("fifo8-size", VPackValue(_maxFifoSize[FIFO8]));
b.add("current-fifo3", VPackValue(_fifoSize[FIFO3]));
b.add("fifo3-size", VPackValue(_maxFifoSize[FIFO3]));
}
Scheduler::QueueStatistics Scheduler::queueStatistics() const {
@ -406,9 +350,8 @@ Scheduler::QueueStatistics Scheduler::queueStatistics() const {
numWorking(counters),
numQueued(counters),
static_cast<uint64_t>(_fifoSize[FIFO1]),
static_cast<uint64_t>(_fifoSize[FIFO8]),
static_cast<uint64_t>(_fifoSize[FIFO8]),
static_cast<uint64_t>(_queuedV8)};
static_cast<uint64_t>(_fifoSize[FIFO2]),
static_cast<uint64_t>(_fifoSize[FIFO3])};
}
std::string Scheduler::infoStatus() {
@ -418,31 +361,39 @@ std::string Scheduler::infoStatus() {
std::to_string(_minThreads) + "<" + std::to_string(_maxThreads) +
") in-progress " + std::to_string(numWorking(counters)) + " queued " +
std::to_string(numQueued(counters)) +
" (<=" + std::to_string(_maxQueueSize) + ") V8 " +
std::to_string(_queuedV8) + " (<=" + std::to_string(_maxQueuedV8) +
") F1 " + std::to_string(_fifoSize[FIFO1]) +
" F1 " + std::to_string(_fifoSize[FIFO1]) +
" (<=" + std::to_string(_maxFifoSize[FIFO1]) + ") F2 " +
std::to_string(_fifoSize[FIFO2]) +
" (<=" + std::to_string(_maxFifoSize[FIFO2]) + ") F8 " +
std::to_string(_fifoSize[FIFO8]) +
" (<=" + std::to_string(_maxFifoSize[FIFO8]) + ")";
" (<=" + std::to_string(_maxFifoSize[FIFO2]) + ") F3 " +
std::to_string(_fifoSize[FIFO3]) +
" (<=" + std::to_string(_maxFifoSize[FIFO3]) + ")";
}
bool Scheduler::canPostDirectly() const noexcept {
bool Scheduler::canPostDirectly(RequestPriority prio) const noexcept {
auto counters = getCounters();
auto nrWorking = numWorking(counters);
auto nrQueued = numQueued(counters);
return nrWorking + nrQueued <= _maxQueueSize;
switch (prio) {
case RequestPriority::HIGH:
return nrWorking + nrQueued < _maxThreads;
// the "/ 2" is an assumption that HIGH is typically responses to our outbound messages
// where MED & LOW are incoming requests. Keep half the threads processing our work and half their work.
case RequestPriority::MED:
case RequestPriority::LOW:
return nrWorking + nrQueued < _maxThreads / 2;
}
return false;
}
bool Scheduler::pushToFifo(int64_t fifo, std::function<void()> const& callback,
bool isV8) {
bool Scheduler::pushToFifo(int64_t fifo, std::function<void()> const& callback) {
LOG_TOPIC(DEBUG, Logger::THREADS) << "Push element on fifo: " << fifo;
TRI_ASSERT(0 <= fifo && fifo < NUMBER_FIFOS);
TRI_ASSERT(fifo != FIFO8 || isV8);
size_t p = static_cast<size_t>(fifo);
auto job = std::make_unique<FifoJob>(callback, isV8);
auto job = std::make_unique<FifoJob>(callback);
try {
if (0 < _maxFifoSize[p] && (int64_t)_maxFifoSize[p] <= _fifoSize[p]) {
@ -462,7 +413,10 @@ bool Scheduler::pushToFifo(int64_t fifo, std::function<void()> const& callback,
auto nrQueued = numQueued(counters);
if (0 == nrWorking + nrQueued) {
post([] { /*wakeup call for scheduler thread*/ }, false);
post([] {
LOG_TOPIC(DEBUG, Logger::THREADS) << "Wakeup alarm";
/*wakeup call for scheduler thread*/
});
}
} catch (...) {
return false;
@ -472,12 +426,9 @@ bool Scheduler::pushToFifo(int64_t fifo, std::function<void()> const& callback,
}
bool Scheduler::popFifo(int64_t fifo) {
LOG_TOPIC(DEBUG, Logger::THREADS) << "Popping a job from fifo: " << fifo;
TRI_ASSERT(0 <= fifo && fifo < NUMBER_FIFOS);
if (fifo == FIFO8 && _queuedV8 >= _maxQueuedV8) {
return false;
}
size_t p = static_cast<size_t>(fifo);
FifoJob* job = nullptr;
@ -490,11 +441,7 @@ bool Scheduler::popFifo(int64_t fifo) {
}
});
if (!job->_isV8 || _queuedV8 < _maxQueuedV8) {
post(job->_callback, job->_isV8);
} else {
pushToFifo(FIFO8, job->_callback, job->_isV8);
}
post(job->_callback);
--_fifoSize[p];
}
@ -528,8 +475,6 @@ bool Scheduler::start() {
TRI_ASSERT(0 < _minThreads);
TRI_ASSERT(_minThreads <= _maxThreads);
TRI_ASSERT(0 < _maxQueueSize);
TRI_ASSERT(0 < _maxQueuedV8);
for (uint64_t i = 0; i < _minThreads; ++i) {
{
@ -587,6 +532,9 @@ void Scheduler::shutdown() {
std::this_thread::sleep_for(std::chrono::microseconds(20000));
}
// One has to clean up the ioContext here, because there could a lambda
// in its queue, that requires for it finalization some object (for example vocbase)
// that would already be destroyed
_managerContext.reset();
_ioContext.reset();
}
@ -642,6 +590,12 @@ void Scheduler::stopRebalancer() noexcept {
}
}
//
// This routine tries to keep only the most likely needed count of threads running:
// - asio io_context runs less efficiently if it has too many threads, but
// - there is a latency hit to starting a new thread.
//
void Scheduler::rebalanceThreads() {
static uint64_t count = 0;

View File

@ -38,6 +38,7 @@
namespace arangodb {
class JobGuard;
class ListenTask;
class SchedulerThread;
namespace velocypack {
class Builder;
@ -55,9 +56,10 @@ class Scheduler : public std::enable_shared_from_this<Scheduler> {
friend class arangodb::rest::GeneralCommTask;
friend class arangodb::rest::SocketTask;
friend class arangodb::ListenTask;
friend class arangodb::SchedulerThread;
public:
Scheduler(uint64_t minThreads, uint64_t maxThreads, uint64_t maxQueueSize,
Scheduler(uint64_t minThreads, uint64_t maxThreads,
uint64_t fifo1Size, uint64_t fifo2Size);
virtual ~Scheduler();
@ -91,16 +93,11 @@ class Scheduler : public std::enable_shared_from_this<Scheduler> {
uint64_t _queued;
uint64_t _fifo1;
uint64_t _fifo2;
uint64_t _fifo8;
uint64_t _queuedV8;
uint64_t _fifo3;
};
void post(std::function<void()> const&, bool isV8,
uint64_t timeout = 0);
void post(asio_ns::io_context::strand&, std::function<void()> const&);
bool queue(RequestPriority prio, std::function<void()> const&);
void drain();
void post(asio_ns::io_context::strand&, std::function<void()> const callback);
void addQueueStatistics(velocypack::Builder&) const;
QueueStatistics queueStatistics() const;
@ -109,53 +106,17 @@ class Scheduler : public std::enable_shared_from_this<Scheduler> {
bool isRunning() const { return numRunning(_counters) > 0; }
bool isStopping() const noexcept { return (_counters & (1ULL << 63)) != 0; }
public:
template <typename T>
asio_ns::deadline_timer* newDeadlineTimer(T timeout) {
return new asio_ns::deadline_timer(*_ioContext, timeout);
}
asio_ns::steady_timer* newSteadyTimer() {
return new asio_ns::steady_timer(*_ioContext);
}
asio_ns::io_context::strand* newStrand() {
return new asio_ns::io_context::strand(*_ioContext);
}
asio_ns::ip::tcp::acceptor* newAcceptor() {
return new asio_ns::ip::tcp::acceptor(*_ioContext);
}
#ifndef _WIN32
asio_ns::local::stream_protocol::acceptor* newDomainAcceptor() {
return new asio_ns::local::stream_protocol::acceptor(*_ioContext);
}
#endif
asio_ns::ip::tcp::socket* newSocket() {
return new asio_ns::ip::tcp::socket(*_ioContext);
}
#ifndef _WIN32
asio_ns::local::stream_protocol::socket* newDomainSocket() {
return new asio_ns::local::stream_protocol::socket(*_ioContext);
}
#endif
asio_ns::ssl::stream<asio_ns::ip::tcp::socket>* newSslSocket(
asio_ns::ssl::context& context) {
return new asio_ns::ssl::stream<asio_ns::ip::tcp::socket>(*_ioContext,
context);
}
asio_ns::ip::tcp::resolver* newResolver() {
return new asio_ns::ip::tcp::resolver(*_ioContext);
}
asio_ns::signal_set* newSignalSet() {
return new asio_ns::signal_set(*_managerContext);
}
private:
void post(std::function<void()> const callback);
void drain();
inline void setStopping() noexcept { _counters |= (1ULL << 63); }
inline bool isStopping(uint64_t value) const noexcept {
return (value & (1ULL << 63)) != 0;
}
bool canPostDirectly() const noexcept;
bool canPostDirectly(RequestPriority prio) const noexcept;
static uint64_t numRunning(uint64_t value) noexcept {
return value & 0xFFFFULL;
@ -190,11 +151,6 @@ class Scheduler : public std::enable_shared_from_this<Scheduler> {
_counters -= 1ULL << 16;
}
std::atomic<int64_t> _queuedV8;
int64_t const _maxQueuedV8;
// maximal number of running + queued jobs in the Scheduler `io_context`
uint64_t const _maxQueueSize;
// we store most of the threads status info in a single atomic uint64_t
// the encoding of the values inside this variable is (left to right means
@ -221,33 +177,79 @@ class Scheduler : public std::enable_shared_from_this<Scheduler> {
// queue is full
struct FifoJob {
FifoJob(std::function<void()> const& callback, bool isV8)
: _isV8(isV8), _callback(callback) {}
bool const _isV8;
FifoJob(std::function<void()> const& callback)
: _callback(callback) {}
std::function<void()> _callback;
};
bool pushToFifo(int64_t fifo, std::function<void()> const& callback,
bool isV8);
bool pushToFifo(int64_t fifo, std::function<void()> const& callback);
bool popFifo(int64_t fifo);
static constexpr int64_t NUMBER_FIFOS = 3;
static constexpr int64_t FIFO1 = 0;
static constexpr int64_t FIFO2 = 1;
static constexpr int64_t FIFO8 = 2;
static constexpr int64_t FIFO3 = 2;
uint64_t const _maxFifoSize[NUMBER_FIFOS];
std::atomic<int64_t> _fifoSize[NUMBER_FIFOS];
boost::lockfree::queue<FifoJob*> _fifo1;
boost::lockfree::queue<FifoJob*> _fifo2;
boost::lockfree::queue<FifoJob*> _fifo8;
boost::lockfree::queue<FifoJob*> _fifo3;
boost::lockfree::queue<FifoJob*>* _fifos[NUMBER_FIFOS];
// the following methds create tasks in the `io_context`.
// The `io_context` itself is not exposed because everything
// should use the method `post` of the Scheduler.
public:
template <typename T>
asio_ns::deadline_timer* newDeadlineTimer(T timeout) {
return new asio_ns::deadline_timer(*_ioContext, timeout);
}
asio_ns::steady_timer* newSteadyTimer() {
return new asio_ns::steady_timer(*_ioContext);
}
asio_ns::io_context::strand* newStrand() {
return new asio_ns::io_context::strand(*_ioContext);
}
asio_ns::ip::tcp::acceptor* newAcceptor() {
return new asio_ns::ip::tcp::acceptor(*_ioContext);
}
#ifndef _WIN32
asio_ns::local::stream_protocol::acceptor* newDomainAcceptor() {
return new asio_ns::local::stream_protocol::acceptor(*_ioContext);
}
#endif
asio_ns::ip::tcp::socket* newSocket() {
return new asio_ns::ip::tcp::socket(*_ioContext);
}
#ifndef _WIN32
asio_ns::local::stream_protocol::socket* newDomainSocket() {
return new asio_ns::local::stream_protocol::socket(*_ioContext);
}
#endif
asio_ns::ssl::stream<asio_ns::ip::tcp::socket>* newSslSocket(
asio_ns::ssl::context& context) {
return new asio_ns::ssl::stream<asio_ns::ip::tcp::socket>(*_ioContext,
context);
}
asio_ns::ip::tcp::resolver* newResolver() {
return new asio_ns::ip::tcp::resolver(*_ioContext);
}
asio_ns::signal_set* newSignalSet() {
return new asio_ns::signal_set(*_managerContext);
}
private:
static void initializeSignalHandlers();

View File

@ -98,6 +98,9 @@ void SchedulerFeature::validateOptions(
if (_nrMaximalThreads == 0) {
_nrMaximalThreads = defaultNumberOfThreads();
}
if (_nrMinimalThreads < 2) {
_nrMinimalThreads = 2;
}
if (_queueSize == 0) {
_queueSize = _nrMaximalThreads * 8;
@ -136,7 +139,7 @@ void SchedulerFeature::start() {
LOG_TOPIC(WARN, arangodb::Logger::THREADS)
<< "--server.maximal-threads (" << _nrMaximalThreads << ") should be at least "
<< (_nrMinimalThreads + 1) << ", raising it";
_nrMaximalThreads = _nrMinimalThreads + 1;
_nrMaximalThreads = _nrMinimalThreads;
}
TRI_ASSERT(2 <= _nrMinimalThreads);
@ -300,7 +303,7 @@ bool CtrlHandler(DWORD eventType) {
void SchedulerFeature::buildScheduler() {
_scheduler = std::make_shared<Scheduler>(_nrMinimalThreads, _nrMaximalThreads,
_queueSize, _fifo1Size, _fifo2Size);
_fifo1Size, _fifo2Size);
SCHEDULER = _scheduler.get();
}

View File

@ -228,7 +228,7 @@ LogicalCollection::LogicalCollection(
return category;
}
LogicalCollection::~LogicalCollection() {}
// SECTION: sharding
@ -418,7 +418,7 @@ bool LogicalCollection::isSmart() const { return _isSmart; }
std::unique_ptr<FollowerInfo> const& LogicalCollection::followers() const {
return _followers;
}
std::unordered_map<std::string, double> LogicalCollection::clusterIndexEstimates(bool allowUpdate) {
return getPhysical()->clusterIndexEstimates(allowUpdate);
}
@ -628,11 +628,10 @@ arangodb::Result LogicalCollection::appendVelocyPack(
if (_keyGenerator != nullptr) {
result.openObject();
_keyGenerator->toVelocyPack(result);
result.close();
} else {
result.openArray();
result.close();
}
result.close();
// Physical Information
getPhysical()->getPropertiesVPack(result);
@ -920,7 +919,7 @@ Result LogicalCollection::update(transaction::Methods* trx,
TRI_IF_FAILURE("LogicalCollection::update") {
return Result(TRI_ERROR_DEBUG);
}
resultMarkerTick = 0;
if (!newSlice.isObject()) {
return Result(TRI_ERROR_ARANGO_DOCUMENT_TYPE_INVALID);
@ -1069,4 +1068,3 @@ ChecksumResult LogicalCollection::checksum(bool withRevisions, bool withData) co
return ChecksumResult(std::move(b));
}

View File

@ -582,7 +582,7 @@ Result Collections::warmup(TRI_vocbase_t& vocbase,
auto idxs = coll.getIndexes();
auto poster = [](std::function<void()> fn) -> void {
SchedulerFeature::SCHEDULER->post(fn, false);
SchedulerFeature::SCHEDULER->queue(RequestPriority::LOW, fn);
};
auto queue = std::make_shared<basics::LocalTaskQueue>(poster);

View File

@ -283,7 +283,7 @@ std::function<void(const asio::error_code&)> Task::callbackFunction() {
// now do the work:
SchedulerFeature::SCHEDULER->queue(
PriorityRequestLane(RequestLane::TASK_V8), [self, this, execContext] {
RequestPriority::LOW, [self, this, execContext] {
ExecContextScope scope(_user.empty() ? ExecContext::superuser()
: execContext.get());

View File

@ -6,6 +6,7 @@
// We have to start the app only in production mode, not in test mode
if (!window.hasOwnProperty('TEST_BUILD')) {
$(document).ajaxSend(function (event, jqxhr, settings) {
jqxhr.setRequestHeader('X-Arango-Frontend', 'true');
var currentJwt = window.arangoHelper.getCurrentJwt();
if (currentJwt) {
jqxhr.setRequestHeader('Authorization', 'bearer ' + currentJwt);

View File

@ -155,6 +155,7 @@ std::string const StaticStrings::Unlimited = "unlimited";
std::string const StaticStrings::WwwAuthenticate("www-authenticate");
std::string const StaticStrings::XContentTypeOptions("x-content-type-options");
std::string const StaticStrings::XArangoNoLock("x-arango-nolock");
std::string const StaticStrings::XArangoFrontend("x-arango-frontend");
// mime types
std::string const StaticStrings::MimeTypeJson(

View File

@ -143,6 +143,7 @@ class StaticStrings {
static std::string const WwwAuthenticate;
static std::string const XContentTypeOptions;
static std::string const XArangoNoLock;
static std::string const XArangoFrontend;
// mime types
static std::string const MimeTypeJson;

View File

@ -38,15 +38,20 @@ class Callbacks {
typedef std::function<void(std::unique_ptr<GeneralResponse>)>
OnSuccessCallback;
typedef std::function<void(std::function<void()>)> ScheduleMeCallback;
Callbacks() {}
Callbacks(OnSuccessCallback onSuccess, OnErrorCallback onError) :
_onSuccess(onSuccess), _onError(onError) {
Callbacks(OnSuccessCallback onSuccess, OnErrorCallback onError)
: _onSuccess(onSuccess), _onError(onError), _scheduleMe(defaultScheduleMe) {
}
public:
OnSuccessCallback _onSuccess;
OnErrorCallback _onError;
ScheduleMeCallback _scheduleMe;
protected:
static void defaultScheduleMe(std::function<void()> task) {task();}
};
}
}

View File

@ -285,7 +285,7 @@ void Communicator::createRequestInProgress(NewRequest&& newRequest) {
newRequest._destination, newRequest._callbacks, newRequest._ticketId,
newRequest._options, std::move(newRequest._request));
auto handleInProgress = std::make_unique<CurlHandle>(rip);
auto handleInProgress = std::make_shared<CurlHandle>(rip);
auto request = (HttpRequest*)handleInProgress->_rip->_request.get();
@ -430,8 +430,8 @@ void Communicator::createRequestInProgress(NewRequest&& newRequest) {
curl_multi_add_handle(_curl, handle);
}
/// new code using lambda and Scheduler
void Communicator::handleResult(CURL* handle, CURLcode rc) {
double connectTime = 0.0;
curl_multi_remove_handle(_curl, handle);
RequestInProgress* rip = nullptr;
@ -440,80 +440,99 @@ void Communicator::handleResult(CURL* handle, CURLcode rc) {
return;
}
// unclear if this would be safe on another thread. Leaving here.
if (rip->_options._curlRcFn) {
(*rip->_options._curlRcFn)(rc);
}
LOG_TOPIC(TRACE, Logger::COMMUNICATION)
<< ::buildPrefix(rip->_ticketId) << "curl rc is : " << rc << " after "
<< Logger::FIXED(TRI_microtime() - rip->_startTime) << " s";
std::shared_ptr<CurlHandle> curlHandle;
if (CURLE_OPERATION_TIMEDOUT == rc) {
curl_easy_getinfo(handle, CURLINFO_CONNECT_TIME, &connectTime);
LOG_TOPIC(TRACE, Logger::COMMUNICATION)
<< ::buildPrefix(rip->_ticketId) << "CURLINFO_CONNECT_TIME is " << connectTime;
} // if
if (strlen(rip->_errorBuffer) != 0) {
LOG_TOPIC(TRACE, Logger::COMMUNICATION)
<< ::buildPrefix(rip->_ticketId) << "curl error details: " << rip->_errorBuffer;
}
MUTEX_LOCKER(guard, _handlesLock);
switch (rc) {
case CURLE_OK: {
long httpStatusCode = 200;
curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &httpStatusCode);
std::unique_ptr<GeneralResponse> response(
new HttpResponse(static_cast<ResponseCode>(httpStatusCode)));
transformResult(handle, std::move(rip->_responseHeaders),
std::move(rip->_responseBody),
dynamic_cast<HttpResponse*>(response.get()));
if (httpStatusCode < 400) {
callSuccessFn(rip->_ticketId, rip->_destination, rip->_callbacks, std::move(response));
} else {
callErrorFn(rip, httpStatusCode, std::move(response));
}
break;
{
MUTEX_LOCKER(guard, _handlesLock);
auto inProgress = _handlesInProgress.find(rip->_ticketId);
if (_handlesInProgress.end() != inProgress) {
curlHandle = (*inProgress).second->getSharedPtr();
_handlesInProgress.erase(rip->_ticketId);
}
case CURLE_COULDNT_CONNECT:
case CURLE_SSL_CONNECT_ERROR:
case CURLE_COULDNT_RESOLVE_HOST:
case CURLE_URL_MALFORMAT:
case CURLE_SEND_ERROR:
callErrorFn(rip, TRI_SIMPLE_CLIENT_COULD_NOT_CONNECT, {nullptr});
break;
case CURLE_OPERATION_TIMEDOUT:
case CURLE_RECV_ERROR:
case CURLE_GOT_NOTHING:
if (rip->_aborted || (CURLE_OPERATION_TIMEDOUT == rc && 0.0 == connectTime)) {
callErrorFn(rip, TRI_COMMUNICATOR_REQUEST_ABORTED, {nullptr});
} else {
callErrorFn(rip, TRI_ERROR_CLUSTER_TIMEOUT, {nullptr});
}
break;
case CURLE_WRITE_ERROR:
if (rip->_aborted) {
callErrorFn(rip, TRI_COMMUNICATOR_REQUEST_ABORTED, {nullptr});
} else {
LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "got a write error from curl but request was not aborted";
callErrorFn(rip, TRI_ERROR_INTERNAL, {nullptr});
}
break;
case CURLE_ABORTED_BY_CALLBACK:
TRI_ASSERT(rip->_aborted);
callErrorFn(rip, TRI_COMMUNICATOR_REQUEST_ABORTED, {nullptr});
break;
default:
LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "curl return " << rc;
callErrorFn(rip, TRI_ERROR_INTERNAL, {nullptr});
break;
}
_handlesInProgress.erase(rip->_ticketId);
if (curlHandle) {
rip->_callbacks._scheduleMe([curlHandle, this, handle, rc, rip]
{// lamda rewrite starts
double connectTime = 0.0;
LOG_TOPIC(TRACE, Logger::COMMUNICATION)
<< ::buildPrefix(rip->_ticketId) << "curl rc is : " << rc << " after "
<< Logger::FIXED(TRI_microtime() - rip->_startTime) << " s";
if (CURLE_OPERATION_TIMEDOUT == rc) {
curl_easy_getinfo(handle, CURLINFO_CONNECT_TIME, &connectTime);
LOG_TOPIC(TRACE, Logger::COMMUNICATION)
<< ::buildPrefix(rip->_ticketId) << "CURLINFO_CONNECT_TIME is " << connectTime;
} // if
if (strlen(rip->_errorBuffer) != 0) {
LOG_TOPIC(TRACE, Logger::COMMUNICATION)
<< ::buildPrefix(rip->_ticketId) << "curl error details: " << rip->_errorBuffer;
}
switch (rc) {
case CURLE_OK: {
long httpStatusCode = 200;
curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &httpStatusCode);
std::unique_ptr<GeneralResponse> response(
new HttpResponse(static_cast<ResponseCode>(httpStatusCode)));
transformResult(handle, std::move(rip->_responseHeaders),
std::move(rip->_responseBody),
dynamic_cast<HttpResponse*>(response.get()));
if (httpStatusCode < 400) {
callSuccessFn(rip->_ticketId, rip->_destination, rip->_callbacks, std::move(response));
} else {
callErrorFn(rip, httpStatusCode, std::move(response));
}
break;
}
case CURLE_COULDNT_CONNECT:
case CURLE_SSL_CONNECT_ERROR:
case CURLE_COULDNT_RESOLVE_HOST:
case CURLE_URL_MALFORMAT:
case CURLE_SEND_ERROR:
callErrorFn(rip, TRI_SIMPLE_CLIENT_COULD_NOT_CONNECT, {nullptr});
break;
case CURLE_OPERATION_TIMEDOUT:
case CURLE_RECV_ERROR:
case CURLE_GOT_NOTHING:
if (rip->_aborted || (CURLE_OPERATION_TIMEDOUT == rc && 0.0 == connectTime)) {
callErrorFn(rip, TRI_COMMUNICATOR_REQUEST_ABORTED, {nullptr});
} else {
callErrorFn(rip, TRI_ERROR_CLUSTER_TIMEOUT, {nullptr});
}
break;
case CURLE_WRITE_ERROR:
if (rip->_aborted) {
callErrorFn(rip, TRI_COMMUNICATOR_REQUEST_ABORTED, {nullptr});
} else {
LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "got a write error from curl but request was not aborted";
callErrorFn(rip, TRI_ERROR_INTERNAL, {nullptr});
}
break;
case CURLE_ABORTED_BY_CALLBACK:
TRI_ASSERT(rip->_aborted);
callErrorFn(rip, TRI_COMMUNICATOR_REQUEST_ABORTED, {nullptr});
break;
default:
LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "curl return " << rc;
callErrorFn(rip, TRI_ERROR_INTERNAL, {nullptr});
break;
}
}); //lambda rewrite ends
} else {
LOG_TOPIC(ERR, Logger::COMMUNICATION) << "In progress id not found via _handlesInProgress.find("
<< rip->_ticketId << ")";
}
}
void Communicator::transformResult(CURL* handle,

View File

@ -81,7 +81,7 @@ struct RequestInProgress {
bool _aborted;
};
struct CurlHandle {
struct CurlHandle : public std::enable_shared_from_this<CurlHandle> {
explicit CurlHandle(RequestInProgress* rip) : _handle(nullptr), _rip(rip) {
_handle = curl_easy_init();
if (_handle == nullptr) {
@ -96,6 +96,8 @@ struct CurlHandle {
}
}
std::shared_ptr<CurlHandle> getSharedPtr() {return shared_from_this();}
CurlHandle(CurlHandle& other) = delete;
CurlHandle& operator=(CurlHandle& other) = delete;
@ -227,7 +229,7 @@ class Communicator {
std::vector<NewRequest> _newRequests;
Mutex _handlesLock;
std::unordered_map<uint64_t, std::unique_ptr<CurlHandle>> _handlesInProgress;
std::unordered_map<uint64_t, std::shared_ptr<CurlHandle>> _handlesInProgress;
CURLM* _curl;
CURLMcode _mc;

View File

@ -42,7 +42,7 @@ class ClusterCommTester : public ClusterComm {
public:
ClusterCommTester()
: ClusterComm(false),
_oldSched(nullptr), _testerSched(1, 2, 3, 4, 5)
_oldSched(nullptr), _testerSched(1, 2, 3, 4)
{
// fake a scheduler object
_oldSched = SchedulerFeature::SCHEDULER;