//////////////////////////////////////////////////////////////////////////////// /// @brief Library for intra-cluster communications /// /// @file ClusterComm.cpp /// /// DISCLAIMER /// /// Copyright 2014 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Max Neunhoeffer /// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany /// @author Copyright 2013, triagens GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// #include "Cluster/ClusterComm.h" #include "Basics/logging.h" #include "Basics/WriteLocker.h" #include "Basics/ConditionLocker.h" #include "Basics/StringUtils.h" #include "Dispatcher/DispatcherThread.h" #include "SimpleHttpClient/ConnectionManager.h" #include "Utils/Transaction.h" #include "VocBase/server.h" using namespace std; using namespace triagens::arango; // ----------------------------------------------------------------------------- // --SECTION-- ClusterComm connection options // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief global callback for asynchronous REST handler //////////////////////////////////////////////////////////////////////////////// void triagens::arango::ClusterCommRestCallback (std::string& coordinator, triagens::rest::HttpResponse* response) { ClusterComm::instance()->asyncAnswer(coordinator, response); } // ----------------------------------------------------------------------------- // --SECTION-- ClusterComm class // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief ClusterComm constructor //////////////////////////////////////////////////////////////////////////////// ClusterComm::ClusterComm () : _backgroundThread(nullptr), _logConnectionErrors(false) { } //////////////////////////////////////////////////////////////////////////////// /// @brief ClusterComm destructor //////////////////////////////////////////////////////////////////////////////// ClusterComm::~ClusterComm () { if (_backgroundThread != nullptr) { _backgroundThread->stop(); _backgroundThread->shutdown(); delete _backgroundThread; _backgroundThread = nullptr; } cleanupAllQueues(); } //////////////////////////////////////////////////////////////////////////////// /// @brief getter for our singleton instance //////////////////////////////////////////////////////////////////////////////// ClusterComm* ClusterComm::instance () { static ClusterComm* Instance = new ClusterComm(); return Instance; } //////////////////////////////////////////////////////////////////////////////// /// @brief initialize the cluster comm singleton object //////////////////////////////////////////////////////////////////////////////// void ClusterComm::initialize () { auto* i = instance(); i->startBackgroundThread(); } //////////////////////////////////////////////////////////////////////////////// /// @brief cleanup function to call once when shutting down //////////////////////////////////////////////////////////////////////////////// void ClusterComm::cleanup () { auto i = instance(); TRI_ASSERT(i != nullptr); delete i; } //////////////////////////////////////////////////////////////////////////////// /// @brief start the communication background thread //////////////////////////////////////////////////////////////////////////////// void ClusterComm::startBackgroundThread () { _backgroundThread = new ClusterCommThread(); if (nullptr == _backgroundThread) { LOG_FATAL_AND_EXIT("unable to start ClusterComm background thread"); } if (! _backgroundThread->init() || ! _backgroundThread->start()) { LOG_FATAL_AND_EXIT("ClusterComm background thread does not work"); } } //////////////////////////////////////////////////////////////////////////////// /// @brief produces an operation ID which is unique in this process //////////////////////////////////////////////////////////////////////////////// OperationID ClusterComm::getOperationID () { return TRI_NewTickServer(); } //////////////////////////////////////////////////////////////////////////////// /// @brief submit an HTTP request to a shard asynchronously. /// /// This function is only called when arangod is in coordinator mode. It /// queues a single HTTP request to one of the DBServers to be sent by /// ClusterComm in the background thread. This request actually orders /// an answer, which is an HTTP request sent from the target DBServer /// back to us. Therefore ClusterComm also creates an entry in a list of /// expected answers. One either has to use a callback for the answer, /// or poll for it, or drop it to prevent memory leaks. The result of /// this call is just a record that the initial HTTP request has been /// queued (`status` is CL_COMM_SUBMITTED). Use @ref enquire below to get /// information about the progress. The actual answer is then delivered /// either in the callback or via poll. The ClusterCommResult is returned /// by value. /// The library takes ownerships of the pointer `headerFields` by moving /// the unique_ptr to its own storage, this is necessary since this /// method has to add its own headers. The library retains shared /// ownership of `callback`. We use a shared_ptr for the body string /// such that it is possible to use the same body in multiple requests. /// /// Arguments: `clientTransactionID` is a string coming from the /// client and describing the transaction the client is doing, /// `coordTransactionID` is a number describing the transaction the /// coordinator is doing, `destination` is a string that either starts /// with "shard:" followed by a shardID identifying the shard this /// request is sent to, actually, this is internally translated into a /// server ID. It is also possible to specify a DB server ID directly /// here in the form of "server:" followed by a serverID. //////////////////////////////////////////////////////////////////////////////// ClusterCommResult const ClusterComm::asyncRequest ( ClientTransactionID const clientTransactionID, CoordTransactionID const coordTransactionID, std::string const& destination, triagens::rest::HttpRequest::HttpRequestType reqtype, std::string const& path, std::shared_ptr body, std::unique_ptr>& headerFields, std::shared_ptr callback, ClusterCommTimeout timeout) { auto op = std::make_unique(); op->result.clientTransactionID = clientTransactionID; op->result.coordTransactionID = coordTransactionID; do { op->result.operationID = getOperationID(); } while (op->result.operationID == 0); // just to make sure if (destination.substr(0, 6) == "shard:") { op->result.shardID = destination.substr(6); { std::shared_ptr> resp = ClusterInfo::instance()->getResponsibleServer(op->result.shardID); if (! resp->empty()) { op->result.serverID = (*resp)[0]; } else { op->result.serverID = ""; } } LOG_DEBUG("Responsible server: %s", op->result.serverID.c_str()); if (triagens::arango::Transaction::_makeNolockHeaders != nullptr) { // LOCKING-DEBUG // std::cout << "Found Nolock header\n"; auto it = triagens::arango::Transaction::_makeNolockHeaders->find(op->result.shardID); if (it != triagens::arango::Transaction::_makeNolockHeaders->end()) { // LOCKING-DEBUG // std::cout << "Found our shard\n"; (*headerFields)["X-Arango-Nolock"] = op->result.shardID; } } } else if (destination.substr(0, 7) == "server:") { op->result.shardID = ""; op->result.serverID = destination.substr(7); } else { op->result.shardID = ""; op->result.serverID = ""; } // Add the header fields for asynchronous mode: (*headerFields)["X-Arango-Async"] = "store"; (*headerFields)["X-Arango-Coordinator"] = ServerState::instance()->getId() + ":" + basics::StringUtils::itoa(op->result.operationID) + ":" + clientTransactionID + ":" + basics::StringUtils::itoa(coordTransactionID); (*headerFields)["Authorization"] = ServerState::instance()->getAuthentication(); #ifdef DEBUG_CLUSTER_COMM #ifdef TRI_ENABLE_MAINTAINER_MODE #if HAVE_BACKTRACE std::string bt; TRI_GetBacktrace(bt); std::replace( bt.begin(), bt.end(), '\n', ';'); // replace all '\n' to ';' (*headerFields)["X-Arango-BT-A-SYNC"] = bt; #endif #endif #endif op->result.status = CL_COMM_SUBMITTED; op->reqtype = reqtype; op->path = path; op->body = body; op->headerFields = std::move(headerFields); op->callback = callback; op->endTime = timeout == 0.0 ? TRI_microtime() + 24 * 60 * 60.0 : TRI_microtime() + timeout; // LOCKING-DEBUG // std::cout << "asyncRequest: sending " << triagens::rest::HttpRequest::translateMethod(reqtype) << " request to DB server '" << op->serverID << ":" << path << "\n" << *(body.get()) << "\n"; // for (auto& h : *(op->headerFields)) { // std::cout << h.first << ":" << h.second << std::endl; // } // std::cout << std::endl; ClusterCommResult const res(op->result); { CONDITION_LOCKER(locker, somethingToSend); toSend.push_back(op.get()); TRI_ASSERT(nullptr != op.get()); list::iterator i = toSend.end(); toSendByOpID[op->result.operationID] = --i; } LOG_DEBUG("In asyncRequest, put into queue %llu", (unsigned long long) op->result.operationID); op.release(); somethingToSend.signal(); return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief submit a single HTTP request to a shard synchronously. /// /// This function does an HTTP request synchronously, waiting for the /// result. Note that the result has `status` field set to `CL_COMM_SENT` /// and the field `result` is set to the HTTP response. The field `answer` /// is unused in this case. In case of a timeout the field `status` is /// `CL_COMM_TIMEOUT` and the field `result` points to an HTTP response /// object that only says "timeout". Note that the ClusterComm library /// does not keep a record of this operation, in particular, you cannot /// use @ref enquire to ask about it. /// /// Arguments: `clientTransactionID` is a string coming from the client /// and describing the transaction the client is doing, `coordTransactionID` /// is a number describing the transaction the coordinator is doing, /// shardID is a string that identifies the shard this request is sent to, /// actually, this is internally translated into a server ID. It is also /// possible to specify a DB server ID directly here. //////////////////////////////////////////////////////////////////////////////// std::unique_ptr ClusterComm::syncRequest ( ClientTransactionID const& clientTransactionID, CoordTransactionID const coordTransactionID, string const& destination, triagens::rest::HttpRequest::HttpRequestType reqtype, string const& path, string const& body, map const& headerFields, ClusterCommTimeout timeout) { map headersCopy(headerFields); auto res = std::make_unique(); res->clientTransactionID = clientTransactionID; res->coordTransactionID = coordTransactionID; do { res->operationID = getOperationID(); } while (res->operationID == 0); // just to make sure res->status = CL_COMM_SENDING; double currentTime = TRI_microtime(); double endTime = timeout == 0.0 ? currentTime + 24 * 60 * 60.0 : currentTime + timeout; if (destination.substr(0, 6) == "shard:") { res->shardID = destination.substr(6); { std::shared_ptr> resp = ClusterInfo::instance()->getResponsibleServer(res->shardID); if (! resp->empty()) { res->serverID = (*resp)[0]; } else { res->serverID = ""; } } LOG_DEBUG("Responsible server: %s", res->serverID.c_str()); if (res->serverID.empty()) { res->status = CL_COMM_ERROR; return res; } if (triagens::arango::Transaction::_makeNolockHeaders != nullptr) { // LOCKING-DEBUG // std::cout << "Found Nolock header\n"; auto it = triagens::arango::Transaction::_makeNolockHeaders->find(res->shardID); if (it != triagens::arango::Transaction::_makeNolockHeaders->end()) { // LOCKING-DEBUG // std::cout << "Found this shard: " << res->shardID << std::endl; headersCopy["X-Arango-Nolock"] = res->shardID; } } } else if (destination.substr(0, 7) == "server:") { res->shardID = ""; res->serverID = destination.substr(7); } else { res->status = CL_COMM_ERROR; return res; } // We need a connection to this server: string endpoint = ClusterInfo::instance()->getServerEndpoint(res->serverID); if (endpoint.empty()) { res->status = CL_COMM_ERROR; if (logConnectionErrors()) { LOG_ERROR("cannot find endpoint of server '%s'", res->serverID.c_str()); } else { LOG_INFO("cannot find endpoint of server '%s'", res->serverID.c_str()); } } else { httpclient::ConnectionManager* cm = httpclient::ConnectionManager::instance(); httpclient::ConnectionManager::SingleServerConnection* connection = cm->leaseConnection(endpoint); if (nullptr == connection) { res->status = CL_COMM_ERROR; if (logConnectionErrors()) { LOG_ERROR("cannot create connection to server '%s'", res->serverID.c_str()); } else { LOG_INFO("cannot create connection to server '%s'", res->serverID.c_str()); } } else { LOG_DEBUG("sending %s request to DB server '%s': %s", triagens::rest::HttpRequest::translateMethod(reqtype).c_str(), res->serverID.c_str(), body.c_str()); // LOCKING-DEBUG // std::cout << "syncRequest: sending " << triagens::rest::HttpRequest::translateMethod(reqtype) << " request to DB server '" << res->serverID << ":" << path << "\n" << body << "\n"; // for (auto& h : headersCopy) { // std::cout << h.first << ":" << h.second << std::endl; // } // std::cout << std::endl; auto client = std::make_unique(connection->_connection, endTime - currentTime, false); client->keepConnectionOnDestruction(true); headersCopy["Authorization"] = ServerState::instance()->getAuthentication(); #ifdef DEBUG_CLUSTER_COMM #ifdef TRI_ENABLE_MAINTAINER_MODE #if HAVE_BACKTRACE std::string bt; TRI_GetBacktrace(bt); std::replace( bt.begin(), bt.end(), '\n', ';'); // replace all '\n' to ';' headersCopy["X-Arango-BT-SYNC"] = bt; #endif #endif #endif res->result.reset(client->request(reqtype, path, body.c_str(), body.size(), headersCopy)); if (res->result == nullptr || ! res->result->isComplete()) { res->errorMessage = client->getErrorMessage(); if (res->errorMessage == "Request timeout reached") { res->status = CL_COMM_TIMEOUT; } else { res->status = CL_COMM_ERROR; } cm->brokenConnection(connection); client->invalidateConnection(); } else { cm->returnConnection(connection); if (res->result->wasHttpError()) { res->status = CL_COMM_ERROR; res->errorMessage = client->getErrorMessage(); } } } if (res->status == CL_COMM_SENDING) { // Everything was OK res->status = CL_COMM_SENT; } } return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief internal function to match an operation: //////////////////////////////////////////////////////////////////////////////// bool ClusterComm::match ( ClientTransactionID const& clientTransactionID, CoordTransactionID const coordTransactionID, ShardID const& shardID, ClusterCommOperation* op) { return ( (clientTransactionID == "" || clientTransactionID == op->result.clientTransactionID) && (0 == coordTransactionID || coordTransactionID == op->result.coordTransactionID) && (shardID == "" || shardID == op->result.shardID) ); } //////////////////////////////////////////////////////////////////////////////// /// @brief check on the status of an operation /// /// This call never blocks and returns information about a specific operation /// given by `operationID`. Note that if the `status` is >= `CL_COMM_SENT`, /// then the `result` field in the returned object is set, if the `status` /// is `CL_COMM_RECEIVED`, then `answer` is set. However, in both cases /// the ClusterComm library retains the operation in its queues! Therefore, /// you have to use @ref wait or @ref drop to dequeue. Do not delete /// `result` and `answer` before doing this! However, you have to delete /// the ClusterCommResult pointer you get, it will automatically refrain /// from deleting `result` and `answer`. //////////////////////////////////////////////////////////////////////////////// ClusterCommResult const ClusterComm::enquire (OperationID const operationID) { IndexIterator i; ClusterCommOperation* op = nullptr; // First look into the send queue: { CONDITION_LOCKER(locker, somethingToSend); i = toSendByOpID.find(operationID); if (i != toSendByOpID.end()) { op = *(i->second); return op->result; } } // Note that operations only ever move from the send queue to the // receive queue and never in the other direction. Therefore it is // OK to use two different locks here, since we look first in the // send queue and then in the receive queue; we can never miss // an operation that is actually there. // If the above did not give anything, look into the receive queue: { CONDITION_LOCKER(locker, somethingReceived); i = receivedByOpID.find(operationID); if (i != receivedByOpID.end()) { op = *(i->second); return op->result; } } ClusterCommResult res; res.operationID = operationID; res.status = CL_COMM_DROPPED; return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief wait for one answer matching the criteria /// /// If clientTransactionID is empty, then any answer with any /// clientTransactionID matches. If coordTransactionID is 0, then any /// answer with any coordTransactionID matches. If shardID is empty, /// then any answer from any ShardID matches. If operationID is 0, then /// any answer with any operationID matches. This function returns /// a result structure with status CL_COMM_DROPPED if no operation /// matches. If `timeout` is given, the result can be a result structure /// with status CL_COMM_TIMEOUT indicating that no matching answer was /// available until the timeout was hit. The caller has to delete the /// result. //////////////////////////////////////////////////////////////////////////////// ClusterCommResult const ClusterComm::wait ( ClientTransactionID const& clientTransactionID, CoordTransactionID const coordTransactionID, OperationID const operationID, ShardID const& shardID, ClusterCommTimeout timeout) { IndexIterator i; QueueIterator q; double endtime; double timeleft; if (0.0 == timeout) { endtime = TRI_microtime() + 24.0 * 60.0 * 60.0; // this is the Sankt Nimmerleinstag } else { endtime = TRI_microtime() + timeout; } // tell Dispatcher that we are waiting: if (triagens::rest::DispatcherThread::currentDispatcherThread != nullptr) { triagens::rest::DispatcherThread::currentDispatcherThread->block(); } if (0 != operationID) { // In this case we only have to look into at most one operation. CONDITION_LOCKER(locker, somethingReceived); while (true) { // will be left by return or break on timeout i = receivedByOpID.find(operationID); if (i == receivedByOpID.end()) { // It could be that the operation is still in the send queue: CONDITION_LOCKER(sendLocker, somethingToSend); i = toSendByOpID.find(operationID); if (i == toSendByOpID.end()) { // Nothing known about this operation, return with failure: ClusterCommResult res; res.operationID = operationID; res.status = CL_COMM_DROPPED; // tell Dispatcher that we are back in business if (triagens::rest::DispatcherThread::currentDispatcherThread != nullptr) { triagens::rest::DispatcherThread::currentDispatcherThread->unblock(); } return res; } } else { // It is in the receive queue, now look at the status: q = i->second; if ((*q)->result.status >= CL_COMM_TIMEOUT) { std::unique_ptr op(*q); // It is done, let's remove it from the queue and return it: receivedByOpID.erase(i); received.erase(q); // tell Dispatcher that we are back in business if (triagens::rest::DispatcherThread::currentDispatcherThread != nullptr) { triagens::rest::DispatcherThread::currentDispatcherThread->unblock(); } return op->result; } // It is in the receive queue but still waiting, now wait actually } // Here it could either be in the receive or the send queue, let's wait timeleft = endtime - TRI_microtime(); if (timeleft <= 0) { break; } somethingReceived.wait(uint64_t(timeleft * 1000000.0)); } // This place is only reached on timeout } else { // here, operationID == 0, so we have to do matching, we are only // interested, if at least one operation matches, if it is ready, // we return it immediately, otherwise, we report an error or wait. CONDITION_LOCKER(locker, somethingReceived); while (true) { // will be left by return or break on timeout bool found = false; for (q = received.begin(); q != received.end(); q++) { if (match(clientTransactionID, coordTransactionID, shardID, *q)) { found = true; if ((*q)->result.status >= CL_COMM_TIMEOUT) { ClusterCommOperation* op = *q; // It is done, let's remove it from the queue and return it: i = receivedByOpID.find(op->result.operationID); // cannot fail! TRI_ASSERT(i != receivedByOpID.end()); TRI_ASSERT(i->second == q); receivedByOpID.erase(i); received.erase(q); ClusterCommResult res = op->result; delete op; // tell Dispatcher that we are back in business if (triagens::rest::DispatcherThread::currentDispatcherThread != nullptr) { triagens::rest::DispatcherThread::currentDispatcherThread->unblock(); } return res; } } } // If we found nothing, we have to look through the send queue: if (! found) { CONDITION_LOCKER(sendLocker, somethingToSend); for (q = toSend.begin(); q != toSend.end(); q++) { if (match(clientTransactionID, coordTransactionID, shardID, *q)) { found = true; break; } } } if (! found) { // Nothing known about this operation, return with failure: ClusterCommResult res; res.clientTransactionID = clientTransactionID; res.coordTransactionID = coordTransactionID; res.operationID = operationID; res.shardID = shardID; res.status = CL_COMM_DROPPED; // tell Dispatcher that we are back in business if (triagens::rest::DispatcherThread::currentDispatcherThread != nullptr) { triagens::rest::DispatcherThread::currentDispatcherThread->unblock(); } return res; } // Here it could either be in the receive or the send queue, let's wait timeleft = endtime - TRI_microtime(); if (timeleft <= 0) { break; } somethingReceived.wait(uint64_t(timeleft * 1000000.0)); } // This place is only reached on timeout } // Now we have to react on timeout: ClusterCommResult res; res.clientTransactionID = clientTransactionID; res.coordTransactionID = coordTransactionID; res.operationID = operationID; res.shardID = shardID; res.status = CL_COMM_TIMEOUT; // tell Dispatcher that we are back in business if (triagens::rest::DispatcherThread::currentDispatcherThread != nullptr) { triagens::rest::DispatcherThread::currentDispatcherThread->unblock(); } return res; } //////////////////////////////////////////////////////////////////////////////// /// @brief ignore and drop current and future answers matching /// /// If clientTransactionID is empty, then any answer with any /// clientTransactionID matches. If coordTransactionID is 0, then /// any answer with any coordTransactionID matches. If shardID is /// empty, then any answer from any ShardID matches. If operationID /// is 0, then any answer with any operationID matches. If there /// is already an answer for a matching operation, it is dropped and /// freed. If not, any future answer coming in is automatically dropped. /// This function can be used to automatically delete all information about an /// operation, for which @ref enquire reported successful completion. //////////////////////////////////////////////////////////////////////////////// void ClusterComm::drop ( ClientTransactionID const& clientTransactionID, CoordTransactionID const coordTransactionID, OperationID const operationID, ShardID const& shardID) { QueueIterator q; QueueIterator nextq; IndexIterator i; // First look through the send queue: { CONDITION_LOCKER(sendLocker, somethingToSend); for (q = toSend.begin(); q != toSend.end(); ) { ClusterCommOperation* op = *q; if ((0 != operationID && operationID == op->result.operationID) || match(clientTransactionID, coordTransactionID, shardID, op)) { if (op->result.status == CL_COMM_SENDING) { op->result.dropped = true; q++; } else { nextq = q; nextq++; i = toSendByOpID.find(op->result.operationID); // cannot fail TRI_ASSERT(i != toSendByOpID.end()); TRI_ASSERT(q == i->second); toSendByOpID.erase(i); toSend.erase(q); q = nextq; } } else { q++; } } } // Now look through the receive queue: { CONDITION_LOCKER(locker, somethingReceived); for (q = received.begin(); q != received.end(); ) { ClusterCommOperation* op = *q; if ((0 != operationID && operationID == op->result.operationID) || match(clientTransactionID, coordTransactionID, shardID, op)) { nextq = q; nextq++; i = receivedByOpID.find(op->result.operationID); // cannot fail TRI_ASSERT(i != receivedByOpID.end()); TRI_ASSERT(q == i->second); receivedByOpID.erase(i); received.erase(q); q = nextq; } else { q++; } } } } //////////////////////////////////////////////////////////////////////////////// /// @brief send an answer HTTP request to a coordinator /// /// This is only called in a DBServer node and never in a coordinator /// node. //////////////////////////////////////////////////////////////////////////////// void ClusterComm::asyncAnswer (string& coordinatorHeader, triagens::rest::HttpResponse* responseToSend) { // First take apart the header to get the coordinatorID: ServerID coordinatorID; size_t start = 0; size_t pos; LOG_DEBUG("In asyncAnswer, seeing %s", coordinatorHeader.c_str()); pos = coordinatorHeader.find(":",start); if (pos == string::npos) { LOG_ERROR("Could not find coordinator ID in X-Arango-Coordinator"); return; } coordinatorID = coordinatorHeader.substr(start,pos-start); // Now find the connection to which the request goes from the coordinatorID: httpclient::ConnectionManager* cm = httpclient::ConnectionManager::instance(); string endpoint = ClusterInfo::instance()->getServerEndpoint(coordinatorID); if (endpoint == "") { if (logConnectionErrors()) { LOG_ERROR("asyncAnswer: cannot find endpoint for server '%s'", coordinatorID.c_str()); } else { LOG_INFO("asyncAnswer: cannot find endpoint for server '%s'", coordinatorID.c_str()); } return; } httpclient::ConnectionManager::SingleServerConnection* connection = cm->leaseConnection(endpoint); if (nullptr == connection) { LOG_ERROR("asyncAnswer: cannot create connection to server '%s'", coordinatorID.c_str()); return; } map headers = responseToSend->headers(); headers["X-Arango-Coordinator"] = coordinatorHeader; headers["X-Arango-Response-Code"] = responseToSend->responseString(responseToSend->responseCode()); headers["Authorization"] = ServerState::instance()->getAuthentication(); char const* body = responseToSend->body().c_str(); size_t len = responseToSend->body().length(); LOG_DEBUG("asyncAnswer: sending PUT request to DB server '%s'", coordinatorID.c_str()); auto client = std::make_unique(connection->_connection, 3600.0, false); client->keepConnectionOnDestruction(true); // We add this result to the operation struct without acquiring // a lock, since we know that only we do such a thing: httpclient::SimpleHttpResult* result = client->request(rest::HttpRequest::HTTP_REQUEST_PUT, "/_api/shard-comm", body, len, headers); if (result == nullptr || ! result->isComplete()) { cm->brokenConnection(connection); client->invalidateConnection(); } else { cm->returnConnection(connection); } // We cannot deal with a bad result here, so forget about it in any case. delete result; } //////////////////////////////////////////////////////////////////////////////// /// @brief process an answer coming in on the HTTP socket /// /// this is called for a request, which is actually an answer to one of /// our earlier requests, return value of "" means OK and nonempty is /// an error. This is only called in a coordinator node and not in a /// DBServer node. //////////////////////////////////////////////////////////////////////////////// string ClusterComm::processAnswer (string& coordinatorHeader, triagens::rest::HttpRequest* answer) { // First take apart the header to get the operaitonID: OperationID operationID; size_t start = 0; size_t pos; LOG_DEBUG("In processAnswer, seeing %s", coordinatorHeader.c_str()); pos = coordinatorHeader.find(":", start); if (pos == string::npos) { return string("could not find coordinator ID in 'X-Arango-Coordinator'"); } //coordinatorID = coordinatorHeader.substr(start,pos-start); start = pos + 1; pos = coordinatorHeader.find(":", start); if (pos == string::npos) { return string("could not find operationID in 'X-Arango-Coordinator'"); } operationID = basics::StringUtils::uint64(coordinatorHeader.substr(start)); // Finally find the ClusterCommOperation record for this operation: { CONDITION_LOCKER(locker, somethingReceived); ClusterComm::IndexIterator i; i = receivedByOpID.find(operationID); if (i != receivedByOpID.end()) { ClusterCommOperation* op = *(i->second); op->result.answer.reset(answer); op->result.answer_code = rest::HttpResponse::responseCode( answer->header("x-arango-response-code")); op->result.status = CL_COMM_RECEIVED; // Do we have to do a callback? if (nullptr != op->callback.get()) { if ((*op->callback.get())(&op->result)) { // This is fully processed, so let's remove it from the queue: QueueIterator q = i->second; receivedByOpID.erase(i); received.erase(q); delete op; } } } else { // We have to look in the send queue as well, as it might not yet // have been moved to the received queue. Note however that it must // have been fully sent, so this is highly unlikely, but not impossible. CONDITION_LOCKER(sendLocker, somethingToSend); i = toSendByOpID.find(operationID); if (i != toSendByOpID.end()) { ClusterCommOperation* op = *(i->second); op->result.answer.reset(answer); op->result.answer_code = rest::HttpResponse::responseCode( answer->header("x-arango-response-code")); op->result.status = CL_COMM_RECEIVED; if (nullptr != op->callback) { if ((*op->callback)(&op->result)) { // This is fully processed, so let's remove it from the queue: QueueIterator q = i->second; toSendByOpID.erase(i); toSend.erase(q); delete op; } } } else { // Nothing known about the request, get rid of it: delete answer; return string("operation was already dropped by sender"); } } } // Finally tell the others: somethingReceived.broadcast(); return string(""); } //////////////////////////////////////////////////////////////////////////////// /// @brief move an operation from the send to the receive queue //////////////////////////////////////////////////////////////////////////////// bool ClusterComm::moveFromSendToReceived (OperationID operationID) { LOG_DEBUG("In moveFromSendToReceived %llu", (unsigned long long) operationID); CONDITION_LOCKER(locker, somethingReceived); CONDITION_LOCKER(sendLocker, somethingToSend); IndexIterator i = toSendByOpID.find(operationID); // cannot fail TRI_ASSERT(i != toSendByOpID.end()); QueueIterator q = i->second; ClusterCommOperation* op = *q; TRI_ASSERT(op->result.operationID == operationID); toSendByOpID.erase(i); toSend.erase(q); std::unique_ptr opPtr(op); if (op->result.dropped) { return false; } if (op->result.status == CL_COMM_SENDING) { // Note that in the meantime the status could have changed to // CL_COMM_ERROR, CL_COMM_TIMEOUT or indeed to CL_COMM_RECEIVED in // these cases, we do not want to overwrite this result op->result.status = CL_COMM_SENT; } received.push_back(op); opPtr.release(); q = received.end(); q--; receivedByOpID[operationID] = q; somethingReceived.broadcast(); return true; } //////////////////////////////////////////////////////////////////////////////// /// @brief cleanup all queues //////////////////////////////////////////////////////////////////////////////// void ClusterComm::cleanupAllQueues() { { CONDITION_LOCKER(locker, somethingToSend); for (auto& it : toSend) { delete it; } toSendByOpID.clear(); toSend.clear(); } { CONDITION_LOCKER(locker, somethingReceived); for (auto& it : received) { delete it; } receivedByOpID.clear(); received.clear(); } } // ----------------------------------------------------------------------------- // --SECTION-- ClusterCommThread // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // --SECTION-- constructors and destructors // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief constructs a ClusterCommThread //////////////////////////////////////////////////////////////////////////////// ClusterCommThread::ClusterCommThread () : Thread("ClusterComm"), _agency(), _condition(), _stop(0) { allowAsynchronousCancelation(); } //////////////////////////////////////////////////////////////////////////////// /// @brief destroys a ClusterCommThread //////////////////////////////////////////////////////////////////////////////// ClusterCommThread::~ClusterCommThread () { } // ----------------------------------------------------------------------------- // --SECTION-- ClusterCommThread methods // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief ClusterComm main loop //////////////////////////////////////////////////////////////////////////////// void ClusterCommThread::run () { ClusterComm::QueueIterator q; ClusterComm::IndexIterator i; ClusterCommOperation* op; ClusterComm* cc = ClusterComm::instance(); LOG_DEBUG("starting ClusterComm thread"); while (0 == _stop) { // First check the sending queue, as long as it is not empty, we send // a request via SimpleHttpClient: while (true) { // left via break when there is no job in send queue if (0 != _stop) { break; } { CONDITION_LOCKER(locker, cc->somethingToSend); if (cc->toSend.empty()) { break; } else { LOG_DEBUG("Noticed something to send"); op = cc->toSend.front(); TRI_ASSERT(op->result.status == CL_COMM_SUBMITTED); op->result.status = CL_COMM_SENDING; } } // We release the lock, if the operation is dropped now, the // `dropped` flag is set. We find out about this after we have // sent the request (happens in moveFromSendToReceived). // Have we already reached the timeout? double currentTime = TRI_microtime(); if (op->endTime <= currentTime) { op->result.status = CL_COMM_TIMEOUT; } else { if (op->result.serverID == "") { op->result.status = CL_COMM_ERROR; } else { // We need a connection to this server: string endpoint = ClusterInfo::instance()->getServerEndpoint(op->result.serverID); if (endpoint == "") { op->result.status = CL_COMM_ERROR; if (cc->logConnectionErrors()) { LOG_ERROR("cannot find endpoint for server '%s'", op->result.serverID.c_str()); } else { LOG_INFO("cannot find endpoint for server '%s'", op->result.serverID.c_str()); } } else { httpclient::ConnectionManager* cm = httpclient::ConnectionManager::instance(); httpclient::ConnectionManager::SingleServerConnection* connection = cm->leaseConnection(endpoint); if (nullptr == connection) { op->result.status = CL_COMM_ERROR; if (cc->logConnectionErrors()) { LOG_ERROR("cannot create connection to server '%s'", op->result.serverID.c_str()); } else { LOG_INFO("cannot create connection to server '%s'", op->result.serverID.c_str()); } } else { if (nullptr != op->body.get()) { LOG_DEBUG("sending %s request to DB server '%s': %s", triagens::rest::HttpRequest::translateMethod(op->reqtype) .c_str(), op->result.serverID.c_str(), op->body->c_str()); } else { LOG_DEBUG("sending %s request to DB server '%s'", triagens::rest::HttpRequest::translateMethod(op->reqtype) .c_str(), op->result.serverID.c_str()); } auto client = std::make_unique(connection->_connection, op->endTime-currentTime, false); client->keepConnectionOnDestruction(true); // We add this result to the operation struct without acquiring // a lock, since we know that only we do such a thing: if (nullptr != op->body.get()) { op->result.result.reset(client->request(op->reqtype, op->path, op->body->c_str(), op->body->size(), *(op->headerFields))); } else { op->result.result.reset(client->request(op->reqtype, op->path, nullptr, 0, *(op->headerFields))); } if (op->result.result.get() == nullptr || ! op->result.result->isComplete()) { if (client->getErrorMessage() == "Request timeout reached") { op->result.status = CL_COMM_TIMEOUT; } else { op->result.status = CL_COMM_ERROR; } cm->brokenConnection(connection); client->invalidateConnection(); } else { cm->returnConnection(connection); if (op->result.result->wasHttpError()) { op->result.status = CL_COMM_ERROR; } } } } } } cc->moveFromSendToReceived(op->result.operationID); // Potentially it was dropped in the meantime, then we forget about it. } // Now the send queue is empty (at least was empty, when we looked // just now, so we can check on our receive queue to detect timeouts: { double currentTime = TRI_microtime(); CONDITION_LOCKER(locker, cc->somethingReceived); ClusterComm::QueueIterator q; for (q = cc->received.begin(); q != cc->received.end(); ++q) { op = *q; if (op->result.status == CL_COMM_SENT) { if (op->endTime < currentTime) { op->result.status = CL_COMM_TIMEOUT; } } } } // Finally, wait for some time or until something happens using // the condition variable: { CONDITION_LOCKER(locker, cc->somethingToSend); locker.wait(100000); } } // another thread is waiting for this value to shut down properly _stop = 2; LOG_DEBUG("stopped ClusterComm thread"); } // ----------------------------------------------------------------------------- // --SECTION-- public methods // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief initializes the cluster comm background thread //////////////////////////////////////////////////////////////////////////////// bool ClusterCommThread::init () { return true; } // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // ----------------------------------------------------------------------------- // Local Variables: // mode: outline-minor // outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}" // End: