//////////////////////////////////////////////////////////////////////////////// /// @brief methods to do things in a cluster /// /// @file ClusterMethods.cpp /// /// DISCLAIMER /// /// Copyright 2014 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Max Neunhoeffer /// @author Copyright 2014, triagens GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// #include "ClusterMethods.h" #include "Cluster/ClusterInfo.h" #include "Cluster/ClusterComm.h" #include "Basics/conversions.h" #include "Basics/json.h" #include "Basics/tri-strings.h" #include "Basics/vector.h" #include "Basics/json-utilities.h" #include "Basics/StringUtils.h" #include "Indexes/Index.h" #include "VocBase/server.h" using namespace std; using namespace triagens::basics; using namespace triagens::rest; using namespace triagens::arango; namespace triagens { namespace arango { // ----------------------------------------------------------------------------- // --SECTION-- public functions // ----------------------------------------------------------------------------- //////////////////////////////////////////////////////////////////////////////// /// @brief extracts a numeric value from an hierarchical JSON //////////////////////////////////////////////////////////////////////////////// template static T ExtractFigure (TRI_json_t const* json, char const* group, char const* name) { TRI_json_t const* g = TRI_LookupObjectJson(json, group); if (! TRI_IsObjectJson(g)) { return static_cast(0); } TRI_json_t const* value = TRI_LookupObjectJson(g, name); if (! TRI_IsNumberJson(value)) { return static_cast(0); } return static_cast(value->_value._number); } //////////////////////////////////////////////////////////////////////////////// /// @brief merge headers of a DB server response into the current response //////////////////////////////////////////////////////////////////////////////// void mergeResponseHeaders (HttpResponse* response, map const& headers) { map::const_iterator it = headers.begin(); while (it != headers.end()) { // skip first header line (which is the HTTP response code) const string& key = (*it).first; // the following headers are ignored if (key != "http/1.1" && key != "connection" && key != "content-length" && key != "server") { response->setHeader(key, (*it).second); } ++it; } } //////////////////////////////////////////////////////////////////////////////// /// @brief creates a copy of all HTTP headers to forward //////////////////////////////////////////////////////////////////////////////// std::map getForwardableRequestHeaders (triagens::rest::HttpRequest* request) { map const& headers = request->headers(); map::const_iterator it = headers.begin(); map result; while (it != headers.end()) { const string& key = (*it).first; // ignore the following headers if (key != "x-arango-async" && key != "authorization" && key != "content-length" && key != "connection" && key != "expect" && key != "host" && key != "origin" && key.substr(0, 14) != "access-control") { result.emplace(make_pair(key, (*it).second)); } ++it; } return result; } //////////////////////////////////////////////////////////////////////////////// /// @brief check if a list of attributes have the same values in two JSON /// documents //////////////////////////////////////////////////////////////////////////////// bool shardKeysChanged (std::string const& dbname, std::string const& collname, TRI_json_t const* oldJson, TRI_json_t const* newJson, bool isPatch) { if (! TRI_IsObjectJson(oldJson) || ! TRI_IsObjectJson(newJson)) { // expecting two objects. everything else is an error return true; } TRI_json_t nullJson; TRI_InitNullJson(&nullJson); ClusterInfo* ci = ClusterInfo::instance(); shared_ptr const& c = ci->getCollection(dbname, collname); const std::vector& shardKeys = c->shardKeys(); for (size_t i = 0; i < shardKeys.size(); ++i) { if (shardKeys[i] == TRI_VOC_ATTRIBUTE_KEY) { continue; } TRI_json_t const* n = TRI_LookupObjectJson(newJson, shardKeys[i].c_str()); if (n == nullptr && isPatch) { // attribute not set in patch document. this means no update continue; } TRI_json_t const* o = TRI_LookupObjectJson(oldJson, shardKeys[i].c_str()); if (o == nullptr) { // if attribute is undefined, use "null" instead o = &nullJson; } if (n == nullptr) { // if attribute is undefined, use "null" instead n = &nullJson; } if (! TRI_CheckSameValueJson(o, n)) { return true; } } return false; } //////////////////////////////////////////////////////////////////////////////// /// @brief returns users //////////////////////////////////////////////////////////////////////////////// int usersOnCoordinator (std::string const& dbname, TRI_json_t*& result, double timeout) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, TRI_COL_NAME_USERS); if (collinfo->empty()) { return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } result = TRI_CreateArrayJson(TRI_UNKNOWN_MEM_ZONE); if (result == nullptr) { return TRI_ERROR_OUT_OF_MEMORY; } // If we get here, the sharding attributes are not only _key, therefore // we have to contact everybody: ClusterCommResult* res; map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headers = new map; // set collection name (shard id) string* body = new string; body->append("{\"collection\":\""); body->append((*it).first); body->append("\"}"); res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, triagens::rest::HttpRequest::HTTP_REQUEST_PUT, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/simple/all", body, true, headers, nullptr, 10.0); delete res; } // Now listen to the results: int count; int nrok = 0; for (count = (int) shards.size(); count > 0; count--) { res = cc->wait("", coordTransactionID, 0, "", timeout); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code == triagens::rest::HttpResponse::OK || res->answer_code == triagens::rest::HttpResponse::CREATED) { TRI_json_t* json = TRI_JsonString(TRI_UNKNOWN_MEM_ZONE, res->answer->body()); if (JsonHelper::isObject(json)) { TRI_json_t const* r = TRI_LookupObjectJson(json, "result"); if (TRI_IsArrayJson(r)) { size_t const n = TRI_LengthArrayJson(r); for (size_t i = 0; i < n; ++i) { TRI_json_t const* p = TRI_LookupArrayJson(r, i); if (TRI_IsObjectJson(p)) { TRI_PushBack3ArrayJson(TRI_UNKNOWN_MEM_ZONE, result, TRI_CopyJson(TRI_UNKNOWN_MEM_ZONE, p)); } } } nrok++; } if (json != nullptr) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); } } } delete res; } if (nrok != (int) shards.size()) { return TRI_ERROR_INTERNAL; } return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however, // the DBserver could have reported an error. } //////////////////////////////////////////////////////////////////////////////// /// @brief returns revision for a sharded collection //////////////////////////////////////////////////////////////////////////////// int revisionOnCoordinator (std::string const& dbname, std::string const& collname, TRI_voc_rid_t& rid) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } rid = 0; // If we get here, the sharding attributes are not only _key, therefore // we have to contact everybody: ClusterCommResult* res; map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headers = new map; res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, triagens::rest::HttpRequest::HTTP_REQUEST_GET, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/collection/" + StringUtils::urlEncode(it->first) + "/revision", 0, false, headers, nullptr, 300.0); delete res; } // Now listen to the results: int count; int nrok = 0; for (count = (int) shards.size(); count > 0; count--) { res = cc->wait( "", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code == triagens::rest::HttpResponse::OK) { TRI_json_t* json = TRI_JsonString(TRI_UNKNOWN_MEM_ZONE, res->answer->body()); if (JsonHelper::isObject(json)) { TRI_json_t const* r = TRI_LookupObjectJson(json, "revision"); if (TRI_IsStringJson(r)) { TRI_voc_rid_t cmp = StringUtils::uint64(r->_value._string.data); if (cmp > rid) { // get the maximum value rid = cmp; } } nrok++; } if (json != 0) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); } } } delete res; } if (nrok != (int) shards.size()) { return TRI_ERROR_INTERNAL; } return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however, // the DBserver could have reported an error. } //////////////////////////////////////////////////////////////////////////////// /// @brief returns figures for a sharded collection //////////////////////////////////////////////////////////////////////////////// int figuresOnCoordinator (string const& dbname, string const& collname, TRI_doc_collection_info_t*& result) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } // prefill with 0s result = (TRI_doc_collection_info_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_doc_collection_info_t), true); if (result == nullptr) { return TRI_ERROR_OUT_OF_MEMORY; } // If we get here, the sharding attributes are not only _key, therefore // we have to contact everybody: ClusterCommResult* res; map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headers = new map; res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, triagens::rest::HttpRequest::HTTP_REQUEST_GET, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/collection/" + StringUtils::urlEncode(it->first) + "/figures", 0, false, headers, nullptr, 300.0); delete res; } // Now listen to the results: int count; int nrok = 0; for (count = (int) shards.size(); count > 0; count--) { res = cc->wait( "", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code == triagens::rest::HttpResponse::OK) { TRI_json_t* json = TRI_JsonString(TRI_UNKNOWN_MEM_ZONE, res->answer->body()); if (JsonHelper::isObject(json)) { TRI_json_t const* figures = TRI_LookupObjectJson(json, "figures"); if (TRI_IsObjectJson(figures)) { // add to the total result->_numberAlive += ExtractFigure(figures, "alive", "count"); result->_numberDead += ExtractFigure(figures, "dead", "count"); result->_numberDeletion += ExtractFigure(figures, "dead", "deletion"); result->_numberShapes += ExtractFigure(figures, "shapes", "count"); result->_numberAttributes += ExtractFigure(figures, "attributes", "count"); result->_numberIndexes += ExtractFigure(figures, "indexes", "count"); result->_sizeAlive += ExtractFigure(figures, "alive", "size"); result->_sizeDead += ExtractFigure(figures, "dead", "size"); result->_sizeShapes += ExtractFigure(figures, "shapes", "size"); result->_sizeAttributes += ExtractFigure(figures, "attributes", "size"); result->_sizeIndexes += ExtractFigure(figures, "indexes", "size"); result->_numberDatafiles += ExtractFigure(figures, "datafiles", "count"); result->_numberJournalfiles += ExtractFigure(figures, "journals", "count"); result->_numberCompactorfiles += ExtractFigure(figures, "compactors", "count"); result->_numberShapefiles += ExtractFigure(figures, "shapefiles", "count"); result->_datafileSize += ExtractFigure(figures, "datafiles", "fileSize"); result->_journalfileSize += ExtractFigure(figures, "journals", "fileSize"); result->_compactorfileSize += ExtractFigure(figures, "compactors", "fileSize"); result->_shapefileSize += ExtractFigure(figures, "shapefiles", "fileSize"); } nrok++; } if (json != 0) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); } } } delete res; } if (nrok != (int) shards.size()) { TRI_Free(TRI_UNKNOWN_MEM_ZONE, result); result = 0; return TRI_ERROR_INTERNAL; } return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however, // the DBserver could have reported an error. } //////////////////////////////////////////////////////////////////////////////// /// @brief counts number of documents in a coordinator //////////////////////////////////////////////////////////////////////////////// int countOnCoordinator ( string const& dbname, string const& collname, uint64_t& result) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); result = 0; // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } ClusterCommResult* res; map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headers = new map; res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, triagens::rest::HttpRequest::HTTP_REQUEST_GET, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/collection/" + StringUtils::urlEncode(it->first) + "/count", 0, false, headers, nullptr, 300.0); delete res; } // Now listen to the results: int count; int nrok = 0; for (count = (int) shards.size(); count > 0; count--) { res = cc->wait("", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code == triagens::rest::HttpResponse::OK) { TRI_json_t* json = TRI_JsonString(TRI_UNKNOWN_MEM_ZONE, res->answer->body()); if (JsonHelper::isObject(json)) { // add to the total result += JsonHelper::getNumericValue(json, "count", 0); nrok++; } if (json != 0) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); } } } delete res; } if (nrok != (int) shards.size()) { return TRI_ERROR_INTERNAL; } return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however, // the DBserver could have reported an error. } //////////////////////////////////////////////////////////////////////////////// /// @brief creates a document in a coordinator //////////////////////////////////////////////////////////////////////////////// int createDocumentOnCoordinator ( string const& dbname, string const& collname, bool waitForSync, TRI_json_t* json, map const& headers, triagens::rest::HttpResponse::HttpResponseCode& responseCode, map& resultHeaders, string& resultBody) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } string const collid = StringUtils::itoa(collinfo->id()); // Sort out the _key attribute: // The user is allowed to specify _key, provided that _key is the one // and only sharding attribute, because in this case we can delegate // the responsibility to make _key attributes unique to the responsible // shard. Otherwise, we ensure uniqueness here and now by taking a // cluster-wide unique number. Note that we only know the sharding // attributes a bit further down the line when we have determined // the responsible shard. TRI_json_t* subjson = TRI_LookupObjectJson(json, TRI_VOC_ATTRIBUTE_KEY); bool userSpecifiedKey = false; string _key; if (subjson == nullptr) { // The user did not specify a key, let's create one: uint64_t uid = ci->uniqid(); _key = triagens::basics::StringUtils::itoa(uid); TRI_Insert3ObjectJson(TRI_UNKNOWN_MEM_ZONE, json, TRI_VOC_ATTRIBUTE_KEY, TRI_CreateStringReferenceJson(TRI_UNKNOWN_MEM_ZONE, _key.c_str(), _key.size())); } else { userSpecifiedKey = true; } // Now find the responsible shard: bool usesDefaultShardingAttributes; ShardID shardID; int error = ci->getResponsibleShard( collid, json, true, shardID, usesDefaultShardingAttributes ); if (error == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return TRI_ERROR_CLUSTER_SHARD_GONE; } // Now perform the above mentioned check: if (userSpecifiedKey && ! usesDefaultShardingAttributes) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return TRI_ERROR_CLUSTER_MUST_NOT_SPECIFY_KEY; } if (userSpecifiedKey && ! collinfo->allowUserKeys()) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return TRI_ERROR_CLUSTER_MUST_NOT_SPECIFY_KEY; } string const body = JsonHelper::toString(json); TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); // Send a synchronous request to that shard using ClusterComm: ClusterCommResult* res; res = cc->syncRequest("", TRI_NewTickServer(), "shard:" + shardID, triagens::rest::HttpRequest::HTTP_REQUEST_POST, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/document?collection="+ StringUtils::urlEncode(shardID) + "&waitForSync=" + (waitForSync ? "true" : "false"), body, headers, 60.0); if (res->status == CL_COMM_TIMEOUT) { // No reply, we give up: delete res; return TRI_ERROR_CLUSTER_TIMEOUT; } if (res->status == CL_COMM_ERROR) { // This could be a broken connection or an Http error: if (res->result == nullptr || ! res->result->isComplete()) { // there is not result delete res; return TRI_ERROR_CLUSTER_CONNECTION_LOST; } // In this case a proper HTTP error was reported by the DBserver, // this can be 400 or 404, we simply forward the result. // We intentionally fall through here. } responseCode = static_cast (res->result->getHttpReturnCode()); resultHeaders = res->result->getHeaderFields(); resultBody.assign(res->result->getBody().c_str(), res->result->getBody().length()); delete res; return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief deletes a document in a coordinator //////////////////////////////////////////////////////////////////////////////// int deleteDocumentOnCoordinator ( string const& dbname, string const& collname, string const& key, TRI_voc_rid_t const rev, TRI_doc_update_policy_e policy, bool waitForSync, map const& headers, triagens::rest::HttpResponse::HttpResponseCode& responseCode, map& resultHeaders, string& resultBody) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } string collid = StringUtils::itoa(collinfo->id()); // If _key is the one and only sharding attribute, we can do this quickly, // because we can easily determine which shard is responsible for the // document. Otherwise we have to contact all shards and ask them to // delete the document. All but one will not know it. // Now find the responsible shard: TRI_json_t* json = TRI_CreateObjectJson(TRI_UNKNOWN_MEM_ZONE); if (json == nullptr) { return TRI_ERROR_OUT_OF_MEMORY; } TRI_Insert3ObjectJson(TRI_UNKNOWN_MEM_ZONE, json, TRI_VOC_ATTRIBUTE_KEY, TRI_CreateStringReferenceJson(TRI_UNKNOWN_MEM_ZONE, key.c_str(), key.size())); bool usesDefaultShardingAttributes; ShardID shardID; int error = ci->getResponsibleShard( collid, json, true, shardID, usesDefaultShardingAttributes ); TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); // Some stuff to prepare cluster-intern requests: ClusterCommResult* res; string revstr; if (rev != 0) { revstr = "&rev="+StringUtils::itoa(rev); } string policystr; if (policy == TRI_DOC_UPDATE_LAST_WRITE) { policystr = "&policy=last"; } if (usesDefaultShardingAttributes) { // OK, this is the fast method, we only have to ask one shard: if (error == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) { return TRI_ERROR_CLUSTER_SHARD_GONE; } // Send a synchronous request to that shard using ClusterComm: res = cc->syncRequest("", TRI_NewTickServer(), "shard:"+shardID, triagens::rest::HttpRequest::HTTP_REQUEST_DELETE, "/_db/"+dbname+"/_api/document/"+ StringUtils::urlEncode(shardID)+"/"+StringUtils::urlEncode(key)+ "?waitForSync="+(waitForSync ? "true" : "false")+ revstr+policystr, "", headers, 60.0); if (res->status == CL_COMM_TIMEOUT) { // No reply, we give up: delete res; return TRI_ERROR_CLUSTER_TIMEOUT; } if (res->status == CL_COMM_ERROR) { // This could be a broken connection or an Http error: if (res->result == nullptr || ! res->result->isComplete()) { delete res; return TRI_ERROR_CLUSTER_CONNECTION_LOST; } // In this case a proper HTTP error was reported by the DBserver, // this can be 400 or 404, we simply forward the result. // We intentionally fall through here. } responseCode = static_cast (res->result->getHttpReturnCode()); resultHeaders = res->result->getHeaderFields(); resultBody.assign(res->result->getBody().c_str(), res->result->getBody().length()); delete res; return TRI_ERROR_NO_ERROR; } // If we get here, the sharding attributes are not only _key, therefore // we have to contact everybody: map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headersCopy = new map(headers); res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, triagens::rest::HttpRequest::HTTP_REQUEST_DELETE, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/document/" + StringUtils::urlEncode(it->first) + "/" + StringUtils::urlEncode(key) + "?waitForSync=" + (waitForSync ? "true" : "false") + revstr + policystr, 0, false, headersCopy, nullptr, 60.0); delete res; } // Now listen to the results: int count; int nrok = 0; for (count = (int) shards.size(); count > 0; count--) { res = cc->wait("", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code != triagens::rest::HttpResponse::NOT_FOUND || (nrok == 0 && count == 1)) { nrok++; responseCode = res->answer_code; resultHeaders = res->answer->headers(); resultBody = string(res->answer->body(), res->answer->bodySize()); } } delete res; } // Note that nrok is always at least 1! if (nrok > 1) { return TRI_ERROR_CLUSTER_GOT_CONTRADICTING_ANSWERS; } return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however, // the DBserver could have reported an error. } //////////////////////////////////////////////////////////////////////////////// /// @brief truncate a cluster collection on a coordinator //////////////////////////////////////////////////////////////////////////////// int truncateCollectionOnCoordinator ( string const& dbname, string const& collname ) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } // Some stuff to prepare cluster-intern requests: ClusterCommResult* res; // We have to contact everybody: map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headersCopy = new map(); res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, triagens::rest::HttpRequest::HTTP_REQUEST_PUT, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/collection/" + it->first + "/truncate", 0, false, headersCopy, nullptr, 60.0); delete res; } // Now listen to the results: unsigned int count; unsigned int nrok = 0; for (count = (unsigned int) shards.size(); count > 0; count--) { res = cc->wait( "", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code == triagens::rest::HttpResponse::OK) { nrok++; } } delete res; } // Note that nrok is always at least 1! if (nrok < shards.size()) { return TRI_ERROR_CLUSTER_COULD_NOT_TRUNCATE_COLLECTION; } return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief get a document in a coordinator //////////////////////////////////////////////////////////////////////////////// int getDocumentOnCoordinator ( string const& dbname, string const& collname, string const& key, TRI_voc_rid_t const rev, map const& headers, bool generateDocument, triagens::rest::HttpResponse::HttpResponseCode& responseCode, map& resultHeaders, string& resultBody) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } string collid = StringUtils::itoa(collinfo->id()); // If _key is the one and only sharding attribute, we can do this quickly, // because we can easily determine which shard is responsible for the // document. Otherwise we have to contact all shards and ask them to // delete the document. All but one will not know it. // Now find the responsible shard: TRI_json_t* json = TRI_CreateObjectJson(TRI_UNKNOWN_MEM_ZONE); if (json == nullptr) { return TRI_ERROR_OUT_OF_MEMORY; } TRI_Insert3ObjectJson(TRI_UNKNOWN_MEM_ZONE, json, "_key", TRI_CreateStringReferenceJson(TRI_UNKNOWN_MEM_ZONE, key.c_str(), key.size())); bool usesDefaultShardingAttributes; ShardID shardID; int error = ci->getResponsibleShard(collid, json, true, shardID, usesDefaultShardingAttributes ); TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); // Some stuff to prepare cluster-intern requests: ClusterCommResult* res; string revstr; if (rev != 0) { revstr = "?rev=" + StringUtils::itoa(rev); } triagens::rest::HttpRequest::HttpRequestType reqType; if (generateDocument) { reqType = triagens::rest::HttpRequest::HTTP_REQUEST_GET; } else { reqType = triagens::rest::HttpRequest::HTTP_REQUEST_HEAD; } if (usesDefaultShardingAttributes) { // OK, this is the fast method, we only have to ask one shard: if (error == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) { return TRI_ERROR_CLUSTER_SHARD_GONE; } // Send a synchronous request to that shard using ClusterComm: res = cc->syncRequest("", TRI_NewTickServer(), "shard:"+shardID, reqType, "/_db/"+dbname+"/_api/document/"+ StringUtils::urlEncode(shardID)+"/"+StringUtils::urlEncode(key)+ revstr, "", headers, 60.0); if (res->status == CL_COMM_TIMEOUT) { // No reply, we give up: delete res; return TRI_ERROR_CLUSTER_TIMEOUT; } if (res->status == CL_COMM_ERROR) { // This could be a broken connection or an Http error: if (! res->result || ! res->result->isComplete()) { delete res; return TRI_ERROR_CLUSTER_CONNECTION_LOST; } // In this case a proper HTTP error was reported by the DBserver, // this can be 400 or 404, we simply forward the result. // We intentionally fall through here. } responseCode = static_cast (res->result->getHttpReturnCode()); resultHeaders = res->result->getHeaderFields(); resultBody.assign(res->result->getBody().c_str(), res->result->getBody().length()); delete res; return TRI_ERROR_NO_ERROR; } // If we get here, the sharding attributes are not only _key, therefore // we have to contact everybody: map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headersCopy = new map(headers); res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, reqType, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/document/"+ StringUtils::urlEncode(it->first) + "/" + StringUtils::urlEncode(key) + revstr, 0, false, headersCopy, nullptr, 60.0); delete res; } // Now listen to the results: int count; int nrok = 0; for (count = (int) shards.size(); count > 0; count--) { res = cc->wait("", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code != triagens::rest::HttpResponse::NOT_FOUND || (nrok == 0 && count == 1)) { nrok++; responseCode = res->answer_code; resultHeaders = res->answer->headers(); resultBody = string(res->answer->body(), res->answer->bodySize()); } } delete res; } // Note that nrok is always at least 1! if (nrok > 1) { return TRI_ERROR_CLUSTER_GOT_CONTRADICTING_ANSWERS; } return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however, // the DBserver could have reported an error. } //////////////////////////////////////////////////////////////////////////////// /// @brief get all documents in a coordinator //////////////////////////////////////////////////////////////////////////////// int getAllDocumentsOnCoordinator ( string const& dbname, string const& collname, string const& returnType, triagens::rest::HttpResponse::HttpResponseCode& responseCode, string& contentType, string& resultBody ) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } ClusterCommResult* res; map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headers = new map; res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, triagens::rest::HttpRequest::HTTP_REQUEST_GET, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/document?collection=" + it->first + "&type=" + StringUtils::urlEncode(returnType), 0, false, headers, nullptr, 3600.0); delete res; } // Now listen to the results: int count; responseCode = triagens::rest::HttpResponse::OK; contentType = "application/json; charset=utf-8"; triagens::basics::Json result(triagens::basics::Json::Object); triagens::basics::Json documents(triagens::basics::Json::Array); for (count = (int) shards.size(); count > 0; count--) { res = cc->wait( "", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_TIMEOUT) { delete res; cc->drop( "", coordTransactionID, 0, ""); return TRI_ERROR_CLUSTER_TIMEOUT; } if (res->status == CL_COMM_ERROR || res->status == CL_COMM_DROPPED || res->answer_code == triagens::rest::HttpResponse::NOT_FOUND) { delete res; cc->drop( "", coordTransactionID, 0, ""); return TRI_ERROR_INTERNAL; } std::unique_ptr shardResult(TRI_JsonString(TRI_UNKNOWN_MEM_ZONE, res->answer->body())); if (shardResult == nullptr || ! TRI_IsObjectJson(shardResult.get())) { delete res; return TRI_ERROR_INTERNAL; } auto docs = TRI_LookupObjectJson(shardResult.get(), "documents"); if (! TRI_IsArrayJson(docs)) { delete res; return TRI_ERROR_INTERNAL; } size_t const n = TRI_LengthArrayJson(docs); documents.reserve(n); for (size_t j = 0; j < n; ++j) { auto doc = static_cast(TRI_AtVector(&docs->_value._objects, j)); // this will transfer the ownership for the JSON into "documents" documents.transfer(doc); } delete res; } result("documents", documents); resultBody = triagens::basics::JsonHelper::toString(result.json()); return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief modify a document in a coordinator //////////////////////////////////////////////////////////////////////////////// int modifyDocumentOnCoordinator ( string const& dbname, string const& collname, string const& key, TRI_voc_rid_t const rev, TRI_doc_update_policy_e policy, bool waitForSync, bool isPatch, bool keepNull, // only counts for isPatch == true bool mergeObjects, // only counts for isPatch == true TRI_json_t* json, map const& headers, triagens::rest::HttpResponse::HttpResponseCode& responseCode, map& resultHeaders, string& resultBody) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } string collid = StringUtils::itoa(collinfo->id()); // We have a fast path and a slow path. The fast path only asks one shard // to do the job and the slow path asks them all and expects to get // "not found" from all but one shard. We have to cover the following // cases: // isPatch == false (this is a "replace" operation) // Here, the complete new document is given, we assume that we // can read off the responsible shard, therefore can use the fast // path, this is always true if _key is the one and only sharding // attribute, however, if there is any other sharding attribute, // it is possible that the user has changed the values in any of // them, in that case we will get a "not found" or a "sharding // attributes changed answer" in the fast path. In the latter case // we have to delegate to the slow path. // isPatch == true (this is an "update" operation) // In this case we might or might not have all sharding attributes // specified in the partial document given. If _key is the one and // only sharding attribute, it is always given, if not all sharding // attributes are explicitly given (at least as value `null`), we must // assume that the fast path cannot be used. If all sharding attributes // are given, we first try the fast path, but might, as above, // have to use the slow path after all. bool usesDefaultShardingAttributes; ShardID shardID; int error = ci->getResponsibleShard(collid, json, ! isPatch, shardID, usesDefaultShardingAttributes); if (error == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return error; } // Some stuff to prepare cluster-internal requests: ClusterCommResult* res; string revstr; if (rev != 0) { revstr = "&rev=" + StringUtils::itoa(rev); } triagens::rest::HttpRequest::HttpRequestType reqType; if (isPatch) { reqType = triagens::rest::HttpRequest::HTTP_REQUEST_PATCH; if (! keepNull) { revstr += "&keepNull=false"; } if (mergeObjects) { revstr += "&mergeObjects=true"; } else { revstr += "&mergeObjects=false"; } } else { reqType = triagens::rest::HttpRequest::HTTP_REQUEST_PUT; } string policystr; if (policy == TRI_DOC_UPDATE_LAST_WRITE) { policystr = "&policy=last"; } string body = JsonHelper::toString(json); TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); if (! isPatch || error != TRI_ERROR_CLUSTER_NOT_ALL_SHARDING_ATTRIBUTES_GIVEN) { // This is the fast method, we only have to ask one shard, unless // the we are in isPatch==false and the user has actually changed the // sharding attributes // Send a synchronous request to that shard using ClusterComm: res = cc->syncRequest("", TRI_NewTickServer(), "shard:" + shardID, reqType, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/document/" + StringUtils::urlEncode(shardID) + "/" + StringUtils::urlEncode(key) + "?waitForSync=" + (waitForSync ? "true" : "false") + revstr + policystr, body, headers, 60.0); if (res->status == CL_COMM_TIMEOUT) { // No reply, we give up: delete res; return TRI_ERROR_CLUSTER_TIMEOUT; } if (res->status == CL_COMM_ERROR) { // This could be a broken connection or an Http error: if (res->result == nullptr || ! res->result->isComplete()) { delete res; return TRI_ERROR_CLUSTER_CONNECTION_LOST; } // In this case a proper HTTP error was reported by the DBserver, // this can be 400 or 404, we simply forward the result. // We intentionally fall through here. } // Now we have to distinguish whether we still have to go the slow way: responseCode = static_cast (res->result->getHttpReturnCode()); if (responseCode < triagens::rest::HttpResponse::BAD) { // OK, we are done, let's report: resultHeaders = res->result->getHeaderFields(); resultBody.assign(res->result->getBody().c_str(), res->result->getBody().length()); delete res; return TRI_ERROR_NO_ERROR; } delete res; } // If we get here, we have to do it the slow way and contact everybody: map shards = collinfo->shardIds(); map::iterator it; CoordTransactionID coordTransactionID = TRI_NewTickServer(); for (it = shards.begin(); it != shards.end(); ++it) { map* headersCopy = new map(headers); res = cc->asyncRequest("", coordTransactionID, "shard:" + it->first, reqType, "/_db/" + StringUtils::urlEncode(dbname) + "/_api/document/"+ StringUtils::urlEncode(it->first) + "/" + StringUtils::urlEncode(key) + "?waitForSync=" + (waitForSync ? "true" : "false") + revstr + policystr, &body, false, headersCopy, nullptr, 60.0); delete res; } // Now listen to the results: int count; int nrok = 0; for (count = (int) shards.size(); count > 0; count--) { res = cc->wait("", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code != triagens::rest::HttpResponse::NOT_FOUND || (nrok == 0 && count == 1)) { nrok++; responseCode = res->answer_code; resultHeaders = res->answer->headers(); resultBody = string(res->answer->body(), res->answer->bodySize()); } } delete res; } // Note that nrok is always at least 1! if (nrok > 1) { return TRI_ERROR_CLUSTER_GOT_CONTRADICTING_ANSWERS; } return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however, // the DBserver could have reported an error. } //////////////////////////////////////////////////////////////////////////////// /// @brief creates an edge in a coordinator //////////////////////////////////////////////////////////////////////////////// int createEdgeOnCoordinator ( string const& dbname, string const& collname, bool waitForSync, TRI_json_t* json, char const* from, char const* to, triagens::rest::HttpResponse::HttpResponseCode& responseCode, map& resultHeaders, string& resultBody) { // Set a few variables needed for our work: ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); // First determine the collection ID from the name: shared_ptr collinfo = ci->getCollection(dbname, collname); if (collinfo->empty()) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND; } string collid = StringUtils::itoa(collinfo->id()); // Sort out the _key attribute: // The user is allowed to specify _key, provided that _key is the one // and only sharding attribute, because in this case we can delegate // the responsibility to make _key attributes unique to the responsible // shard. Otherwise, we ensure uniqueness here and now by taking a // cluster-wide unique number. Note that we only know the sharding // attributes a bit further down the line when we have determined // the responsible shard. TRI_json_t* subjson = TRI_LookupObjectJson(json, "_key"); bool userSpecifiedKey = false; string _key; if (subjson == nullptr) { // The user did not specify a key, let's create one: uint64_t uid = ci->uniqid(); _key = triagens::basics::StringUtils::itoa(uid); TRI_Insert3ObjectJson(TRI_UNKNOWN_MEM_ZONE, json, "_key", TRI_CreateStringReferenceJson(TRI_UNKNOWN_MEM_ZONE, _key.c_str(), _key.size())); } else { userSpecifiedKey = true; } // Now find the responsible shard: bool usesDefaultShardingAttributes; ShardID shardID; int error = ci->getResponsibleShard( collid, json, true, shardID, usesDefaultShardingAttributes ); if (error == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return TRI_ERROR_CLUSTER_SHARD_GONE; } // Now perform the above mentioned check: if (userSpecifiedKey && !usesDefaultShardingAttributes) { TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); return TRI_ERROR_CLUSTER_MUST_NOT_SPECIFY_KEY; } string body = JsonHelper::toString(json); TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json); // Send a synchronous request to that shard using ClusterComm: ClusterCommResult* res; map headers; res = cc->syncRequest("", TRI_NewTickServer(), "shard:" + shardID, triagens::rest::HttpRequest::HTTP_REQUEST_POST, "/_db/" + dbname + "/_api/edge?collection=" + StringUtils::urlEncode(shardID) + "&waitForSync=" + (waitForSync ? "true" : "false") + "&from=" + StringUtils::urlEncode(from) + "&to=" + StringUtils::urlEncode(to), body, headers, 60.0); if (res->status == CL_COMM_TIMEOUT) { // No reply, we give up: delete res; return TRI_ERROR_CLUSTER_TIMEOUT; } if (res->status == CL_COMM_ERROR) { // This could be a broken connection or an Http error: if (res->result == nullptr || ! res->result->isComplete()) { // there is not result delete res; return TRI_ERROR_CLUSTER_CONNECTION_LOST; } // In this case a proper HTTP error was reported by the DBserver, // this can be 400 or 404, we simply forward the result. // We intentionally fall through here. } responseCode = static_cast (res->result->getHttpReturnCode()); resultHeaders = res->result->getHeaderFields(); resultBody.assign(res->result->getBody().c_str(), res->result->getBody().length()); delete res; return TRI_ERROR_NO_ERROR; } //////////////////////////////////////////////////////////////////////////////// /// @brief flush Wal on all DBservers //////////////////////////////////////////////////////////////////////////////// int flushWalOnAllDBServers (bool waitForSync, bool waitForCollector) { ClusterInfo* ci = ClusterInfo::instance(); ClusterComm* cc = ClusterComm::instance(); vector DBservers = ci->getCurrentDBServers(); CoordTransactionID coordTransactionID = TRI_NewTickServer(); string url = string("/_admin/wal/flush?waitForSync=") + (waitForSync ? "true" : "false") + "&waitForCollector=" + (waitForCollector ? "true" : "false"); ClusterCommResult* res; for (auto it = DBservers.begin(); it != DBservers.end(); ++it) { map* headers = new map; // set collection name (shard id) string* body = new string; res = cc->asyncRequest("", coordTransactionID, "server:" + *it, triagens::rest::HttpRequest::HTTP_REQUEST_PUT, url, body, true, headers, nullptr, 120.0); delete res; } // Now listen to the results: int count; int nrok = 0; for (count = (int) DBservers.size(); count > 0; count--) { res = cc->wait( "", coordTransactionID, 0, "", 0.0); if (res->status == CL_COMM_RECEIVED) { if (res->answer_code == triagens::rest::HttpResponse::OK) { nrok++; } } delete res; } if (nrok != (int) DBservers.size()) { return TRI_ERROR_INTERNAL; } return TRI_ERROR_NO_ERROR; } } // namespace arango } // namespace triagens // ----------------------------------------------------------------------------- // --SECTION-- END-OF-FILE // ----------------------------------------------------------------------------- // Local Variables: // mode: outline-minor // outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}" // End: