1
0
Fork 0
arangodb/arangod/Cluster/ClusterMethods.cpp

2026 lines
73 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Max Neunhoeffer
////////////////////////////////////////////////////////////////////////////////
#include "ClusterMethods.h"
#include "Basics/conversions.h"
#include "Basics/StaticStrings.h"
#include "Basics/StringRef.h"
#include "Basics/StringUtils.h"
#include "Basics/tri-strings.h"
#include "Basics/VelocyPackHelper.h"
#include "Cluster/ClusterComm.h"
#include "Cluster/ClusterInfo.h"
#include "Indexes/Index.h"
#include "VocBase/Traverser.h"
#include "VocBase/server.h"
#include <velocypack/Buffer.h>
#include <velocypack/Helpers.h>
#include <velocypack/Iterator.h>
#include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h>
using namespace arangodb::basics;
using namespace arangodb::rest;
// Default timeout handed to ClusterComm::performRequests() by the coordinator
// operations in this file (presumably in seconds -- TODO confirm against
// ClusterComm).
static double const CL_DEFAULT_TIMEOUT = 60.0;
namespace arangodb {
////////////////////////////////////////////////////////////////////////////////
/// @brief translates the communication status of a ClusterCommResult into an
/// error code.
///
/// Only communication problems are reported:
///  - CL_COMM_TIMEOUT             -> TRI_ERROR_CLUSTER_TIMEOUT
///  - CL_COMM_ERROR               -> TRI_ERROR_CLUSTER_CONNECTION_LOST
///  - CL_COMM_BACKEND_UNAVAILABLE -> TRI_ERROR_CLUSTER_BACKEND_UNAVAILABLE if
///    a complete result is attached, TRI_ERROR_CLUSTER_CONNECTION_LOST
///    otherwise
/// Every other status yields TRI_ERROR_NO_ERROR, i.e. an HTTP-level error
/// returned by the remote side is NOT reported here. Callers in this file
/// treat TRI_ERROR_NO_ERROR as "an answer was received" and go on to inspect
/// res->answer.
////////////////////////////////////////////////////////////////////////////////
static int handleGeneralCommErrors(ClusterCommResult const* res) {
  auto const status = res->status;

  if (status == CL_COMM_TIMEOUT) {
    // no reply at all, give up
    return TRI_ERROR_CLUSTER_TIMEOUT;
  }

  if (status == CL_COMM_ERROR) {
    return TRI_ERROR_CLUSTER_CONNECTION_LOST;
  }

  if (status == CL_COMM_BACKEND_UNAVAILABLE) {
    // a missing or incomplete result means that there is effectively no
    // result, which is reported as a lost connection
    bool const haveCompleteResult =
        (res->result != nullptr) && res->result->isComplete();
    return haveCompleteResult ? TRI_ERROR_CLUSTER_BACKEND_UNAVAILABLE
                              : TRI_ERROR_CLUSTER_CONNECTION_LOST;
  }

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief reads the numeric attribute `name` of the sub-object `group` from a
/// two-level VelocyPack object.
///
/// Returns 0 (converted to T) if `group` is not an object; a missing `name`
/// falls back to the default 0 passed to getNumericValue. `slice` itself must
/// be an object (asserted).
////////////////////////////////////////////////////////////////////////////////
template <typename T>
static T ExtractFigure(VPackSlice const& slice, char const* group,
                       char const* name) {
  TRI_ASSERT(slice.isObject());

  VPackSlice const section = slice.get(group);
  if (section.isObject()) {
    return arangodb::basics::VelocyPackHelper::getNumericValue<T>(section,
                                                                  name, 0);
  }

  return static_cast<T>(0);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief parses the JSON body of the answer in `res` into a VPackBuilder.
///
/// Never throws: if there is no answer attached to `res`, or if the body
/// cannot be parsed, an empty builder is returned instead.
////////////////////////////////////////////////////////////////////////////////
static std::shared_ptr<VPackBuilder> ExtractAnswer(
    ClusterCommResult const& res) {
  if (res.answer == nullptr) {
    // Dereferencing a missing answer would not raise a C++ exception, so the
    // try/catch below could not turn it into an empty builder.
    return std::make_shared<VPackBuilder>();
  }

  try {
    return VPackParser::fromJson(res.answer->body());
  } catch (...) {
    // parse error (or any other failure): hand out an empty builder
    return std::make_shared<VPackBuilder>();
  }
}
////////////////////////////////////////////////////////////////////////////////
/// @brief merge the baby-object results.
/// The shard map contains the ordering of elements, the vector in this
/// Map is expected to be sorted from front to back.
/// The second map contains the answers for each shard.
/// The builder in the third parameter will be cleared and will contain
/// the resulting array. It is guaranteed that the resulting array
/// indexes
/// are equal to the original request ordering before it was destructured
/// for babies.
////////////////////////////////////////////////////////////////////////////////
static void mergeResults(
std::vector<std::pair<ShardID, VPackValueLength>> const& reverseMapping,
std::unordered_map<ShardID, std::shared_ptr<VPackBuilder>> const& resultMap,
std::shared_ptr<VPackBuilder>& resultBody) {
resultBody->clear();
resultBody->openArray();
for (auto const& pair : reverseMapping) {
VPackSlice arr = resultMap.find(pair.first)->second->slice();
resultBody->add(arr.at(pair.second));
}
resultBody->close();
}
////////////////////////////////////////////////////////////////////////////////
/// @brief merge the baby-object results. (all shards version)
/// results contians the result from all shards in any order.
/// resultBody will be cleared and contains the merged result after this
/// function
/// errorCounter will correctly compute the NOT_FOUND counter, all other
/// codes remain unmodified.
///
/// The merge is executed the following way:
/// FOR every expected document we scan iterate over the corresponding
/// response
/// of each shard. If any of them returned sth. different than NOT_FOUND
/// we take this result as correct.
/// If none returned sth different than NOT_FOUND we return NOT_FOUND as
/// well
////////////////////////////////////////////////////////////////////////////////
static void mergeResultsAllShards(
std::vector<std::shared_ptr<VPackBuilder>> const& results,
std::shared_ptr<VPackBuilder>& resultBody,
std::unordered_map<int, size_t>& errorCounter,
VPackValueLength const expectedResults) {
// errorCounter is not allowed to contain any NOT_FOUND entry.
TRI_ASSERT(errorCounter.find(TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND) ==
errorCounter.end());
size_t realNotFound = 0;
VPackBuilder cmp;
cmp.openObject();
cmp.add("error", VPackValue(true));
cmp.add("errorNum", VPackValue(TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND));
cmp.close();
VPackSlice notFound = cmp.slice();
resultBody->clear();
resultBody->openArray();
for (VPackValueLength currentIndex = 0; currentIndex < expectedResults;
++currentIndex) {
bool foundRes = false;
for (auto const& it : results) {
VPackSlice oneRes = it->slice();
TRI_ASSERT(oneRes.isArray());
oneRes = oneRes.at(currentIndex);
if (!oneRes.equals(notFound)) {
// This is the correct result
// Use it
resultBody->add(oneRes);
foundRes = true;
break;
}
}
if (!foundRes) {
// Found none, use NOT_FOUND
resultBody->add(notFound);
realNotFound++;
}
}
resultBody->close();
if (realNotFound > 0) {
errorCounter.emplace(TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND, realNotFound);
}
}
////////////////////////////////////////////////////////////////////////////////
/// @brief adds the baby-style error counts reported by a shard to
/// `errorCounter`.
///
/// The counts are read from the StaticStrings::ErrorCodes header of
/// `res.answer`. The header value is parsed as a JSON object whose keys are
/// error numbers (as strings) and whose values are the number of occurrences.
/// Nothing happens if the header is absent. NOT_FOUND entries are skipped
/// unless `includeNotFound` is true.
///
/// `res.answer` must not be null. Whatever VPackParser::fromJson raises for a
/// malformed header value is passed on to the caller.
////////////////////////////////////////////////////////////////////////////////
static void extractErrorCodes(ClusterCommResult const& res,
                              std::unordered_map<int, size_t>& errorCounter,
                              bool includeNotFound) {
  // bind by reference: the headers are only read, copying the whole map for
  // every answer is unnecessary
  auto const& resultHeaders = res.answer->headers();
  auto codes = resultHeaders.find(StaticStrings::ErrorCodes);
  if (codes == resultHeaders.end()) {
    // the shard did not report any error counts
    return;
  }

  auto parsedCodes = VPackParser::fromJson(codes->second);
  VPackSlice codesSlice = parsedCodes->slice();
  TRI_ASSERT(codesSlice.isObject());

  for (auto const& code : VPackObjectIterator(codesSlice)) {
    VPackValueLength codeLength;
    char const* codeString = code.key.getString(codeLength);
    int codeNr = static_cast<int>(arangodb::basics::StringUtils::int64(
        codeString, static_cast<size_t>(codeLength)));
    if (includeNotFound || codeNr != TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND) {
      errorCounter[codeNr] += code.value.getNumericValue<size_t>();
    }
  }
}
////////////////////////////////////////////////////////////////////////////////
/// @brief Distribute one document onto a shard map. If this returns
/// TRI_ERROR_NO_ERROR the correct shard could be determined, if
/// it returns sth. else this document is NOT contained in the shardMap
////////////////////////////////////////////////////////////////////////////////
static int distributeBabyOnShards(
std::unordered_map<ShardID, std::vector<VPackValueLength>>& shardMap,
ClusterInfo* ci, std::string const& collid,
std::shared_ptr<CollectionInfo> collinfo,
std::vector<std::pair<ShardID, VPackValueLength>>& reverseMapping,
VPackSlice const node, VPackValueLength const index) {
// Now find the responsible shard:
bool usesDefaultShardingAttributes;
ShardID shardID;
int error = ci->getResponsibleShard(collid, node, false, shardID,
usesDefaultShardingAttributes);
if (error == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) {
return TRI_ERROR_CLUSTER_SHARD_GONE;
}
if (error != TRI_ERROR_NO_ERROR) {
// We can not find a responsible shard
return error;
}
// We found the responsible shard. Add it to the list.
auto it = shardMap.find(shardID);
if (it == shardMap.end()) {
std::vector<VPackValueLength> counter({index});
shardMap.emplace(shardID, counter);
reverseMapping.emplace_back(shardID, 0);
} else {
it->second.emplace_back(index);
reverseMapping.emplace_back(shardID, it->second.size() - 1);
}
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief assigns one document of a baby insert request to its responsible
/// shard and generates a key for it if necessary.
///
/// On success (TRI_ERROR_NO_ERROR) the pair (index, generated key) is appended
/// to the list of the responsible shard in `shardMap`; the key is empty if
/// none was generated. `reverseMapping` gets a new entry holding the shard and
/// the position of the document within the shard's list.
/// On any other return value the document has NOT been added to `shardMap`:
///  - TRI_ERROR_CLUSTER_SHARD_GONE if the collection cannot be found
///  - TRI_ERROR_CLUSTER_MUST_NOT_SPECIFY_KEY if the user supplied a _key
///    although the collection is not sharded by the default attributes or
///    does not allow user keys
///  - any other error reported by ClusterInfo::getResponsibleShard
////////////////////////////////////////////////////////////////////////////////
static int distributeBabyOnShards(
    std::unordered_map<ShardID,
                       std::vector<std::pair<VPackValueLength, std::string>>>&
        shardMap,
    ClusterInfo* ci, std::string const& collid,
    std::shared_ptr<CollectionInfo> collinfo,
    std::vector<std::pair<ShardID, VPackValueLength>>& reverseMapping,
    VPackSlice const node, VPackValueLength const index) {
  ShardID shardID;
  bool userSpecifiedKey = false;
  std::string _key;

  if (!node.isObject()) {
    // We have invalid input at this point.
    // However we can work with the other babies.
    // This is for compatibility with single server
    // We just assign it to any shard and pretend the user has given a key
    std::shared_ptr<std::vector<ShardID>> shards = ci->getShardList(collid);
    shardID = shards->at(0);
    userSpecifiedKey = true;
  } else {
    // Sort out the _key attribute:
    // The user is allowed to specify _key, provided that _key is the one
    // and only sharding attribute, because in this case we can delegate
    // the responsibility to make _key attributes unique to the responsible
    // shard. Otherwise, we ensure uniqueness here and now by taking a
    // cluster-wide unique number. Note that we only know the sharding
    // attributes a bit further down the line when we have determined
    // the responsible shard.
    VPackSlice keySlice = node.get(StaticStrings::KeyString);
    if (keySlice.isNone()) {
      // The user did not specify a key, let's create one:
      uint64_t uid = ci->uniqid();
      _key = arangodb::basics::StringUtils::itoa(uid);
    } else {
      userSpecifiedKey = true;
    }

    // Now find the responsible shard. The flag is initialized so that it is
    // never read in an indeterminate state.
    bool usesDefaultShardingAttributes = false;
    int error = TRI_ERROR_NO_ERROR;
    if (userSpecifiedKey) {
      error = ci->getResponsibleShard(collid, node, true, shardID,
                                      usesDefaultShardingAttributes);
    } else {
      error = ci->getResponsibleShard(collid, node, true, shardID,
                                      usesDefaultShardingAttributes, _key);
    }
    if (error == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) {
      return TRI_ERROR_CLUSTER_SHARD_GONE;
    }
    if (error != TRI_ERROR_NO_ERROR) {
      // No responsible shard could be determined. Bail out like the
      // index-only overload does instead of filing the document under
      // whatever shardID happens to contain.
      return error;
    }

    // Now perform the above mentioned check:
    if (userSpecifiedKey &&
        (!usesDefaultShardingAttributes || !collinfo->allowUserKeys())) {
      return TRI_ERROR_CLUSTER_MUST_NOT_SPECIFY_KEY;
    }
  }

  // We found the responsible shard. Add the document to its list; a shard
  // that is seen for the first time starts out with an empty list.
  auto& entries = shardMap[shardID];
  entries.emplace_back(index, _key);
  reverseMapping.emplace_back(shardID, entries.size() - 1);

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief collects the results of all shards (fastpath variant).
///
/// For every request in `requests` one entry is stored in `resultMap`:
///  - if an answer was received: the answer body as VelocyPack, stored under
///    the shard id of the result. The error counts reported by the shard are
///    added to `errorCounter`, and `responseCode` is set to the HTTP code of
///    this answer (the last answer processed wins).
///  - on a communication error: an array with one
///    {error: true, errorNum: <communication error>} object per document that
///    was sent to this shard according to `shardMap`, so that the result can
///    be indexed per document just like a real answer.
/// `responseCode` is SERVER_ERROR if no shard answered at all.
////////////////////////////////////////////////////////////////////////////////
template <typename T>
static void collectResultsFromAllShards(
    std::unordered_map<ShardID, std::vector<T>> const& shardMap,
    std::vector<ClusterCommRequest>& requests,
    std::unordered_map<int, size_t>& errorCounter,
    std::unordered_map<ShardID, std::shared_ptr<VPackBuilder>>& resultMap,
    GeneralResponse::ResponseCode& responseCode) {
  // If none of the shards responds we return a SERVER_ERROR;
  responseCode = GeneralResponse::ResponseCode::SERVER_ERROR;

  for (auto const& req : requests) {
    // the result is only read here, so there is no need to copy it
    auto const& res = req.result;

    int commError = handleGeneralCommErrors(&res);
    if (commError != TRI_ERROR_NO_ERROR) {
      auto tmpBuilder = std::make_shared<VPackBuilder>();
      // If there was no answer whatsoever, we cannot rely on the shardId
      // being present in the result struct. The destination has the form
      // "shard:<id>", so skip the 6 characters of the prefix:
      ShardID sId = req.destination.substr(6);
      auto weSend = shardMap.find(sId);
      TRI_ASSERT(weSend != shardMap.end());  // We sent sth. there earlier.
      size_t count = weSend->second.size();

      // The per-document errors must be wrapped in an array: the merge step
      // addresses the entries of a shard by array index.
      tmpBuilder->openArray();
      for (size_t i = 0; i < count; ++i) {
        tmpBuilder->openObject();
        tmpBuilder->add("error", VPackValue(true));
        tmpBuilder->add("errorNum", VPackValue(commError));
        tmpBuilder->close();
      }
      tmpBuilder->close();

      resultMap.emplace(sId, tmpBuilder);
    } else {
      TRI_ASSERT(res.answer != nullptr);
      resultMap.emplace(res.shardID,
                        res.answer->toVelocyPack(&VPackOptions::Defaults));
      extractErrorCodes(res, errorCounter, true);
      responseCode = res.answer_code;
    }
  }
}
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a copy of all HTTP headers to forward.
///
/// The following headers of `request` are not copied: x-arango-async,
/// authorization, content-length, connection, expect, host, origin, the
/// StaticStrings::ErrorCodes header and everything starting with
/// "access-control". The "content-length" entry of the result is then set
/// from request->contentLength().
///
/// Returns an empty map if `request` is a nullptr.
////////////////////////////////////////////////////////////////////////////////
std::unordered_map<std::string, std::string> getForwardableRequestHeaders(
    arangodb::GeneralRequest* request) {
  std::unordered_map<std::string, std::string> result;

  if (request == nullptr) {
    // nothing to forward; checked up front, before the request is
    // dereferenced for the first time
    return result;
  }

  for (auto const& header : request->headers()) {
    std::string const& key = header.first;

    // ignore the following headers
    if (key != "x-arango-async" && key != "authorization" &&
        key != "content-length" && key != "connection" && key != "expect" &&
        key != "host" && key != "origin" && key != StaticStrings::ErrorCodes &&
        key.compare(0, 14, "access-control") != 0) {
      result.emplace(key, header.second);
    }
  }

  result["content-length"] = StringUtils::itoa(request->contentLength());

  return result;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief check if a list of attributes have the same values in two vpack
/// documents
////////////////////////////////////////////////////////////////////////////////
bool shardKeysChanged(std::string const& dbname, std::string const& collname,
VPackSlice const& oldValue, VPackSlice const& newValue,
bool isPatch) {
if (!oldValue.isObject() || !newValue.isObject()) {
// expecting two objects. everything else is an error
return true;
}
ClusterInfo* ci = ClusterInfo::instance();
std::shared_ptr<CollectionInfo> c = ci->getCollection(dbname, collname);
std::vector<std::string> const& shardKeys = c->shardKeys();
for (size_t i = 0; i < shardKeys.size(); ++i) {
if (shardKeys[i] == StaticStrings::KeyString) {
continue;
}
VPackSlice n = newValue.get(shardKeys[i]);
if (n.isNone() && isPatch) {
// attribute not set in patch document. this means no update
continue;
}
// a temporary buffer to hold a null value
char buffer[1];
VPackSlice nullValue =
arangodb::velocypack::buildNullValue(&buffer[0], sizeof(buffer));
VPackSlice o = oldValue.get(shardKeys[i]);
if (o.isNone()) {
// if attribute is undefined, use "null" instead
o = nullValue;
}
if (n.isNone()) {
// if attribute is undefined, use "null" instead
n = nullValue;
}
if (arangodb::basics::VelocyPackHelper::compare(n, o, false) != 0) {
return true;
}
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief returns revision for a sharded collection.
///
/// Asks every shard of the collection for its revision and stores the
/// maximum of all reported revisions in `rid`. `rid` is reset to 0 first, so
/// it has a defined value on every return path.
///
/// Returns TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND if the collection is unknown,
/// TRI_ERROR_INTERNAL unless every shard answered with HTTP OK and a JSON
/// object, and TRI_ERROR_NO_ERROR otherwise.
////////////////////////////////////////////////////////////////////////////////
int revisionOnCoordinator(std::string const& dbname,
                          std::string const& collname, TRI_voc_rid_t& rid) {
  // give the out-parameter a defined value before any early return, like
  // countOnCoordinator does for its result
  rid = 0;

  // Set a few variables needed for our work:
  ClusterInfo* ci = ClusterInfo::instance();
  ClusterComm* cc = ClusterComm::instance();

  // First determine the collection ID from the name:
  std::shared_ptr<CollectionInfo> collinfo =
      ci->getCollection(dbname, collname);
  if (collinfo->empty()) {
    return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
  }

  // The revision is a per-shard value, so every shard has to be contacted:
  auto shards = collinfo->shardIds();
  CoordTransactionID coordTransactionID = TRI_NewTickServer();
  for (auto const& p : *shards) {
    auto headers =
        std::make_unique<std::unordered_map<std::string, std::string>>();
    cc->asyncRequest(
        "", coordTransactionID, "shard:" + p.first,
        arangodb::GeneralRequest::RequestType::GET,
        "/_db/" + StringUtils::urlEncode(dbname) + "/_api/collection/" +
            StringUtils::urlEncode(p.first) + "/revision",
        std::shared_ptr<std::string const>(), headers, nullptr, 300.0);
  }

  // Now listen to the results, one wait() per request sent above:
  size_t nrok = 0;
  for (size_t pending = shards->size(); pending > 0; --pending) {
    auto res = cc->wait("", coordTransactionID, 0, "", 0.0);
    if (res.status != CL_COMM_RECEIVED ||
        res.answer_code != arangodb::GeneralResponse::ResponseCode::OK) {
      // not counted as ok, this makes the whole operation fail below
      continue;
    }

    std::shared_ptr<VPackBuilder> answerBuilder = ExtractAnswer(res);
    VPackSlice answer = answerBuilder->slice();
    if (!answer.isObject()) {
      continue;
    }

    VPackSlice r = answer.get("revision");
    if (r.isString()) {
      TRI_voc_rid_t cmp = StringUtils::uint64(r.copyString());
      if (cmp > rid) {
        // get the maximum value
        rid = cmp;
      }
    }
    ++nrok;
  }

  if (nrok != shards->size()) {
    return TRI_ERROR_INTERNAL;
  }

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief returns figures for a sharded collection
////////////////////////////////////////////////////////////////////////////////
int figuresOnCoordinator(std::string const& dbname, std::string const& collname,
TRI_doc_collection_info_t*& result) {
// Set a few variables needed for our work:
ClusterInfo* ci = ClusterInfo::instance();
ClusterComm* cc = ClusterComm::instance();
// First determine the collection ID from the name:
std::shared_ptr<CollectionInfo> collinfo =
ci->getCollection(dbname, collname);
if (collinfo->empty()) {
return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
}
// prefill with 0s
result = (TRI_doc_collection_info_t*)TRI_Allocate(
TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_doc_collection_info_t), true);
if (result == nullptr) {
return TRI_ERROR_OUT_OF_MEMORY;
}
// If we get here, the sharding attributes are not only _key, therefore
// we have to contact everybody:
auto shards = collinfo->shardIds();
CoordTransactionID coordTransactionID = TRI_NewTickServer();
for (auto const& p : *shards) {
auto headers =
std::make_unique<std::unordered_map<std::string, std::string>>();
cc->asyncRequest(
"", coordTransactionID, "shard:" + p.first,
arangodb::GeneralRequest::RequestType::GET,
"/_db/" + StringUtils::urlEncode(dbname) + "/_api/collection/" +
StringUtils::urlEncode(p.first) + "/figures",
std::shared_ptr<std::string const>(), headers, nullptr, 300.0);
}
// Now listen to the results:
int count;
int nrok = 0;
for (count = (int)shards->size(); count > 0; count--) {
auto res = cc->wait("", coordTransactionID, 0, "", 0.0);
if (res.status == CL_COMM_RECEIVED) {
if (res.answer_code == arangodb::GeneralResponse::ResponseCode::OK) {
std::shared_ptr<VPackBuilder> answerBuilder = ExtractAnswer(res);
VPackSlice answer = answerBuilder->slice();
if (answer.isObject()) {
VPackSlice figures = answer.get("figures");
if (figures.isObject()) {
// add to the total
result->_numberAlive +=
ExtractFigure<TRI_voc_ssize_t>(figures, "alive", "count");
result->_numberDead +=
ExtractFigure<TRI_voc_ssize_t>(figures, "dead", "count");
result->_numberDeletions +=
ExtractFigure<TRI_voc_ssize_t>(figures, "dead", "deletion");
result->_numberIndexes +=
ExtractFigure<TRI_voc_ssize_t>(figures, "indexes", "count");
result->_sizeAlive +=
ExtractFigure<int64_t>(figures, "alive", "size");
result->_sizeDead +=
ExtractFigure<int64_t>(figures, "dead", "size");
result->_sizeIndexes +=
ExtractFigure<int64_t>(figures, "indexes", "size");
result->_numberDatafiles +=
ExtractFigure<TRI_voc_ssize_t>(figures, "datafiles", "count");
result->_numberJournalfiles +=
ExtractFigure<TRI_voc_ssize_t>(figures, "journals", "count");
result->_numberCompactorfiles +=
ExtractFigure<TRI_voc_ssize_t>(figures, "compactors", "count");
result->_datafileSize +=
ExtractFigure<int64_t>(figures, "datafiles", "fileSize");
result->_journalfileSize +=
ExtractFigure<int64_t>(figures, "journals", "fileSize");
result->_compactorfileSize +=
ExtractFigure<int64_t>(figures, "compactors", "fileSize");
result->_numberDocumentDitches +=
arangodb::basics::VelocyPackHelper::getNumericValue<uint64_t>(
figures, "documentReferences", 0);
}
nrok++;
}
}
}
}
if (nrok != (int)shards->size()) {
TRI_Free(TRI_UNKNOWN_MEM_ZONE, result);
result = 0;
return TRI_ERROR_INTERNAL;
}
return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however,
// the DBserver could have reported an error.
}
////////////////////////////////////////////////////////////////////////////////
/// @brief counts number of documents in a coordinator.
///
/// Asks every shard of the collection for its document count and stores the
/// sum in `result`. Processing stops at the first shard whose result cannot
/// be used; `result` then holds the sum of the shards processed so far.
///
/// Returns TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND if the collection is unknown,
/// TRI_ERROR_CLUSTER_BACKEND_UNAVAILABLE if no answer was received from a
/// shard, the HTTP response code (converted to int) of a shard that did not
/// answer with OK, and TRI_ERROR_INTERNAL if an answer is not a JSON object.
////////////////////////////////////////////////////////////////////////////////
int countOnCoordinator(std::string const& dbname, std::string const& collname,
                       uint64_t& result) {
  // Set a few variables needed for our work:
  ClusterInfo* ci = ClusterInfo::instance();
  ClusterComm* cc = ClusterComm::instance();

  result = 0;

  // First determine the collection ID from the name:
  std::shared_ptr<CollectionInfo> collinfo =
      ci->getCollection(dbname, collname);
  if (collinfo->empty()) {
    return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
  }

  // one GET request per shard, all sharing the same (empty) body
  auto shards = collinfo->shardIds();
  auto body = std::make_shared<std::string>();
  std::vector<ClusterCommRequest> requests;
  for (auto const& p : *shards) {
    requests.emplace_back("shard:" + p.first,
                          arangodb::GeneralRequest::RequestType::GET,
                          "/_db/" + StringUtils::urlEncode(dbname) +
                              "/_api/collection/" +
                              StringUtils::urlEncode(p.first) + "/count",
                          body);
  }

  size_t nrDone = 0;
  cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::QUERIES);

  for (auto& req : requests) {
    auto& res = req.result;

    if (res.status != CL_COMM_RECEIVED) {
      return TRI_ERROR_CLUSTER_BACKEND_UNAVAILABLE;
    }
    if (res.answer_code != arangodb::GeneralResponse::ResponseCode::OK) {
      return static_cast<int>(res.answer_code);
    }

    std::shared_ptr<VPackBuilder> answerBuilder = ExtractAnswer(res);
    VPackSlice answer = answerBuilder->slice();
    if (!answer.isObject()) {
      return TRI_ERROR_INTERNAL;
    }

    // add to the total
    result += arangodb::basics::VelocyPackHelper::getNumericValue<uint64_t>(
        answer, "count", 0);
  }

  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief creates one or many documents in a coordinator
///
/// In case of many documents (slice is a VPackArray) it will send to each
/// shard all the relevant documents for this shard only.
/// If one of them fails, this error is reported.
/// There is NO guarantee for the stored documents of all other shards, they may
/// be stored or not. All answers of these shards are dropped.
/// If we return with NO_ERROR it is guaranteed that all shards reported success
/// for their documents.
////////////////////////////////////////////////////////////////////////////////
int createDocumentOnCoordinator(
std::string const& dbname, std::string const& collname,
arangodb::OperationOptions const& options, VPackSlice const& slice,
arangodb::GeneralResponse::ResponseCode& responseCode,
std::unordered_map<int, size_t>& errorCounter,
std::shared_ptr<VPackBuilder>& resultBody) {
// Set a few variables needed for our work:
ClusterInfo* ci = ClusterInfo::instance();
ClusterComm* cc = ClusterComm::instance();
// First determine the collection ID from the name:
std::shared_ptr<CollectionInfo> collinfo =
ci->getCollection(dbname, collname);
if (collinfo->empty()) {
return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
}
std::string const collid = StringUtils::itoa(collinfo->id());
std::unordered_map<
ShardID, std::vector<std::pair<VPackValueLength, std::string>>> shardMap;
std::vector<std::pair<ShardID, VPackValueLength>> reverseMapping;
bool useMultiple = slice.isArray();
int res = TRI_ERROR_NO_ERROR;
if (useMultiple) {
VPackValueLength length = slice.length();
for (VPackValueLength idx = 0; idx < length; ++idx) {
res = distributeBabyOnShards(shardMap, ci, collid, collinfo,
reverseMapping, slice.at(idx), idx);
if (res != TRI_ERROR_NO_ERROR) {
return res;
}
}
} else {
res = distributeBabyOnShards(shardMap, ci, collid, collinfo, reverseMapping,
slice, 0);
if (res != TRI_ERROR_NO_ERROR) {
return res;
}
}
std::string const baseUrl =
"/_db/" + StringUtils::urlEncode(dbname) + "/_api/document?collection=";
std::string const optsUrlPart =
std::string("&waitForSync=") + (options.waitForSync ? "true" : "false") +
"&returnNew=" + (options.returnNew ? "true" : "false") + "&returnOld=" +
(options.returnOld ? "true" : "false");
VPackBuilder reqBuilder;
// Now prepare the requests:
std::vector<ClusterCommRequest> requests;
auto body = std::make_shared<std::string>();
for (auto const& it : shardMap) {
if (!useMultiple) {
TRI_ASSERT(it.second.size() == 1);
auto idx = it.second.front();
if (idx.second.empty()) {
body = std::make_shared<std::string>(slice.toJson());
} else {
reqBuilder.clear();
reqBuilder.openObject();
reqBuilder.add(StaticStrings::KeyString, VPackValue(idx.second));
TRI_SanitizeObject(slice, reqBuilder);
reqBuilder.close();
body = std::make_shared<std::string>(reqBuilder.slice().toJson());
}
} else {
reqBuilder.clear();
reqBuilder.openArray();
for (auto const& idx : it.second) {
if (idx.second.empty()) {
reqBuilder.add(slice.at(idx.first));
} else {
reqBuilder.openObject();
reqBuilder.add(StaticStrings::KeyString, VPackValue(idx.second));
TRI_SanitizeObject(slice.at(idx.first), reqBuilder);
reqBuilder.close();
}
}
reqBuilder.close();
body = std::make_shared<std::string>(reqBuilder.slice().toJson());
}
requests.emplace_back(
"shard:" + it.first, arangodb::GeneralRequest::RequestType::POST,
baseUrl + StringUtils::urlEncode(it.first) + optsUrlPart, body);
}
// Perform the requests
size_t nrDone = 0;
cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);
// Now listen to the results:
if (!useMultiple) {
TRI_ASSERT(requests.size() == 1);
auto const& req = requests[0];
auto& res = req.result;
int commError = handleGeneralCommErrors(&res);
if (commError != TRI_ERROR_NO_ERROR) {
return commError;
}
responseCode = res.answer_code;
TRI_ASSERT(res.answer != nullptr);
auto parsedResult = res.answer->toVelocyPack(&VPackOptions::Defaults);
resultBody.swap(parsedResult);
return TRI_ERROR_NO_ERROR;
}
std::unordered_map<ShardID, std::shared_ptr<VPackBuilder>> resultMap;
collectResultsFromAllShards<std::pair<VPackValueLength, std::string>>(
shardMap, requests, errorCounter, resultMap, responseCode);
responseCode =
(options.waitForSync ? GeneralResponse::ResponseCode::CREATED
: GeneralResponse::ResponseCode::ACCEPTED);
mergeResults(reverseMapping, resultMap, resultBody);
// the cluster operation was OK, however,
// the DBserver could have reported an error.
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief deletes a document in a coordinator
////////////////////////////////////////////////////////////////////////////////
int deleteDocumentOnCoordinator(
std::string const& dbname, std::string const& collname,
VPackSlice const slice, arangodb::OperationOptions const& options,
arangodb::GeneralResponse::ResponseCode& responseCode,
std::unordered_map<int, size_t>& errorCounter,
std::shared_ptr<arangodb::velocypack::Builder>& resultBody) {
// Set a few variables needed for our work:
ClusterInfo* ci = ClusterInfo::instance();
ClusterComm* cc = ClusterComm::instance();
// First determine the collection ID from the name:
std::shared_ptr<CollectionInfo> collinfo =
ci->getCollection(dbname, collname);
if (collinfo->empty()) {
return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
}
bool useDefaultSharding = collinfo->usesDefaultShardKeys();
std::string collid = StringUtils::itoa(collinfo->id());
bool useMultiple = slice.isArray();
std::string const baseUrl =
"/_db/" + StringUtils::urlEncode(dbname) + "/_api/document/";
std::string const optsUrlPart =
std::string("?waitForSync=") + (options.waitForSync ? "true" : "false") +
"&returnOld=" + (options.returnOld ? "true" : "false") + "&ignoreRevs=" +
(options.ignoreRevs ? "true" : "false");
VPackBuilder reqBuilder;
if (useDefaultSharding) {
// fastpath we know which server is responsible.
// decompose the input into correct shards.
// Send the correct documents to the correct shards
// Merge the results with static merge helper
std::unordered_map<ShardID, std::vector<VPackValueLength>> shardMap;
std::vector<std::pair<ShardID, VPackValueLength>> reverseMapping;
auto workOnOneNode = [&shardMap, &ci, &collid, &collinfo, &reverseMapping](
VPackSlice const node, VPackValueLength const index) -> int {
// Sort out the _key attribute and identify the shard responsible for it.
StringRef _key(Transaction::extractKeyPart(node));
ShardID shardID;
if (_key.empty()) {
// We have invalid input at this point.
// However we can work with the other babies.
// This is for compatibility with single server
// We just assign it to any shard and pretend the user has given a key
std::shared_ptr<std::vector<ShardID>> shards = ci->getShardList(collid);
shardID = shards->at(0);
} else {
// Now find the responsible shard:
bool usesDefaultShardingAttributes;
int error = ci->getResponsibleShard(
collid, arangodb::basics::VelocyPackHelper::EmptyObjectValue(), true,
shardID, usesDefaultShardingAttributes, _key.toString());
if (error == TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND) {
return TRI_ERROR_CLUSTER_SHARD_GONE;
}
}
// We found the responsible shard. Add it to the list.
auto it = shardMap.find(shardID);
if (it == shardMap.end()) {
std::vector<VPackValueLength> counter({index});
shardMap.emplace(shardID, counter);
reverseMapping.emplace_back(shardID, 0);
} else {
it->second.emplace_back(index);
reverseMapping.emplace_back(shardID, it->second.size() - 1);
}
return TRI_ERROR_NO_ERROR;
};
if (useMultiple) {
for (VPackValueLength idx = 0; idx < slice.length(); ++idx) {
int res = workOnOneNode(slice.at(idx), idx);
if (res != TRI_ERROR_NO_ERROR) {
// Is early abortion correct?
return res;
}
}
} else {
int res = workOnOneNode(slice, 0);
if (res != TRI_ERROR_NO_ERROR) {
return res;
}
}
// We sorted the shards correctly.
// Now prepare the requests:
std::vector<ClusterCommRequest> requests;
auto body = std::make_shared<std::string>();
for (auto const& it : shardMap) {
if (!useMultiple) {
TRI_ASSERT(it.second.size() == 1);
body = std::make_shared<std::string>(slice.toJson());
} else {
reqBuilder.clear();
reqBuilder.openArray();
for (auto const& idx : it.second) {
reqBuilder.add(slice.at(idx));
}
reqBuilder.close();
body = std::make_shared<std::string>(reqBuilder.slice().toJson());
}
requests.emplace_back(
"shard:" + it.first,
arangodb::GeneralRequest::RequestType::DELETE_REQ,
baseUrl + StringUtils::urlEncode(it.first) + optsUrlPart, body);
}
// Perform the requests
size_t nrDone = 0;
cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);
// Now listen to the results:
if (!useMultiple) {
TRI_ASSERT(requests.size() == 1);
auto const& req = requests[0];
auto& res = req.result;
int commError = handleGeneralCommErrors(&res);
if (commError != TRI_ERROR_NO_ERROR) {
return commError;
}
responseCode = res.answer_code;
TRI_ASSERT(res.answer != nullptr);
auto parsedResult = res.answer->toVelocyPack(&VPackOptions::Defaults);
resultBody.swap(parsedResult);
return TRI_ERROR_NO_ERROR;
}
std::unordered_map<ShardID, std::shared_ptr<VPackBuilder>> resultMap;
collectResultsFromAllShards<VPackValueLength>(
shardMap, requests, errorCounter, resultMap, responseCode);
mergeResults(reverseMapping, resultMap, resultBody);
return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however,
// the DBserver could have reported an error.
}
// Slow path: we do not know which server is responsible, so we ask all of them.
// We simply send the body to all shards and await their results.
// As soon as we have the results we merge them in the following way:
// For 1 .. slice.length()
// for res : allResults
// if res != NOT_FOUND => insert this result. skip other results
// end
// if (!skipped) => insert NOT_FOUND
auto body = std::make_shared<std::string>(slice.toJson());
std::vector<ClusterCommRequest> requests;
auto shardList = ci->getShardList(collid);
for (auto const& shard : *shardList) {
requests.emplace_back(
"shard:" + shard, arangodb::GeneralRequest::RequestType::DELETE_REQ,
baseUrl + StringUtils::urlEncode(shard) + optsUrlPart, body);
}
// Perform the requests
size_t nrDone = 0;
cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);
// Now listen to the results:
if (!useMultiple) {
// Only one can answer, we react a bit differently
size_t count;
int nrok = 0;
for (count = requests.size(); count > 0; count--) {
auto const& req = requests[count - 1];
auto res = req.result;
if (res.status == CL_COMM_RECEIVED) {
if (res.answer_code !=
arangodb::GeneralResponse::ResponseCode::NOT_FOUND ||
(nrok == 0 && count == 1)) {
nrok++;
responseCode = res.answer_code;
TRI_ASSERT(res.answer != nullptr);
auto parsedResult = res.answer->toVelocyPack(&VPackOptions::Defaults);
resultBody.swap(parsedResult);
}
}
}
// Note that nrok is always at least 1!
if (nrok > 1) {
return TRI_ERROR_CLUSTER_GOT_CONTRADICTING_ANSWERS;
}
return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however,
// the DBserver could have reported an error.
}
// We select all results from all shards and merge them back again.
std::vector<std::shared_ptr<VPackBuilder>> allResults;
allResults.reserve(shardList->size());
// If no server responds we return 500
responseCode = GeneralResponse::ResponseCode::SERVER_ERROR;
for (auto const& req : requests) {
auto res = req.result;
int error = handleGeneralCommErrors(&res);
if (error != TRI_ERROR_NO_ERROR) {
// Local data structures are automatically freed
return error;
}
if (res.answer_code == GeneralResponse::ResponseCode::OK ||
res.answer_code == GeneralResponse::ResponseCode::ACCEPTED) {
responseCode = res.answer_code;
}
TRI_ASSERT(res.answer != nullptr);
allResults.emplace_back(res.answer->toVelocyPack(&VPackOptions::Defaults));
extractErrorCodes(res, errorCounter, false);
}
// If we get here we get exactly one result for every shard.
TRI_ASSERT(allResults.size() == shardList->size());
mergeResultsAllShards(allResults, resultBody, errorCounter,
static_cast<size_t>(slice.length()));
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief truncate a cluster collection on a coordinator
///
/// Looks up the collection, sends one asynchronous PUT request
/// `/_db/<dbname>/_api/collection/<shard>/truncate` per shard and then waits
/// for one result per request that was sent.
///
/// @param dbname    name of the database
/// @param collname  name of the collection to truncate
/// @return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND if the collection info is
///         empty, TRI_ERROR_CLUSTER_COULD_NOT_TRUNCATE_COLLECTION unless
///         every shard answered with ResponseCode::OK, and
///         TRI_ERROR_NO_ERROR otherwise
////////////////////////////////////////////////////////////////////////////////
int truncateCollectionOnCoordinator(std::string const& dbname,
                                    std::string const& collname) {
  // Set a few variables needed for our work:
  ClusterInfo* ci = ClusterInfo::instance();
  ClusterComm* cc = ClusterComm::instance();

  // First determine the collection ID from the name:
  std::shared_ptr<CollectionInfo> collinfo =
      ci->getCollection(dbname, collname);
  if (collinfo->empty()) {
    return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
  }

  // We have to contact every shard of the collection:
  auto shards = collinfo->shardIds();
  CoordTransactionID coordTransactionID = TRI_NewTickServer();
  std::string const baseUrl =
      "/_db/" + StringUtils::urlEncode(dbname) + "/_api/collection/";
  for (auto const& p : *shards) {
    auto headers =
        std::make_unique<std::unordered_map<std::string, std::string>>();
    // The shard id is URL-encoded, like in the other shard requests that are
    // built in this file.
    cc->asyncRequest("", coordTransactionID, "shard:" + p.first,
                     arangodb::GeneralRequest::RequestType::PUT,
                     baseUrl + StringUtils::urlEncode(p.first) + "/truncate",
                     std::shared_ptr<std::string>(), headers, nullptr, 60.0);
  }

  // Now listen to the results; we wait once per request sent above.
  size_t const nrShards = shards->size();
  size_t nrok = 0;
  for (size_t pending = nrShards; pending > 0; --pending) {
    auto res = cc->wait("", coordTransactionID, 0, "", 0.0);
    if (res.status == CL_COMM_RECEIVED &&
        res.answer_code == arangodb::GeneralResponse::ResponseCode::OK) {
      ++nrok;
    }
  }

  // nrok may well be 0 here (e.g. if no shard answered with OK); anything
  // short of "all shards OK" is reported as a failed truncate.
  if (nrok < nrShards) {
    return TRI_ERROR_CLUSTER_COULD_NOT_TRUNCATE_COLLECTION;
  }
  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get a document in a coordinator
////////////////////////////////////////////////////////////////////////////////
int getDocumentOnCoordinator(
std::string const& dbname, std::string const& collname,
VPackSlice const slice, OperationOptions const& options,
std::unique_ptr<std::unordered_map<std::string, std::string>>& headers,
arangodb::GeneralResponse::ResponseCode& responseCode,
std::unordered_map<int, size_t>& errorCounter,
std::shared_ptr<VPackBuilder>& resultBody) {
// Set a few variables needed for our work:
ClusterInfo* ci = ClusterInfo::instance();
ClusterComm* cc = ClusterComm::instance();
// First determine the collection ID from the name:
std::shared_ptr<CollectionInfo> collinfo =
ci->getCollection(dbname, collname);
if (collinfo->empty()) {
return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
}
std::string collid = StringUtils::itoa(collinfo->id());
// If _key is the one and only sharding attribute, we can do this quickly,
// because we can easily determine which shard is responsible for the
// document. Otherwise we have to contact all shards and ask them to
// delete the document. All but one will not know it.
// Now find the responsible shard(s)
ShardID shardID;
std::unordered_map<ShardID, std::vector<VPackValueLength>> shardMap;
std::vector<std::pair<ShardID, VPackValueLength>> reverseMapping;
bool useMultiple = slice.isArray();
int res = TRI_ERROR_NO_ERROR;
bool canUseFastPath = true;
if (useMultiple) {
VPackValueLength length = slice.length();
for (VPackValueLength idx = 0; idx < length; ++idx) {
res = distributeBabyOnShards(shardMap, ci, collid, collinfo,
reverseMapping, slice.at(idx), idx);
if (res != TRI_ERROR_NO_ERROR) {
canUseFastPath = false;
shardMap.clear();
reverseMapping.clear();
break;
}
}
} else {
res = distributeBabyOnShards(shardMap, ci, collid, collinfo, reverseMapping,
slice, 0);
if (res != TRI_ERROR_NO_ERROR) {
canUseFastPath = false;
}
}
// Some stuff to prepare cluster-internal requests:
std::string baseUrl =
"/_db/" + StringUtils::urlEncode(dbname) + "/_api/document/";
std::string optsUrlPart =
std::string("?ignoreRevs=") + (options.ignoreRevs ? "true" : "false");
arangodb::GeneralRequest::RequestType reqType;
if (!useMultiple) {
if (options.silent) {
reqType = arangodb::GeneralRequest::RequestType::HEAD;
} else {
reqType = arangodb::GeneralRequest::RequestType::GET;
}
} else {
reqType = arangodb::GeneralRequest::RequestType::PUT;
if (options.silent) {
optsUrlPart += std::string("&silent=true");
}
optsUrlPart += std::string("&onlyget=true");
}
if (canUseFastPath) {
// All shard keys are known in all documents.
// Contact all shards directly with the correct information.
VPackBuilder reqBuilder;
// Now prepare the requests:
std::vector<ClusterCommRequest> requests;
auto body = std::make_shared<std::string>();
for (auto const& it : shardMap) {
if (!useMultiple) {
TRI_ASSERT(it.second.size() == 1);
if (!options.ignoreRevs && slice.hasKey(StaticStrings::RevString)) {
headers->emplace("if-match",
slice.get(StaticStrings::RevString).copyString());
}
VPackSlice keySlice = slice;
if (slice.isObject()) {
keySlice = slice.get(StaticStrings::KeyString);
}
// We send to single endpoint
requests.emplace_back(
"shard:" + it.first, reqType,
baseUrl + StringUtils::urlEncode(it.first) + "/" +
StringUtils::urlEncode(keySlice.copyString()) +
optsUrlPart,
body);
requests[0].setHeaders(headers);
} else {
reqBuilder.clear();
reqBuilder.openArray();
for (auto const& idx : it.second) {
reqBuilder.add(slice.at(idx));
}
reqBuilder.close();
body = std::make_shared<std::string>(reqBuilder.slice().toJson());
// We send to Babies endpoint
requests.emplace_back(
"shard:" + it.first, reqType,
baseUrl + StringUtils::urlEncode(it.first) + optsUrlPart, body);
}
}
// Perform the requests
size_t nrDone = 0;
cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);
// Now listen to the results:
if (!useMultiple) {
TRI_ASSERT(requests.size() == 1);
auto const& req = requests[0];
auto res = req.result;
int commError = handleGeneralCommErrors(&res);
if (commError != TRI_ERROR_NO_ERROR) {
return commError;
}
responseCode = res.answer_code;
TRI_ASSERT(res.answer != nullptr);
auto parsedResult = res.answer->toVelocyPack(&VPackOptions::Defaults);
resultBody.swap(parsedResult);
return TRI_ERROR_NO_ERROR;
}
std::unordered_map<ShardID, std::shared_ptr<VPackBuilder>> resultMap;
collectResultsFromAllShards<VPackValueLength>(
shardMap, requests, errorCounter, resultMap, responseCode);
mergeResults(reverseMapping, resultMap, resultBody);
// the cluster operation was OK, however,
// the DBserver could have reported an error.
return TRI_ERROR_NO_ERROR;
}
// Not all shard keys are known in all documents.
// We contact all shards with the complete body and ignore NOT_FOUND
std::vector<ClusterCommRequest> requests;
auto shardList = ci->getShardList(collid);
if (!useMultiple) {
if (!options.ignoreRevs && slice.hasKey(StaticStrings::RevString)) {
headers->emplace("if-match",
slice.get(StaticStrings::RevString).copyString());
}
for (auto const& shard : *shardList) {
VPackSlice keySlice = slice;
if (slice.isObject()) {
keySlice = slice.get(StaticStrings::KeyString);
}
ClusterCommRequest req(
"shard:" + shard, reqType,
baseUrl + StringUtils::urlEncode(shard) + "/" +
StringUtils::urlEncode(keySlice.copyString()) +
optsUrlPart,
nullptr);
auto headersCopy =
std::make_unique<std::unordered_map<std::string, std::string>>(
*headers);
req.setHeaders(headersCopy);
requests.emplace_back(std::move(req));
}
} else {
auto body = std::make_shared<std::string>(slice.toJson());
for (auto const& shard : *shardList) {
requests.emplace_back(
"shard:" + shard, reqType,
baseUrl + StringUtils::urlEncode(shard) + optsUrlPart, body);
}
}
// Perform the requests
size_t nrDone = 0;
cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);
// Now listen to the results:
if (!useMultiple) {
// Only one can answer, we react a bit differently
size_t count;
int nrok = 0;
int commError = TRI_ERROR_NO_ERROR;
for (count = requests.size(); count > 0; count--) {
auto const& req = requests[count - 1];
auto res = req.result;
if (res.status == CL_COMM_RECEIVED) {
if (res.answer_code !=
arangodb::GeneralResponse::ResponseCode::NOT_FOUND ||
(nrok == 0 && count == 1 && commError == TRI_ERROR_NO_ERROR)) {
nrok++;
responseCode = res.answer_code;
TRI_ASSERT(res.answer != nullptr);
auto parsedResult = res.answer->toVelocyPack(&VPackOptions::Defaults);
resultBody.swap(parsedResult);
}
} else {
commError = handleGeneralCommErrors(&res);
}
}
if (nrok == 0) {
// This can only happen, if a commError was encountered!
return commError;
}
if (nrok > 1) {
return TRI_ERROR_CLUSTER_GOT_CONTRADICTING_ANSWERS;
}
return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however,
// the DBserver could have reported an error.
}
// We select all results from all shards and merge them back again.
std::vector<std::shared_ptr<VPackBuilder>> allResults;
allResults.reserve(shardList->size());
// If no server responds we return 500
responseCode = GeneralResponse::ResponseCode::SERVER_ERROR;
for (auto const& req : requests) {
auto& res = req.result;
int error = handleGeneralCommErrors(&res);
if (error != TRI_ERROR_NO_ERROR) {
// Local data structores are automatically freed
return error;
}
if (res.answer_code == GeneralResponse::ResponseCode::OK ||
res.answer_code == GeneralResponse::ResponseCode::ACCEPTED) {
responseCode = res.answer_code;
}
TRI_ASSERT(res.answer != nullptr);
allResults.emplace_back(res.answer->toVelocyPack(&VPackOptions::Defaults));
extractErrorCodes(res, errorCounter, false);
}
// If we get here we get exactly one result for every shard.
TRI_ASSERT(allResults.size() == shardList->size());
mergeResultsAllShards(allResults, resultBody, errorCounter,
static_cast<size_t>(slice.length()));
return TRI_ERROR_NO_ERROR;
}
/// @brief register the key of `documentId` ("<collection>/<key>") in shardMap
///
/// If the collection uses the default shard keys, the key is added only to
/// the list of the responsible shard; otherwise it is added to the list of
/// every shard of the collection.
///
/// Throws an ArangoDB exception if the collection is unknown or if the
/// responsible shard cannot be determined.
/// NOTE(review): the "<collection>/<key>" format is only checked with
/// TRI_ASSERT -- confirm that callers never pass a malformed id.
static void insertIntoShardMap(
    ClusterInfo* ci, std::string const& dbname, std::string const& documentId,
    std::unordered_map<ShardID, std::vector<std::string>>& shardMap) {
  std::vector<std::string> idParts =
      arangodb::basics::StringUtils::split(documentId, '/');
  TRI_ASSERT(idParts.size() == 2);

  // First determine the collection ID from the name:
  std::shared_ptr<CollectionInfo> collinfo =
      ci->getCollection(dbname, idParts[0]);
  if (collinfo->empty()) {
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND,
                                   "Collection not found: " + idParts[0]);
  }
  std::string collid = StringUtils::itoa(collinfo->id());
  std::string const& key = idParts[1];

  if (!collinfo->usesDefaultShardKeys()) {
    // Sorry we do not know the responsible shard yet
    // Ask all of them
    auto shardList = ci->getShardList(collid);
    for (auto const& shard : *shardList) {
      // operator[] creates the (empty) key list on first use.
      shardMap[shard].push_back(key);
    }
    return;
  }

  // We only need to add the one responsible shard. It is looked up with a
  // partial document that contains nothing but the key.
  VPackBuilder partial;
  partial.openObject();
  partial.add(StaticStrings::KeyString, VPackValue(key));
  partial.close();

  bool usesDefaultShardingAttributes;
  ShardID shardID;
  int error = ci->getResponsibleShard(collid, partial.slice(), true, shardID,
                                      usesDefaultShardingAttributes);
  if (error != TRI_ERROR_NO_ERROR) {
    THROW_ARANGO_EXCEPTION(error);
  }
  // If this is false we should not have taken the default-shard-keys
  // branch in the first place.
  TRI_ASSERT(usesDefaultShardingAttributes);

  shardMap[shardID].push_back(key);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get a list of filtered documents in a coordinator
///
/// The ids in `documentIds` are grouped by shard and looked up with one
/// PUT `/_api/simple/lookup-by-keys` request per shard; `expressions`, if
/// any, are sent along as "filter".
///
/// All documents returned by the shards are inserted into `result`, keyed by
/// their id. Every id a shard lists under "filtered" is erased from
/// `documentIds`.
///
/// @return TRI_ERROR_NO_ERROR, or the "errorNum" reported by a shard whose
///         answer has "error" set. Throws TRI_ERROR_INTERNAL if a received
///         answer does not have the expected shape.
////////////////////////////////////////////////////////////////////////////////
int getFilteredDocumentsOnCoordinator(
    std::string const& dbname,
    std::vector<traverser::TraverserExpression*> const& expressions,
    std::unordered_set<std::string>& documentIds,
    std::unordered_map<std::string, std::shared_ptr<VPackBuffer<uint8_t>>>&
        result) {
  // Set a few variables needed for our work:
  ClusterInfo* ci = ClusterInfo::instance();
  ClusterComm* cc = ClusterComm::instance();

  // Group the requested keys by shard.
  std::unordered_map<ShardID, std::vector<std::string>> keysPerShard;
  for (auto const& documentId : documentIds) {
    try {
      insertIntoShardMap(ci, dbname, documentId, keysPerShard);
    } catch (...) {
      // Ids that cannot be mapped to a shard are silently left out of the
      // requests.
    }
  }

  // Now start the request.
  // We do not have to care for shard attributes esp. shard by key.
  // If it is by key the key was only added to one key list, if not
  // it is contained multiple times.
  std::string const url =
      "/_db/" + StringUtils::urlEncode(dbname) + "/_api/simple/lookup-by-keys";
  std::vector<ClusterCommRequest> requests;
  VPackBuilder bodyBuilder;
  for (auto const& entry : keysPerShard) {
    bodyBuilder.clear();
    bodyBuilder.openObject();
    bodyBuilder.add("collection", VPackValue(entry.first));

    bodyBuilder.add("keys", VPackValue(VPackValueType::Array));
    for (auto const& key : entry.second) {
      bodyBuilder.add(VPackValue(key));
    }
    bodyBuilder.close();  // keys

    if (!expressions.empty()) {
      bodyBuilder.add("filter", VPackValue(VPackValueType::Array));
      for (auto const& expression : expressions) {
        expression->toVelocyPack(bodyBuilder);
      }
      bodyBuilder.close();  // filter
    }
    bodyBuilder.close();  // Object

    auto bodyString = std::make_shared<std::string>(bodyBuilder.toJson());
    requests.emplace_back("shard:" + entry.first,
                          arangodb::GeneralRequest::RequestType::PUT, url,
                          bodyString);
  }

  // Perform the requests
  size_t nrDone = 0;
  cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);

  // All requests sent, now collect results.
  for (auto const& req : requests) {
    auto const& answer = req.result;
    if (answer.status != CL_COMM_RECEIVED) {
      // Requests without a received answer are skipped.
      continue;
    }

    std::shared_ptr<VPackBuilder> resultBody =
        answer.answer->toVelocyPack(&VPackOptions::Defaults);
    VPackSlice resSlice = resultBody->slice();
    if (!resSlice.isObject()) {
      THROW_ARANGO_EXCEPTION_MESSAGE(
          TRI_ERROR_INTERNAL, "Received an invalid result in cluster.");
    }

    if (arangodb::basics::VelocyPackHelper::getBooleanValue(resSlice, "error",
                                                            false)) {
      return arangodb::basics::VelocyPackHelper::getNumericValue<int>(
          resSlice, "errorNum", TRI_ERROR_INTERNAL);
    }

    VPackSlice documents = resSlice.get("documents");
    if (!documents.isArray()) {
      THROW_ARANGO_EXCEPTION_MESSAGE(
          TRI_ERROR_INTERNAL, "Received an invalid result in cluster.");
    }
    for (auto const& document : VPackArrayIterator(documents)) {
      std::string id = arangodb::basics::VelocyPackHelper::getStringValue(
          document, StaticStrings::IdString, "");
      // Copy the document into a buffer of its own that `result` can keep.
      VPackBuilder copy;
      copy.add(document);
      result.emplace(id, copy.steal());
    }

    VPackSlice filtered = resSlice.get("filtered");
    if (!filtered.isArray()) {
      continue;
    }
    for (auto const& filteredId : VPackArrayIterator(filtered)) {
      if (filteredId.isString()) {
        documentIds.erase(filteredId.copyString());
      }
    }
  }
  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief get all edges on coordinator using a Traverser Filter
///
/// Sends a PUT `/_api/edges/<shard>?vertex=...` request to every shard of the
/// edge collection; `expressions`, if any, are sent as the request body.
///
/// `result` must be an open object. On success an "edges" array with the
/// edges of all shards and a "stats" object with the summed "scannedIndex"
/// and "filtered" counters are added to it; the outer object is left open.
/// NOTE(review): on the error returns inside the result loop the "edges"
/// array has already been opened and is not closed here -- confirm callers
/// discard `result` in that case.
///
/// @param responseCode out: set to OK once the requests have been performed
/// @return TRI_ERROR_NO_ERROR, TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, a
///         communication error, the "errorNum" reported by a shard, or
///         TRI_ERROR_INTERNAL for a malformed shard answer
////////////////////////////////////////////////////////////////////////////////
int getFilteredEdgesOnCoordinator(
    std::string const& dbname, std::string const& collname,
    std::string const& vertex, TRI_edge_direction_e const& direction,
    std::vector<traverser::TraverserExpression*> const& expressions,
    arangodb::GeneralResponse::ResponseCode& responseCode,
    VPackBuilder& result) {
  TRI_ASSERT(result.isOpenObject());

  // Set a few variables needed for our work:
  ClusterInfo* ci = ClusterInfo::instance();
  ClusterComm* cc = ClusterComm::instance();

  // First determine the collection ID from the name:
  std::shared_ptr<CollectionInfo> collinfo =
      ci->getCollection(dbname, collname);
  if (collinfo->empty()) {
    return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
  }
  auto shards = collinfo->shardIds();

  // Query string: the vertex plus, for IN and OUT only, the direction.
  std::string queryParameters = "?vertex=" + StringUtils::urlEncode(vertex);
  if (direction == TRI_EDGE_IN) {
    queryParameters += "&direction=in";
  } else if (direction == TRI_EDGE_OUT) {
    queryParameters += "&direction=out";
  }

  // Request body: the serialized filter expressions, or an empty string.
  auto reqBodyString = std::make_shared<std::string>();
  if (!expressions.empty()) {
    VPackBuilder filterBuilder;
    filterBuilder.openArray();
    for (auto const& expression : expressions) {
      expression->toVelocyPack(filterBuilder);
    }
    filterBuilder.close();
    reqBodyString->append(filterBuilder.toJson());
  }

  std::vector<ClusterCommRequest> requests;
  std::string baseUrl =
      "/_db/" + StringUtils::urlEncode(dbname) + "/_api/edges/";
  for (auto const& shard : *shards) {
    requests.emplace_back(
        "shard:" + shard.first, arangodb::GeneralRequest::RequestType::PUT,
        baseUrl + StringUtils::urlEncode(shard.first) + queryParameters,
        reqBodyString);
  }

  // Perform the requests
  size_t nrDone = 0;
  cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);

  size_t totalFiltered = 0;
  size_t totalScannedIndex = 0;
  responseCode = arangodb::GeneralResponse::ResponseCode::OK;

  result.add("edges", VPackValue(VPackValueType::Array));
  // All requests sent, now collect results.
  for (auto const& req : requests) {
    auto const& answer = req.result;
    int const commError = handleGeneralCommErrors(&answer);
    if (commError != TRI_ERROR_NO_ERROR) {
      // Cluster is in bad state. Report.
      return commError;
    }
    TRI_ASSERT(answer.answer != nullptr);

    std::shared_ptr<VPackBuilder> shardResult =
        answer.answer->toVelocyPack(&VPackOptions::Defaults);
    if (shardResult == nullptr) {
      return TRI_ERROR_INTERNAL;
    }
    VPackSlice shardSlice = shardResult->slice();
    if (!shardSlice.isObject()) {
      return TRI_ERROR_INTERNAL;
    }

    if (arangodb::basics::VelocyPackHelper::getBooleanValue(shardSlice,
                                                            "error", false)) {
      // shard returned an error
      return arangodb::basics::VelocyPackHelper::getNumericValue<int>(
          shardSlice, "errorNum", TRI_ERROR_INTERNAL);
    }

    VPackSlice shardEdges = shardSlice.get("edges");
    if (!shardEdges.isArray()) {
      return TRI_ERROR_INTERNAL;
    }
    for (auto const& edge : VPackArrayIterator(shardEdges)) {
      result.add(edge);
    }

    VPackSlice stats = shardSlice.get("stats");
    if (stats.isObject()) {
      totalFiltered +=
          arangodb::basics::VelocyPackHelper::getNumericValue<size_t>(
              stats, "filtered", 0);
      totalScannedIndex +=
          arangodb::basics::VelocyPackHelper::getNumericValue<size_t>(
              stats, "scannedIndex", 0);
    }
  }
  result.close();  // edges

  result.add("stats", VPackValue(VPackValueType::Object));
  result.add("scannedIndex", VPackValue(totalScannedIndex));
  result.add("filtered", VPackValue(totalFiltered));
  result.close();  // stats

  // Leave outer Object open
  return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief modify a document in a coordinator
////////////////////////////////////////////////////////////////////////////////
int modifyDocumentOnCoordinator(
std::string const& dbname, std::string const& collname,
VPackSlice const& slice, arangodb::OperationOptions const& options,
bool isPatch,
std::unique_ptr<std::unordered_map<std::string, std::string>>& headers,
arangodb::GeneralResponse::ResponseCode& responseCode,
std::unordered_map<int, size_t>& errorCounter,
std::shared_ptr<VPackBuilder>& resultBody) {
// Set a few variables needed for our work:
ClusterInfo* ci = ClusterInfo::instance();
ClusterComm* cc = ClusterComm::instance();
// First determine the collection ID from the name:
std::shared_ptr<CollectionInfo> collinfo =
ci->getCollection(dbname, collname);
if (collinfo->empty()) {
return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
}
std::string collid = StringUtils::itoa(collinfo->id());
// We have a fast path and a slow path. The fast path only asks one shard
// to do the job and the slow path asks them all and expects to get
// "not found" from all but one shard. We have to cover the following
// cases:
// isPatch == false (this is a "replace" operation)
// Here, the complete new document is given, we assume that we
// can read off the responsible shard, therefore can use the fast
// path, this is always true if _key is the one and only sharding
// attribute, however, if there is any other sharding attribute,
// it is possible that the user has changed the values in any of
// them, in that case we will get a "not found" or a "sharding
// attributes changed answer" in the fast path. In the first case
// we have to delegate to the slow path.
// isPatch == true (this is an "update" operation)
// In this case we might or might not have all sharding attributes
// specified in the partial document given. If _key is the one and
// only sharding attribute, it is always given, if not all sharding
// attributes are explicitly given (at least as value `null`), we must
// assume that the fast path cannot be used. If all sharding attributes
// are given, we first try the fast path, but might, as above,
// have to use the slow path after all.
ShardID shardID;
std::unordered_map<ShardID, std::vector<VPackValueLength>> shardMap;
std::vector<std::pair<ShardID, VPackValueLength>> reverseMapping;
bool useMultiple = slice.isArray();
int res = TRI_ERROR_NO_ERROR;
bool canUseFastPath = true;
if (useMultiple) {
VPackValueLength length = slice.length();
for (VPackValueLength idx = 0; idx < length; ++idx) {
res = distributeBabyOnShards(shardMap, ci, collid, collinfo,
reverseMapping, slice.at(idx), idx);
if (res != TRI_ERROR_NO_ERROR) {
if (!isPatch) {
return res;
}
canUseFastPath = false;
shardMap.clear();
reverseMapping.clear();
break;
}
}
} else {
res = distributeBabyOnShards(shardMap, ci, collid, collinfo, reverseMapping,
slice, 0);
if (res != TRI_ERROR_NO_ERROR) {
if (!isPatch) {
return res;
}
canUseFastPath = false;
}
}
// Some stuff to prepare cluster-internal requests:
std::string baseUrl =
"/_db/" + StringUtils::urlEncode(dbname) + "/_api/document/";
std::string optsUrlPart =
std::string("?waitForSync=") + (options.waitForSync ? "true" : "false");
optsUrlPart +=
std::string("&ignoreRevs=") + (options.ignoreRevs ? "true" : "false");
arangodb::GeneralRequest::RequestType reqType;
if (isPatch) {
reqType = arangodb::GeneralRequest::RequestType::PATCH;
if (!options.keepNull) {
optsUrlPart += "&keepNull=false";
}
if (options.mergeObjects) {
optsUrlPart += "&mergeObjects=true";
} else {
optsUrlPart += "&mergeObjects=false";
}
} else {
reqType = arangodb::GeneralRequest::RequestType::PUT;
}
if (options.returnNew) {
optsUrlPart += "&returnNew=true";
}
if (options.returnOld) {
optsUrlPart += "&returnOld=true";
}
if (canUseFastPath) {
// All shard keys are known in all documents.
// Contact all shards directly with the correct information.
std::vector<ClusterCommRequest> requests;
VPackBuilder reqBuilder;
auto body = std::make_shared<std::string>();
for (auto const& it : shardMap) {
if (!useMultiple) {
TRI_ASSERT(it.second.size() == 1);
body = std::make_shared<std::string>(slice.toJson());
// We send to single endpoint
requests.emplace_back(
"shard:" + it.first, reqType,
baseUrl + StringUtils::urlEncode(it.first) + "/" +
slice.get(StaticStrings::KeyString).copyString() + optsUrlPart,
body);
} else {
reqBuilder.clear();
reqBuilder.openArray();
for (auto const& idx : it.second) {
reqBuilder.add(slice.at(idx));
}
reqBuilder.close();
body = std::make_shared<std::string>(reqBuilder.slice().toJson());
// We send to Babies endpoint
requests.emplace_back(
"shard:" + it.first, reqType,
baseUrl + StringUtils::urlEncode(it.first) + optsUrlPart, body);
}
}
// Perform the requests
size_t nrDone = 0;
cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);
// Now listen to the results:
if (!useMultiple) {
TRI_ASSERT(requests.size() == 1);
auto res = requests[0].result;
int commError = handleGeneralCommErrors(&res);
if (commError != TRI_ERROR_NO_ERROR) {
return commError;
}
responseCode = res.answer_code;
TRI_ASSERT(res.answer != nullptr);
auto parsedResult = res.answer->toVelocyPack(&VPackOptions::Defaults);
resultBody.swap(parsedResult);
return TRI_ERROR_NO_ERROR;
}
std::unordered_map<ShardID, std::shared_ptr<VPackBuilder>> resultMap;
collectResultsFromAllShards<VPackValueLength>(
shardMap, requests, errorCounter, resultMap, responseCode);
mergeResults(reverseMapping, resultMap, resultBody);
// the cluster operation was OK, however,
// the DBserver could have reported an error.
return TRI_ERROR_NO_ERROR;
}
// Not all shard keys are known in all documents.
// We contact all shards with the complete body and ignore NOT_FOUND
std::vector<ClusterCommRequest> requests;
auto body = std::make_shared<std::string>(slice.toJson());
auto shardList = ci->getShardList(collid);
if (!useMultiple) {
std::string key = slice.get(StaticStrings::KeyString).copyString();
for (auto const& shard : *shardList) {
requests.emplace_back(
"shard:" + shard, reqType,
baseUrl + StringUtils::urlEncode(shard) + "/" + key + optsUrlPart,
body);
}
} else {
for (auto const& shard : *shardList) {
requests.emplace_back(
"shard:" + shard, reqType,
baseUrl + StringUtils::urlEncode(shard) + optsUrlPart, body);
}
}
// Perform the requests
size_t nrDone = 0;
cc->performRequests(requests, CL_DEFAULT_TIMEOUT, nrDone, Logger::REQUESTS);
// Now listen to the results:
if (!useMultiple) {
// Only one can answer, we react a bit differently
int nrok = 0;
int commError = TRI_ERROR_NO_ERROR;
for (size_t count = shardList->size(); count > 0; count--) {
auto const& req = requests[count - 1];
auto res = req.result;
if (res.status == CL_COMM_RECEIVED) {
if (res.answer_code !=
arangodb::GeneralResponse::ResponseCode::NOT_FOUND ||
(nrok == 0 && count == 1 && commError == TRI_ERROR_NO_ERROR)) {
nrok++;
responseCode = res.answer_code;
TRI_ASSERT(res.answer != nullptr);
auto parsedResult = res.answer->toVelocyPack(&VPackOptions::Defaults);
resultBody.swap(parsedResult);
}
} else {
commError = handleGeneralCommErrors(&res);
}
}
if (nrok == 0) {
// This can only happen, if a commError was encountered!
return commError;
}
if (nrok > 1) {
return TRI_ERROR_CLUSTER_GOT_CONTRADICTING_ANSWERS;
}
return TRI_ERROR_NO_ERROR; // the cluster operation was OK, however,
// the DBserver could have reported an error.
}
responseCode = GeneralResponse::ResponseCode::SERVER_ERROR;
// We select all results from all shards an merge them back again.
std::vector<std::shared_ptr<VPackBuilder>> allResults;
allResults.reserve(requests.size());
for (auto const& req : requests) {
auto res = req.result;
int error = handleGeneralCommErrors(&res);
if (error != TRI_ERROR_NO_ERROR) {
// Cluster is in bad state. Just report.
// Local data structores are automatically freed
return error;
}
if (res.answer_code == GeneralResponse::ResponseCode::OK ||
res.answer_code == GeneralResponse::ResponseCode::ACCEPTED) {
responseCode = res.answer_code;
}
TRI_ASSERT(res.answer != nullptr);
allResults.emplace_back(res.answer->toVelocyPack(&VPackOptions::Defaults));
extractErrorCodes(res, errorCounter, false);
}
// If we get here we get exactly one result for every shard.
TRI_ASSERT(allResults.size() == shardList->size());
mergeResultsAllShards(allResults, resultBody, errorCounter,
static_cast<size_t>(slice.length()));
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief flush Wal on all DBservers
////////////////////////////////////////////////////////////////////////////////
int flushWalOnAllDBServers(bool waitForSync, bool waitForCollector) {
ClusterInfo* ci = ClusterInfo::instance();
ClusterComm* cc = ClusterComm::instance();
std::vector<ServerID> DBservers = ci->getCurrentDBServers();
CoordTransactionID coordTransactionID = TRI_NewTickServer();
std::string url = std::string("/_admin/wal/flush?waitForSync=") +
(waitForSync ? "true" : "false") + "&waitForCollector=" +
(waitForCollector ? "true" : "false");
auto body = std::make_shared<std::string const>();
for (auto it = DBservers.begin(); it != DBservers.end(); ++it) {
auto headers =
std::make_unique<std::unordered_map<std::string, std::string>>();
// set collection name (shard id)
cc->asyncRequest("", coordTransactionID, "server:" + *it,
arangodb::GeneralRequest::RequestType::PUT, url, body,
headers, nullptr, 120.0);
}
// Now listen to the results:
int count;
int nrok = 0;
for (count = (int)DBservers.size(); count > 0; count--) {
auto res = cc->wait("", coordTransactionID, 0, "", 0.0);
if (res.status == CL_COMM_RECEIVED) {
if (res.answer_code == arangodb::GeneralResponse::ResponseCode::OK) {
nrok++;
}
}
}
if (nrok != (int)DBservers.size()) {
return TRI_ERROR_INTERNAL;
}
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief compute a shard distribution for a new collection
///
/// `dbServers` is the list of DBserver ids to distribute across. If it is
/// empty, it is replaced by the current list of DBservers fetched from
/// ClusterInfo, shuffled with random_shuffle. If that list is empty as well,
/// an empty map is returned.
///
/// Servers are handed out round-robin over `dbServers`; the position in the
/// list carries over from one shard to the next. Each shard gets at most
/// `replicationFactor` distinct servers; if there are not enough DBservers a
/// warning is logged and the shard gets fewer.
///
/// NOTE(review): std::random_shuffle is deprecated since C++14 and removed in
/// C++17; it needs replacing before this file is built as C++17.
///
/// @return map from shard id ("s<number>") to its list of server ids
////////////////////////////////////////////////////////////////////////////////
std::map<std::string, std::vector<std::string>> distributeShards(
    uint64_t numberOfShards,
    uint64_t replicationFactor,
    std::vector<std::string>& dbServers) {
  std::map<std::string, std::vector<std::string>> shards;
  ClusterInfo* ci = ClusterInfo::instance();

  if (dbServers.empty()) {
    dbServers = ci->getCurrentDBServers();
    if (dbServers.empty()) {
      return shards;
    }
    random_shuffle(dbServers.begin(), dbServers.end());
  }

  // fetch a unique id for each shard to create
  uint64_t const id = ci->uniqid(numberOfShards);

  // now create the shards
  size_t next = 0;  // round-robin cursor into dbServers, shared by all shards
  for (uint64_t shardNr = 0; shardNr < numberOfShards; ++shardNr) {
    // determine responsible server(s)
    std::vector<std::string> serverIds;
    for (uint64_t replica = 0; replica < replicationFactor; ++replica) {
      // Try the servers in round-robin order until one is found that is not
      // yet responsible for this shard. Give up once the cursor has been
      // advanced dbServers.size() + 1 times.
      for (size_t attempts = 1;; ++attempts) {
        std::string candidate = dbServers[next];
        ++next;
        if (next >= dbServers.size()) {
          next = 0;
        }
        if (attempts == dbServers.size() + 1) {
          LOG(WARN) << "createCollectionCoordinator: replicationFactor is "
                       "too large for the number of DBservers";
          break;
        }
        if (std::find(serverIds.begin(), serverIds.end(), candidate) ==
            serverIds.end()) {
          serverIds.push_back(candidate);
          break;
        }
      }
    }

    // determine shard id
    std::string shardId = "s" + StringUtils::itoa(id + 1 + shardNr);
    shards.emplace(shardId, serverIds);
  }
  return shards;
}
} // namespace arangodb