1
0
Fork 0

Merge branch 'sharding' of https://github.com/triAGENS/ArangoDB into sharding

This commit is contained in:
Michael Hackstein 2014-01-23 16:31:41 +01:00
commit 25dee09874
13 changed files with 136 additions and 21 deletions

View File

@ -807,7 +807,7 @@ void ClusterInfo::loadCurrentCollections (bool acquireLock) {
  // Now take note of this shard and its responsible server:
  std::string DBserver = triagens::basics::JsonHelper::getStringValue
-                          (json, "DBserver", "");
+                          (json, "DBServer", "");
  if (DBserver != "") {
    _shardIds.insert(make_pair<ShardID, ServerID>(shardID, DBserver));
  }
@ -1578,7 +1578,6 @@ ShardID ClusterInfo::getResponsibleShard (CollectionID const& collectionID,
  int tries = 0;
  TRI_shared_ptr<vector<string> > shardKeysPtr;
  char const** shardKeys = 0;
- int nrShardKeys = 0;
  TRI_shared_ptr<vector<ShardID> > shards;
  while (++tries <= 2) {
@ -1610,7 +1609,8 @@ ShardID ClusterInfo::getResponsibleShard (CollectionID const& collectionID,
    return string("");
  }
- uint64_t hash = TRI_HashJsonByAttributes(json, shardKeys, nrShardKeys);
+ uint64_t hash = TRI_HashJsonByAttributes(json, shardKeys,
+                                          shardKeysPtr->size());
  delete[] shardKeys;
  return shards->at(hash % shards->size());

View File

@ -595,7 +595,7 @@ namespace triagens {
  if (it != _jsons.end()) {
    TRI_json_t* _json = _jsons.begin()->second;
    return triagens::basics::JsonHelper::getStringValue
-            (_json, "DBserver", "");
+            (_json, "DBServer", "");
  }
  return string("");
}

View File

@ -845,8 +845,7 @@ static v8::Handle<v8::Value> JS_GetCollectionInfoCurrentClusterInfo (v8::Argumen
  result->Set(v8::String::New("waitForSync"), v8::Boolean::New(cic.waitForSync(shardID)));
  result->Set(v8::String::New("journalSize"), v8::Number::New(cic.journalSize(shardID)));
  const std::string serverID = cic.responsibleServer(shardID);
- result->Set(v8::String::New("responsibleServer"),
-             v8::String::New(serverID.c_str(), serverID.size()));
+ result->Set(v8::String::New("DBServer"), v8::String::New(serverID.c_str(), serverID.size()));
  // TODO: fill "indexes"
  v8::Handle<v8::Array> indexes = v8::Array::New();

View File

@ -325,6 +325,7 @@ bool RestDocumentHandler::createDocument () {
#ifdef TRI_ENABLE_CLUSTER
  if (ServerState::instance()->isCoordinator()) {
+   // json will be freed inside!
    return createDocumentCoordinator(collection, waitForSync, json);
  }
#endif
@ -404,12 +405,74 @@ bool RestDocumentHandler::createDocument () {
bool RestDocumentHandler::createDocumentCoordinator (char const* collection,
                                                     bool waitForSync,
                                                     TRI_json_t* json) {
- // Find collectionID from collection, which is the name
- // ask ClusterInfo for the responsible shard
- // send a synchronous request to that shard using ClusterComm
- // if not successful prepare error and return false
- // prepare successful answer (created or accepted depending on waitForSync)
- return true;
+ // Set a few variables needed for our work:
+ ClusterInfo* ci = ClusterInfo::instance();
+ ClusterComm* cc = ClusterComm::instance();
+ string const& dbname = _request->originalDatabaseName();
+ CollectionID const collname(collection);
+
+ // First determine the collection ID from the name:
+ CollectionInfo collinfo = ci->getCollection(dbname, collname);
+ string collid = StringUtils::itoa(collinfo.id());
+
+ // Now find the responsible shard:
+ ShardID shardID = ci->getResponsibleShard( collid, json, true );
+ if (shardID == "") {
+   TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);
+   generateTransactionError(collection, TRI_ERROR_SHARD_GONE);
+   return false;
+ }
+
+ // Now sort out the _key attribute:
+ // FIXME: we have to be cleverer here, depending on shard attributes
+ uint64_t uid = ci->uniqid();
+ string _key = triagens::basics::StringUtils::itoa(uid);
+ TRI_InsertArrayJson(TRI_UNKNOWN_MEM_ZONE, json, "_key",
+                     TRI_CreateStringReference2Json(TRI_UNKNOWN_MEM_ZONE,
+                                                    _key.c_str(), _key.size()));
+
+ string body = JsonHelper::toString(json);
+ TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);
+
+ // Send a synchronous request to that shard using ClusterComm:
+ ClusterCommResult* res;
+ map<string, string> headers;
+ res = cc->syncRequest("", TRI_NewTickServer(), "shard:"+shardID,
+                       triagens::rest::HttpRequest::HTTP_REQUEST_POST,
+                       "/_db/"+dbname+"/_api/document?collection="+
+                       StringUtils::urlEncode(shardID)+"&waitForSync="+
+                       (waitForSync ? "true" : "false"),
+                       body.c_str(), body.size(), headers, 60.0);
+
+ if (res->status == CL_COMM_TIMEOUT) {
+   // No reply, we give up:
+   generateTransactionError(collection, TRI_ERROR_CLUSTER_TIMEOUT);
+   return false;
+ }
+
+ bool resultflag = true;
+ if (res->status == CL_COMM_ERROR) {
+   // This could be a broken connection or an Http error:
+   if (!res->result->isComplete()) {
+     generateTransactionError(collection, TRI_ERROR_CLUSTER_CONNECTION_LOST);
+     return false;
+   }
+   // In this case a proper HTTP error was reported by the DBserver,
+   // this can be 400 or 404, we simply forward the result.
+   resultflag = false;
+   // We intentionally fall through here.
+ }
+
+ _response = createResponse(
+              static_cast<rest::HttpResponse::HttpResponseCode>
+                         (res->result->getHttpReturnCode()));
+ //cout << "CreateDoc: result code: " << res->result->getHttpReturnCode()
+ //     << endl;
+ _response->setContentType(res->result->getContentType(false));
+ //cout << "CreateDoc: contentType: " << res->result->getContentType(false)
+ //     << endl;
+ body = res->result->getBody().str();  // FIXME: a bad unnecessary copy!
+ //cout << "CreateDoc: body" << endl << body << endl;
+ //_response->body().appendText(body.c_str(), body.size());
+
+ return resultflag;
}
#endif

View File

@ -437,6 +437,16 @@ void RestVocbaseBaseHandler::generateTransactionError (const string& collectionN
      generatePreconditionFailed(_resolver.getCollectionId(collectionName), key ? key : (TRI_voc_key_t) "unknown", rid);
      return;
+#ifdef TRI_ENABLE_CLUSTER
+   case TRI_ERROR_SHARD_GONE:
+     generateError(HttpResponse::SERVER_ERROR, res,
+                   "coordinator: no responsible shard found");
+     return;
+
+   case TRI_ERROR_CLUSTER_TIMEOUT:
+     generateError(HttpResponse::SERVER_ERROR, res);
+     return;
+#endif
    default:
      generateError(HttpResponse::SERVER_ERROR, TRI_ERROR_INTERNAL, "failed with error: " + string(TRI_errno_string(res)));
  }

View File

@ -25,5 +25,3 @@ echo start arangod with:
echo "Pavel: bin/arangod --cluster.my-id Pavel --cluster.agency-prefix $NAME --cluster.agency-endpoint tcp://127.0.0.1:4001 --server.endpoint tcp://127.0.0.1:8530 data-pavel"
echo "Perry: bin/arangod --cluster.my-id Perry --cluster.agency-prefix $NAME --cluster.agency-endpoint tcp://127.0.0.1:4001 --server.endpoint tcp://127.0.0.1:8531 data-perry"
echo "Claus: bin/arangod --cluster.my-id Claus --cluster.agency-prefix $NAME --cluster.agency-endpoint tcp://127.0.0.1:4001 --server.endpoint tcp://127.0.0.1:8529 data-claus"
- echo test with:
- echo curl -X GET http://localhost:8529/_admin/sharding-test/_admin/time

View File

@ -125,6 +125,8 @@
"ERROR_CLUSTER_COULD_NOT_CREATE_DATABASE" : { "code" : 1462, "message" : "could not create database" },
"ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_PLAN" : { "code" : 1463, "message" : "could not remove database from plan" },
"ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_CURRENT" : { "code" : 1464, "message" : "could not remove database from current" },
+ "ERROR_SHARD_GONE" : { "code" : 1465, "message" : "no responsible shard found" },
+ "ERROR_CLUSTER_CONNECTION_LOST" : { "code" : 1466, "message" : "cluster internal HTTP connection broken" },
"ERROR_QUERY_KILLED" : { "code" : 1500, "message" : "query killed" },
"ERROR_QUERY_PARSE" : { "code" : 1501, "message" : "%s" },
"ERROR_QUERY_EMPTY" : { "code" : 1502, "message" : "query is empty" },

View File

@ -125,6 +125,8 @@
"ERROR_CLUSTER_COULD_NOT_CREATE_DATABASE" : { "code" : 1462, "message" : "could not create database" },
"ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_PLAN" : { "code" : 1463, "message" : "could not remove database from plan" },
"ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_CURRENT" : { "code" : 1464, "message" : "could not remove database from current" },
+ "ERROR_SHARD_GONE" : { "code" : 1465, "message" : "no responsible shard found" },
+ "ERROR_CLUSTER_CONNECTION_LOST" : { "code" : 1466, "message" : "cluster internal HTTP connection broken" },
"ERROR_QUERY_KILLED" : { "code" : 1500, "message" : "query killed" },
"ERROR_QUERY_PARSE" : { "code" : 1501, "message" : "%s" },
"ERROR_QUERY_EMPTY" : { "code" : 1502, "message" : "query is empty" },

View File

@ -248,6 +248,9 @@ function createLocalDatabases (plannedDatabases) {
  try {
    db._createDatabase(payload.name);
+   payload.error = false;
+   payload.errorNum = 0;
+   payload.errorMessage = "no error";
  }
  catch (err) {
    payload.error = true;
@ -426,7 +429,7 @@ function createLocalCollections (plannedCollections) {
    payload.errorMessage = err2.errorMessage;
  }
- payload.DBserver = ourselves;
+ payload.DBServer = ourselves;
  writeLocked({ part: "Current" },
              createCollectionAgency,
              [ database, shard, payload ]);
@ -449,6 +452,10 @@ function createLocalCollections (plannedCollections) {
                 shard);
    db._collection(shard).load();
  }
+ payload.error = false;
+ payload.errorNum = 0;
+ payload.errorMessage = "no error";
+ payload.DBServer = ourselves;
  writeLocked({ part: "Current" },
              createCollectionAgency,
@ -483,7 +490,7 @@ function createLocalCollections (plannedCollections) {
    payload.errorMessage = err3.errorMessage;
  }
- payload.DBserver = ourselves;
+ payload.DBServer = ourselves;
  writeLocked({ part: "Current" },
              createCollectionAgency,
              [ database, shard, payload ]);
@ -611,9 +618,11 @@ function cleanupCurrentCollections (plannedCollections) {
  for (shard in shards) {
    if (shards.hasOwnProperty(shard)) {
-     if (! shardMap.hasOwnProperty(shard) ||
-         shardMap[shard] !== ourselves) {
+     if (shards[shard].DBServer === ourselves &&
+         (! shardMap.hasOwnProperty(shard) ||
+          shardMap[shard] !== ourselves)) {
+       // found a shard we are entered for but that we don't have locally
        console.info("cleaning up entry for unknown shard '%s' of '%s/%s",
                     shard,
                     database,

View File

@ -160,6 +160,8 @@ ERROR_CLUSTER_COULD_NOT_CREATE_DATABASE_IN_PLAN,1461,"could not create database
ERROR_CLUSTER_COULD_NOT_CREATE_DATABASE,1462,"could not create database","Will be raised when a coordinator in a cluster notices that some DBServers report problems when creating databases for a new cluster wide database."
ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_PLAN,1463,"could not remove database from plan","Will be raised when a coordinator in a cluster cannot remove an entry for a database in the Plan hierarchy in the agency."
ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_CURRENT,1464,"could not remove database from current","Will be raised when a coordinator in a cluster cannot remove an entry for a database in the Current hierarchy in the agency."
+ ERROR_SHARD_GONE,1465,"no responsible shard found","Will be raised when a coordinator in a cluster cannot determine the shard that is responsible for a given document."
+ ERROR_CLUSTER_CONNECTION_LOST,1466,"cluster internal HTTP connection broken","Will be raised when a coordinator in a cluster loses an HTTP connection to a DBserver in the cluster whilst transferring data."

################################################################################
## ArangoDB query errors

View File

@ -203,7 +203,7 @@ void TRI_DestroyStringBuffer (TRI_string_buffer_t * self) {
////////////////////////////////////////////////////////////////////////////////
/// @brief frees the string buffer and cleans the buffer
///
-/// @warning You must call free after or destroy using the string buffer.
+/// @warning You must call free or destroy after using the string buffer.
////////////////////////////////////////////////////////////////////////////////

void TRI_AnnihilateStringBuffer (TRI_string_buffer_t * self) {

View File

@ -121,6 +121,8 @@ void TRI_InitialiseErrorMessages (void) {
REG_ERROR(ERROR_CLUSTER_COULD_NOT_CREATE_DATABASE, "could not create database");
REG_ERROR(ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_PLAN, "could not remove database from plan");
REG_ERROR(ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_CURRENT, "could not remove database from current");
+ REG_ERROR(ERROR_SHARD_GONE, "no responsible shard found");
+ REG_ERROR(ERROR_CLUSTER_CONNECTION_LOST, "cluster internal HTTP connection broken");
REG_ERROR(ERROR_QUERY_KILLED, "query killed");
REG_ERROR(ERROR_QUERY_PARSE, "%s");
REG_ERROR(ERROR_QUERY_EMPTY, "query is empty");

View File

@ -280,6 +280,12 @@ extern "C" {
/// - 1464: @LIT{could not remove database from current}
///   Will be raised when a coordinator in a cluster cannot remove an entry for
///   a database in the Current hierarchy in the agency.
+/// - 1465: @LIT{no responsible shard found}
+///   Will be raised when a coordinator in a cluster cannot determine the shard
+///   that is responsible for a given document.
+/// - 1466: @LIT{cluster internal HTTP connection broken}
+///   Will be raised when a coordinator in a cluster loses an HTTP connection
+///   to a DBserver in the cluster whilst transferring data.
/// - 1500: @LIT{query killed}
///   Will be raised when a running query is killed by an explicit admin
///   command.
@ -1624,6 +1630,28 @@ void TRI_InitialiseErrorMessages (void);
#define TRI_ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_CURRENT   (1464)

+////////////////////////////////////////////////////////////////////////////////
+/// @brief 1465: ERROR_SHARD_GONE
+///
+/// no responsible shard found
+///
+/// Will be raised when a coordinator in a cluster cannot determine the shard
+/// that is responsible for a given document.
+////////////////////////////////////////////////////////////////////////////////
+
+#define TRI_ERROR_SHARD_GONE                                      (1465)
+
+////////////////////////////////////////////////////////////////////////////////
+/// @brief 1466: ERROR_CLUSTER_CONNECTION_LOST
+///
+/// cluster internal HTTP connection broken
+///
+/// Will be raised when a coordinator in a cluster loses an HTTP connection to
+/// a DBserver in the cluster whilst transferring data.
+////////////////////////////////////////////////////////////////////////////////
+
+#define TRI_ERROR_CLUSTER_CONNECTION_LOST                         (1466)

////////////////////////////////////////////////////////////////////////////////
/// @brief 1500: ERROR_QUERY_KILLED
///