diff --git a/arangod/Cluster/ServerState.cpp b/arangod/Cluster/ServerState.cpp index f81e9adda1..657051d7ec 100644 --- a/arangod/Cluster/ServerState.cpp +++ b/arangod/Cluster/ServerState.cpp @@ -665,6 +665,36 @@ void ServerState::setDisableDispatcherKickstarter (bool value) { _disableDispatcherKickstarter = value; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief redetermine the server role, we do this after a plan change. +/// This is needed for automatic failover. This calls determineRole with +/// previous values of _info and _id. In particular, the _id will usually +/// already be set. If the current role cannot be determined from the +/// agency or is not unique, then the system keeps the old role. +/// Returns true if there is a change and false otherwise. +//////////////////////////////////////////////////////////////////////////////// + +bool ServerState::redetermineRole () { + std::string saveIdOfPrimary = _idOfPrimary; + RoleEnum role = determineRole(_localInfo, _id); + std::string roleString = roleToString(role); + LOG_INFO("Redetermined role from agency: %s", roleString.c_str()); + if (role == ServerState::ROLE_UNDEFINED) { + return false; + } + RoleEnum oldRole = loadRole(); + if (role != oldRole) { + LOG_INFO("Changed role to: %s", roleString.c_str()); + storeRole(role); + return true; + } + if (_idOfPrimary != saveIdOfPrimary) { + LOG_INFO("The ID of our primary has changed!"); + return true; + } + return false; +} + // ----------------------------------------------------------------------------- // --SECTION-- private methods // ----------------------------------------------------------------------------- diff --git a/arangod/Cluster/ServerState.h b/arangod/Cluster/ServerState.h index a4f1aef692..94149f1cbd 100644 --- a/arangod/Cluster/ServerState.h +++ b/arangod/Cluster/ServerState.h @@ -399,6 +399,17 @@ namespace triagens { void setDisableDispatcherKickstarter (bool); +//////////////////////////////////////////////////////////////////////////////// +/// @brief redetermine the server role, we do this after a plan change. +/// This is needed for automatic failover. This calls determineRole with +/// previous values of _info and _id. In particular, the _id will usually +/// already be set. If the current role cannot be determined from the +/// agency or is not unique, then the system keeps the old role. +/// Returns true if there is a change and false otherwise. +//////////////////////////////////////////////////////////////////////////////// + + bool redetermineRole (); + // ----------------------------------------------------------------------------- // --SECTION-- private methods // ----------------------------------------------------------------------------- diff --git a/arangod/Cluster/v8-cluster.cpp b/arangod/Cluster/v8-cluster.cpp index 14071f4dad..efcafc6704 100644 --- a/arangod/Cluster/v8-cluster.cpp +++ b/arangod/Cluster/v8-cluster.cpp @@ -1511,6 +1511,28 @@ static void JS_SetRoleServerState (const v8::FunctionCallbackInfo& ar TRI_V8_TRY_CATCH_END } +//////////////////////////////////////////////////////////////////////////////// +/// @brief redetermines the role from the agency +//////////////////////////////////////////////////////////////////////////////// + +static void JS_RedetermineRoleServerState (const v8::FunctionCallbackInfo& args) { + TRI_V8_TRY_CATCH_BEGIN(isolate); + v8::HandleScope scope(isolate); + + if (args.Length() != 0) { + TRI_V8_THROW_EXCEPTION_USAGE("redetermineRole()"); + } + + bool changed = ServerState::instance()->redetermineRole(); + if (changed) { + TRI_V8_RETURN_TRUE(); + } + else { + TRI_V8_RETURN_FALSE(); + } + TRI_V8_TRY_CATCH_END +} + //////////////////////////////////////////////////////////////////////////////// /// @brief returns the server state //////////////////////////////////////////////////////////////////////////////// @@ -2201,6 +2223,7 @@ void TRI_InitV8Cluster (v8::Isolate* isolate, v8::Handle context) { TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("setLocalInfo"), JS_SetLocalInfoServerState, true); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("setId"), JS_SetIdServerState, true); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("setRole"), JS_SetRoleServerState, true); + TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("redetermineRole"), JS_RedetermineRoleServerState, true); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("status"), JS_StatusServerState); TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING("getClusterAuthentication"), JS_GetClusterAuthentication); diff --git a/js/server/modules/org/arangodb/cluster.js b/js/server/modules/org/arangodb/cluster.js index 1ec397f0e7..74c55ed900 100644 --- a/js/server/modules/org/arangodb/cluster.js +++ b/js/server/modules/org/arangodb/cluster.js @@ -829,13 +829,97 @@ function setupReplication () { db._useDatabase("_system"); } +//////////////////////////////////////////////////////////////////////////////// +/// @brief role change from secondary to primary +//////////////////////////////////////////////////////////////////////////////// + +function secondaryToPrimary () { + console.info("Switching role from secondary to primary..."); + var db = require("internal").db; + var rep = require("org/arangodb/replication"); + var dbs = db._listDatabases(); + var i; + try { + for (i = 0; i < dbs.length; i++) { + var database = dbs[i]; + console.info("Stopping asynchronous replication for db " + + database + "..."); + db._useDatabase(database); + var state = rep.applier.state(); + if (state.state.running === true) { + try { + rep.applier.stop(); + } + catch (err) { + console.info("Exception caught whilst stopping replication!"); + } + } + rep.applier.forget(); + } + } + finally { + db._useDatabase("_system"); + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// @brief role change from primary to secondary +//////////////////////////////////////////////////////////////////////////////// + +function primaryToSecondary () { + console.info("Switching role from primary to secondary..."); +} + //////////////////////////////////////////////////////////////////////////////// /// @brief change handling trampoline function //////////////////////////////////////////////////////////////////////////////// function handleChanges (plan, current) { - handleDatabaseChanges(plan, current); + var changed = false; var role = ArangoServerState.role(); + if (role === "PRIMARY" || role === "SECONDARY") { + // Need to check role change for automatic failover: + var myId = ArangoServerState.id(); + if (role === "PRIMARY") { + if (! plan.hasOwnProperty("Plan/DBServers/"+myId)) { + // Ooops! We do not seem to be a primary any more! + changed = ArangoServerState.redetermineRole(); + } + } + else { // role === "SECONDARY" + if (plan.hasOwnProperty("Plan/DBServers/"+myId)) { + // Ooops! We are now a primary! + changed = ArangoServerState.redetermineRole(); + } + else { + var found = null; + var p; + for (p in plan) { + if (plan.hasOwnProperty(p) && plan[p] === myId) { + found = p; + break; + } + } + if (found !== ArangoServerState.idOfPrimary()) { + // Note this includes the case that we are not found at all! + changed = ArangoServerState.redetermineRole(); + } + } + } + } + var oldRole = role; + if (changed) { + role = ArangoServerState.role(); + console.log("Our role has changed to " + role); + if (oldRole === "SECONDARY" && role === "PRIMARY") { + secondaryToPrimary(); + } + else if (oldRole === "PRIMARY" && role === "SECONDARY") { + primaryToSecondary(); + } + } + + handleDatabaseChanges(plan, current); if (role === "PRIMARY" || role === "COORDINATOR") { // Note: This is only ever called for DBservers (primary and secondary), // we keep the coordinator case here just in case...