1
0
Fork 0

[3.4] Bug fix 3.4/allow agency ops in active failover (#9960)

* Backported #9881

* Fixed changelog

* Fixed merge
This commit is contained in:
Tobias Gödderz 2019-09-19 10:45:49 +02:00 committed by KVS85
parent 59a90f28c0
commit 78fad805b5
3 changed files with 98 additions and 106 deletions

View File

@ -1,6 +1,9 @@
v3.4.9 (XXXX-XX-XX)
-------------------
* Fixed "ArangoDB is not running in cluster mode" errors in active failover setups.
This affected at least /_admin/cluster/health.
* Harden database creation against spurious "duplicate name" errors that
were caused by other parallel operations lazily creating required
system collections in the same database.
@ -45,7 +48,6 @@ v3.4.9 (XXXX-XX-XX)
route /_api/replication/addFollower. The new field `syncerId` is also added to
the response of /_api/replication/logger-state.
v3.4.8 (2019-09-09)
-------------------

View File

@ -28,10 +28,12 @@
#include "Agency/AgencyComm.h"
#include "ApplicationFeatures/ApplicationServer.h"
#include "Basics/Exceptions.h"
#include "Cluster/ClusterComm.h"
#include "Cluster/ClusterInfo.h"
#include "Cluster/ServerState.h"
#include "GeneralServer/AuthenticationFeature.h"
#include "Replication/ReplicationFeature.h"
#include "Sharding/ShardDistributionReporter.h"
#include "V8/v8-buffer.h"
#include "V8/v8-conv.h"
@ -53,12 +55,24 @@ using namespace arangodb::basics;
CreateAgencyException(args, data); \
return;
#define ONLY_IN_CLUSTER \
if (!ServerState::instance()->isRunningInCluster()) { \
TRI_V8_THROW_EXCEPTION_INTERNAL( \
"ArangoDB is not running in cluster mode"); \
/// @brief guard for functionality that is only available in cluster mode;
/// raises TRI_ERROR_INTERNAL if the server is not running in a cluster
static void onlyInCluster() {
  bool const inCluster = ServerState::instance()->isRunningInCluster();
  if (!inCluster) {
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "ArangoDB is not running in cluster mode");
  }
}
/// @brief guard for functionality that is available in cluster mode and in
/// active failover setups; otherwise behaves exactly like onlyInCluster()
static void onlyInClusterOrActiveFailover() {
  auto feature = ReplicationFeature::INSTANCE;
  // the feature pointer may be unset, in which case only the cluster check applies
  bool const activeFailover = (feature != nullptr) && feature->isActiveFailoverEnabled();
  if (!activeFailover) {
    onlyInCluster();
  }
}
static void CreateAgencyException(v8::FunctionCallbackInfo<v8::Value> const& args,
AgencyCommResult const& result) {
v8::Isolate* isolate = args.GetIsolate();
@ -100,7 +114,7 @@ static void JS_CasAgency(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() < 3) {
TRI_V8_THROW_EXCEPTION_USAGE(
@ -162,7 +176,7 @@ static void JS_CreateDirectoryAgency(v8::FunctionCallbackInfo<v8::Value> const&
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("createDirectory(<key>)");
@ -209,7 +223,7 @@ static void JS_IncreaseVersionAgency(v8::FunctionCallbackInfo<v8::Value> const&
TRI_V8_TRY_CATCH_BEGIN(isolate)
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("increaseVersion(<key>)");
@ -235,7 +249,7 @@ static void JS_GetAgency(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate)
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() < 1) {
TRI_V8_THROW_EXCEPTION_USAGE("get(<key>)");
@ -277,7 +291,7 @@ static void JS_APIAgency(std::string const& envelope,
TRI_V8_TRY_CATCH_BEGIN(isolate)
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() < 1) {
TRI_V8_THROW_EXCEPTION_USAGE(std::string(envelope) + "([[...]])");
@ -342,7 +356,7 @@ static void JS_RemoveAgency(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() < 1) {
TRI_V8_THROW_EXCEPTION_USAGE("remove(<key>, <recursive>)");
@ -374,7 +388,7 @@ static void JS_SetAgency(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() < 2) {
TRI_V8_THROW_EXCEPTION_USAGE("set(<key>, <value>, <ttl>)");
@ -413,7 +427,7 @@ static void JS_Agency(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate)
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() > 0) {
TRI_V8_THROW_EXCEPTION_USAGE("agency()");
@ -456,7 +470,7 @@ static void JS_EndpointsAgency(v8::FunctionCallbackInfo<v8::Value> const& args)
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("endpoints()");
@ -501,7 +515,7 @@ static void JS_UniqidAgency(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() > 2) {
TRI_V8_THROW_EXCEPTION_USAGE("uniqid(<count>, <timeout>)");
@ -538,7 +552,7 @@ static void JS_VersionAgency(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER;
onlyInClusterOrActiveFailover();
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("version()");
@ -559,7 +573,8 @@ static void JS_DoesDatabaseExistClusterInfo(v8::FunctionCallbackInfo<v8::Value>
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("doesDatabaseExist(<database-id>)");
}
@ -582,11 +597,12 @@ static void JS_Databases(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
onlyInCluster();
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("databases()");
}
ONLY_IN_CLUSTER
std::vector<DatabaseID> res = ClusterInfo::instance()->databases(true);
v8::Handle<v8::Array> a = v8::Array::New(isolate, (int)res.size());
std::vector<DatabaseID>::iterator it;
@ -606,7 +622,8 @@ static void JS_FlushClusterInfo(v8::FunctionCallbackInfo<v8::Value> const& args)
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("flush()");
}
@ -625,7 +642,8 @@ static void JS_GetCollectionInfoClusterInfo(v8::FunctionCallbackInfo<v8::Value>
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 2) {
TRI_V8_THROW_EXCEPTION_USAGE(
"getCollectionInfo(<database-id>, <collection-id>)");
@ -694,7 +712,8 @@ static void JS_GetCollectionInfoCurrentClusterInfo(v8::FunctionCallbackInfo<v8::
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 3) {
TRI_V8_THROW_EXCEPTION_USAGE(
"getCollectionInfoCurrent(<database-id>, <collection-id>, <shardID>)");
@ -770,7 +789,8 @@ static void JS_GetResponsibleServerClusterInfo(v8::FunctionCallbackInfo<v8::Valu
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("getResponsibleServer(<shard-id>)");
}
@ -795,7 +815,8 @@ static void JS_GetResponsibleShardClusterInfo(v8::FunctionCallbackInfo<v8::Value
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() < 2 || args.Length() > 3) {
TRI_V8_THROW_EXCEPTION_USAGE(
"getResponsibleShard(<collection-id>, <document>, "
@ -858,7 +879,8 @@ static void JS_GetServerEndpointClusterInfo(v8::FunctionCallbackInfo<v8::Value>
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("getServerEndpoint(<server-id>)");
}
@ -878,7 +900,8 @@ static void JS_GetServerNameClusterInfo(v8::FunctionCallbackInfo<v8::Value> cons
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("getServerName(<endpoint>)");
}
@ -898,7 +921,8 @@ static void JS_GetDBServers(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("getDBServers()");
}
@ -939,7 +963,8 @@ static void JS_ReloadDBServers(v8::FunctionCallbackInfo<v8::Value> const& args)
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("reloadDBServers()");
}
@ -957,7 +982,7 @@ static void JS_GetCoordinators(v8::FunctionCallbackInfo<v8::Value> const& args)
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("getCoordinators()");
@ -1037,7 +1062,8 @@ static void JS_IdServerState(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInClusterOrActiveFailover();
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("id()");
}
@ -1125,40 +1151,6 @@ static void JS_GetFoxxmasterSince(v8::FunctionCallbackInfo<v8::Value> const& arg
TRI_V8_TRY_CATCH_END
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return the primary servers id (only for secondaries)
////////////////////////////////////////////////////////////////////////////////
// V8 callback registered under the JavaScript method name "idOfPrimary".
static void JS_IdOfPrimaryServerState(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
// reject the call when the server is not running in cluster mode
ONLY_IN_CLUSTER
// the method takes no arguments; anything else is reported as a usage error
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("idOfPrimary()");
}
// the result is unconditionally the empty string
TRI_V8_RETURN_STRING(""); // no more secondaries
TRI_V8_TRY_CATCH_END
}
////////////////////////////////////////////////////////////////////////////////
/// @brief returns the javascript startup path
////////////////////////////////////////////////////////////////////////////////
// V8 callback registered under the JavaScript method name "javaScriptPath".
// Unlike its neighbours it performs no cluster-mode check.
static void JS_JavaScriptPathServerState(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
// the method takes no arguments; anything else is reported as a usage error
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("javaScriptPath()");
}
// hand back whatever path the ServerState singleton reports
std::string const path = ServerState::instance()->getJavaScriptPath();
TRI_V8_RETURN_STD_STRING(path);
TRI_V8_TRY_CATCH_END
}
////////////////////////////////////////////////////////////////////////////////
/// @brief return whether the cluster is initialized
////////////////////////////////////////////////////////////////////////////////
@ -1240,29 +1232,6 @@ static void JS_SetRoleServerState(v8::FunctionCallbackInfo<v8::Value> const& arg
TRI_V8_TRY_CATCH_END
}
////////////////////////////////////////////////////////////////////////////////
/// @brief redetermines the role from the agency
////////////////////////////////////////////////////////////////////////////////
// V8 callback registered under the JavaScript method name "redetermineRole".
// The actual role re-determination is commented out below, so the only
// result this function produces is `false`.
static void JS_RedetermineRoleServerState(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
// reject the call when the server is not running in cluster mode
ONLY_IN_CLUSTER
// the method takes no arguments; anything else is reported as a usage error
if (args.Length() != 0) {
TRI_V8_THROW_EXCEPTION_USAGE("redetermineRole()");
}
// disabled implementation, kept for reference
/*bool changed = ServerState::instance()->redetermineRole();
if (changed) {
TRI_V8_RETURN_TRUE();
} else {
}*/
TRI_V8_RETURN_FALSE();
TRI_V8_TRY_CATCH_END
}
////////////////////////////////////////////////////////////////////////////////
/// @brief returns the server state
////////////////////////////////////////////////////////////////////////////////
@ -1297,7 +1266,8 @@ static void PrepareClusterCommRequest(
v8::Isolate* isolate = args.GetIsolate();
TRI_V8_CURRENT_GLOBALS_AND_SCOPE;
ONLY_IN_CLUSTER
onlyInClusterOrActiveFailover();
TRI_ASSERT(args.Length() >= 4);
reqType = arangodb::rest::RequestType::GET;
@ -1544,7 +1514,8 @@ static void Return_PrepareClusterCommResultForJS(v8::FunctionCallbackInfo<v8::Va
static void JS_AsyncRequest(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInClusterOrActiveFailover();
if (args.Length() < 4 || args.Length() > 7) {
TRI_V8_THROW_EXCEPTION_USAGE(
@ -1604,7 +1575,8 @@ static void JS_AsyncRequest(v8::FunctionCallbackInfo<v8::Value> const& args) {
static void JS_SyncRequest(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() < 4 || args.Length() > 7) {
TRI_V8_THROW_EXCEPTION_USAGE(
@ -1668,7 +1640,8 @@ static void JS_SyncRequest(v8::FunctionCallbackInfo<v8::Value> const& args) {
static void JS_Enquire(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
v8::HandleScope scope(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("enquire(operationID)");
@ -1699,7 +1672,8 @@ static void JS_Enquire(v8::FunctionCallbackInfo<v8::Value> const& args) {
static void JS_Wait(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
TRI_V8_CURRENT_GLOBALS_AND_SCOPE;
ONLY_IN_CLUSTER
onlyInClusterOrActiveFailover();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("wait(obj)");
@ -1767,7 +1741,8 @@ static void JS_Wait(v8::FunctionCallbackInfo<v8::Value> const& args) {
static void JS_Drop(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
TRI_V8_CURRENT_GLOBALS_AND_SCOPE;
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE("drop(obj)");
@ -1872,7 +1847,8 @@ static void JS_ClusterDownload(v8::FunctionCallbackInfo<v8::Value> const& args)
static void JS_GetShardDistribution(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
v8::HandleScope scope(isolate);
auto& vocbase = GetContextVocBase(isolate);
@ -1891,7 +1867,8 @@ static void JS_GetShardDistribution(v8::FunctionCallbackInfo<v8::Value> const& a
static void JS_GetCollectionShardDistribution(v8::FunctionCallbackInfo<v8::Value> const& args) {
TRI_V8_TRY_CATCH_BEGIN(isolate);
ONLY_IN_CLUSTER
onlyInCluster();
if (args.Length() != 1) {
TRI_V8_THROW_EXCEPTION_USAGE(
@ -2046,11 +2023,6 @@ void TRI_InitV8Cluster(v8::Isolate* isolate, v8::Handle<v8::Context> context) {
TRI_AddMethodVocbase(isolate, rt,
TRI_V8_ASCII_STRING(isolate, "getFoxxmasterSince"),
JS_GetFoxxmasterSince);
TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING(isolate, "idOfPrimary"),
JS_IdOfPrimaryServerState);
TRI_AddMethodVocbase(isolate, rt,
TRI_V8_ASCII_STRING(isolate, "javaScriptPath"),
JS_JavaScriptPathServerState);
TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING(isolate, "initialized"),
JS_InitializedServerState);
TRI_AddMethodVocbase(isolate, rt,
@ -2059,9 +2031,6 @@ void TRI_InitV8Cluster(v8::Isolate* isolate, v8::Handle<v8::Context> context) {
TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING(isolate, "role"), JS_RoleServerState);
TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING(isolate, "setRole"),
JS_SetRoleServerState, true);
TRI_AddMethodVocbase(isolate, rt,
TRI_V8_ASCII_STRING(isolate, "redetermineRole"),
JS_RedetermineRoleServerState, true);
TRI_AddMethodVocbase(isolate, rt, TRI_V8_ASCII_STRING(isolate, "status"), JS_StatusServerState);
v8g->ServerStateTempl.Reset(isolate, rt);

View File

@ -26,7 +26,8 @@
const jsunity = require('jsunity');
const internal = require('internal');
const fs = require('fs');
const console = require('console');
const expect = require('chai').expect;
const arangosh = require('@arangodb/arangosh');
const crypto = require('@arangodb/crypto');
@ -74,12 +75,12 @@ function getUrl(endpoint) {
function baseUrl() {
return getUrl(arango.getEndpoint());
};
}
function connectToServer(leader) {
arango.reconnect(leader, "_system", "root", "");
db._flushCache();
};
}
// getEndpoints works with any server
function getClusterEndpoints() {
@ -471,7 +472,7 @@ function ActiveFailoverSuite() {
assertTrue(checkInSync(currentLead, servers));
assertEqual(checkData(currentLead), 10000);
}
},
// Try to cleanup everything that was created
/*testCleanup: function () {
@ -488,6 +489,26 @@ function ActiveFailoverSuite() {
assertTrue(checkInSync(lead, servers));
}*/
// Regression test. This endpoint was broken due to added checks in v8-cluster.cpp,
// which allowed certain calls only in cluster mode, but not in active failover.
// Sends an authenticated GET to /_admin/cluster/health on the current leader
// and checks that the JSON answer reports success and carries a Health entry.
testClusterHealth: function () {
// diagnostic output: which server is being queried
console.warn({currentLead: getUrl(currentLead)});
const res = request.get({
url: getUrl(currentLead) + "/_admin/cluster/health",
auth: {
bearer: jwtRoot,
},
timeout: 30
});
// diagnostic output: the raw response and its parsed body
console.warn(JSON.stringify(res));
console.warn(res.json);
expect(res).to.be.an.instanceof(request.Response);
// NOTE(review): the status code check below is disabled; the code is instead
// verified through the `code` field of the JSON body.
// expect(res).to.be.have.property('statusCode', 200);
expect(res).to.have.property('json');
expect(res.json).to.include({error: false, code: 200});
expect(res.json).to.have.property('Health');
},
};
}