diff --git a/CHANGELOG b/CHANGELOG index 0bf6f5fc9c..7d50666f0f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,9 @@ devel ----- +* added --cluster.system-replication-factor in order to adjust the + replication factor for new system collections + * fixed issue #2012 * added a memory expection in case V8 memory gets too low diff --git a/arangod/Cluster/ClusterFeature.cpp b/arangod/Cluster/ClusterFeature.cpp index d0479d8cf1..b350cbf5b3 100644 --- a/arangod/Cluster/ClusterFeature.cpp +++ b/arangod/Cluster/ClusterFeature.cpp @@ -38,6 +38,7 @@ #include "ProgramOptions/Section.h" #include "RestServer/DatabaseServerFeature.h" #include "SimpleHttpClient/ConnectionManager.h" +#include "V8Server/V8DealerFeature.h" #include "VocBase/server.h" using namespace arangodb; @@ -67,7 +68,7 @@ ClusterFeature::ClusterFeature(application_features::ApplicationServer* server) ClusterFeature::~ClusterFeature() { delete _heartbeatThread; - + if (_enableCluster) { AgencyComm::cleanup(); } @@ -126,6 +127,10 @@ void ClusterFeature::collectOptions(std::shared_ptr options) { options->addOption("--cluster.coordinator-config", "path to the coordinator configuration", new StringParameter(&_coordinatorConfig)); + + options->addOption("--cluster.system-replication-factor", + "replication factor for system collections", + new UInt32Parameter(&_systemReplicationFactor)); } void ClusterFeature::validateOptions(std::shared_ptr options) { @@ -180,6 +185,12 @@ void ClusterFeature::validateOptions(std::shared_ptr options) { FATAL_ERROR_EXIT(); } } + + // validate system-replication-factor + if (_systemReplicationFactor == 0) { + LOG(FATAL) << "system replication factor must be greater 0"; + FATAL_ERROR_EXIT(); + } } void ClusterFeature::prepare() { @@ -190,6 +201,12 @@ void ClusterFeature::prepare() { ServerState::instance()->setDBserverConfig(_dbserverConfig); ServerState::instance()->setCoordinatorConfig(_coordinatorConfig); + V8DealerFeature* v8Dealer = + ApplicationServer::getFeature("V8Dealer"); + + v8Dealer->defineDouble("SYS_DEFAULT_REPLICATION_FACTOR_SYSTEM", + _systemReplicationFactor); + // create callback registery _agencyCallbackRegistry.reset( new AgencyCallbackRegistry(agencyCallbacksPath())); @@ -203,8 +220,9 @@ void ClusterFeature::prepare() { // create an instance (this will not yet create a thread) ClusterComm::instance(); - AgencyFeature* agency = - application_features::ApplicationServer::getFeature("Agency"); + AgencyFeature* agency = + application_features::ApplicationServer::getFeature( + "Agency"); if (agency->isEnabled() || _enableCluster) { // initialize ClusterComm library, must call initialize only once @@ -335,12 +353,11 @@ void ClusterFeature::prepare() { << "' specified for --cluster.my-address"; FATAL_ERROR_EXIT(); } - } -//YYY #ifdef ARANGODB_ENABLE_MAINTAINER_MODE -//YYY #warning FRANK split into methods -//YYY #endif +// YYY #ifdef ARANGODB_ENABLE_MAINTAINER_MODE +// YYY #warning FRANK split into methods +// YYY #endif void ClusterFeature::start() { // return if cluster is disabled @@ -371,53 +388,49 @@ void ClusterFeature::start() { AgencyCommResult result = comm.getValues("Sync/HeartbeatIntervalMs"); if (result.successful()) { - velocypack::Slice HeartbeatIntervalMs = - result.slice()[0].get(std::vector( - {AgencyComm::prefix(), "Sync", "HeartbeatIntervalMs"})); - + result.slice()[0].get(std::vector( + {AgencyComm::prefix(), "Sync", "HeartbeatIntervalMs"})); + if (HeartbeatIntervalMs.isInteger()) { try { _heartbeatInterval = HeartbeatIntervalMs.getUInt(); LOG(INFO) << "using heartbeat interval value '" << _heartbeatInterval << " ms' from agency"; - } - catch (...) { + } catch (...) { // Ignore if it is not a small int or uint } - } } - + // no value set in agency. use default if (_heartbeatInterval == 0) { _heartbeatInterval = 5000; // 1/s - + LOG(WARN) << "unable to read heartbeat interval from agency. Using " << "default value '" << _heartbeatInterval << " ms'"; } - + // start heartbeat thread _heartbeatThread = new HeartbeatThread(DatabaseServerFeature::SERVER, _agencyCallbackRegistry.get(), _heartbeatInterval * 1000, 5); - + if (!_heartbeatThread->init() || !_heartbeatThread->start()) { LOG(FATAL) << "heartbeat could not connect to agency endpoints (" << endpoints << ")"; FATAL_ERROR_EXIT(); } - + while (!_heartbeatThread->isReady()) { // wait until heartbeat is ready usleep(10000); } } - + AgencyCommResult result; while (true) { - VPackBuilder builder; try { VPackObjectBuilder b(&builder); @@ -429,7 +442,7 @@ void ClusterFeature::start() { result = comm.setValue("Current/ServersRegistered/" + _myId, builder.slice(), 0.0); - + if (!result.successful()) { LOG(FATAL) << "unable to register server in agency: http code: " << result.httpCode() << ", body: " << result.body(); @@ -437,7 +450,7 @@ void ClusterFeature::start() { } else { break; } - + sleep(1); } @@ -449,7 +462,7 @@ void ClusterFeature::start() { ServerState::instance()->setState(ServerState::STATE_SYNCING); } - DispatcherFeature* dispatcher = + DispatcherFeature* dispatcher = ApplicationServer::getFeature("Dispatcher"); dispatcher->buildAqlQueue(); @@ -460,13 +473,13 @@ void ClusterFeature::unprepare() { if (_heartbeatThread != nullptr) { _heartbeatThread->beginShutdown(); } - + // change into shutdown state ServerState::instance()->setState(ServerState::STATE_SHUTDOWN); - + AgencyComm comm; comm.sendServerState(0.0); - + if (_heartbeatThread != nullptr) { int counter = 0; while (_heartbeatThread->isRunning()) { @@ -493,32 +506,30 @@ void ClusterFeature::unprepare() { AgencyComm comm; comm.sendServerState(0.0); - + // Try only once to unregister because maybe the agencycomm // is shutting down as well... - + ServerState::RoleEnum role = ServerState::instance()->getRole(); - + AgencyWriteTransaction unreg; // Remove from role if (role == ServerState::ROLE_PRIMARY) { - unreg.operations.push_back( - AgencyOperation("Current/DBServers/" + _myId, - AgencySimpleOperationType::DELETE_OP)); + unreg.operations.push_back(AgencyOperation( + "Current/DBServers/" + _myId, AgencySimpleOperationType::DELETE_OP)); } else if (role == ServerState::ROLE_COORDINATOR) { - unreg.operations.push_back( - AgencyOperation("Current/Coordinators/" + _myId, - AgencySimpleOperationType::DELETE_OP)); + unreg.operations.push_back(AgencyOperation( + "Current/Coordinators/" + _myId, AgencySimpleOperationType::DELETE_OP)); } - - // Unregister + + // Unregister unreg.operations.push_back( - AgencyOperation("Current/ServersRegistered/" + _myId, - AgencySimpleOperationType::DELETE_OP)); - + AgencyOperation("Current/ServersRegistered/" + _myId, + AgencySimpleOperationType::DELETE_OP)); + comm.sendTransactionWithFailover(unreg, 120.0); - + while (_heartbeatThread->isRunning()) { usleep(50000); } diff --git a/arangod/Cluster/ClusterFeature.h b/arangod/Cluster/ClusterFeature.h index 1fae04544d..2c9e1cbadc 100644 --- a/arangod/Cluster/ClusterFeature.h +++ b/arangod/Cluster/ClusterFeature.h @@ -58,6 +58,7 @@ class ClusterFeature : public application_features::ApplicationFeature { std::string _arangodPath; std::string _dbserverConfig; std::string _coordinatorConfig; + uint32_t _systemReplicationFactor = 2; public: AgencyCallbackRegistry* agencyCallbackRegistry() const { diff --git a/js/client/modules/@arangodb/testing.js b/js/client/modules/@arangodb/testing.js index f05fc57a43..22f13a9c8e 100644 --- a/js/client/modules/@arangodb/testing.js +++ b/js/client/modules/@arangodb/testing.js @@ -478,10 +478,10 @@ function analyzeServerCrash (arangod, options, checkStr) { var cpf = "/proc/sys/kernel/core_pattern"; if (fs.isFile(cpf)) { - var matchApport=/.*apport.*/ - var matchVarTmp=/\/var\/tmp/ + var matchApport = /.*apport.*/; + var matchVarTmp = /\/var\/tmp/; var corePattern = fs.readBuffer(cpf); - var cp = corePattern.asciiSlice(0, corePattern.length) + var cp = corePattern.asciiSlice(0, corePattern.length); if (matchApport.exec(cp) != null) { print(RED + "apport handles corefiles on your system. Uninstall it if you want us to get corefiles for analysis."); diff --git a/js/common/modules/@arangodb/foxx/manager-utils.js b/js/common/modules/@arangodb/foxx/manager-utils.js index 4e6bfbfba3..0974cefa30 100644 --- a/js/common/modules/@arangodb/foxx/manager-utils.js +++ b/js/common/modules/@arangodb/foxx/manager-utils.js @@ -44,6 +44,8 @@ var mountAppRegEx = /\/APP(\/|$)/i; var mountNumberRegEx = /^\/[\d\-%]/; var pathRegex = /^((\.{0,2}(\/|\\))|(~\/)|[a-zA-Z]:\\)/; +const DEFAULT_REPLICATION_FACTOR_SYSTEM = internal.DEFAULT_REPLICATION_FACTOR_SYSTEM; + var getReadableName = function (name) { return name.split(/([-_]|\s)+/).map(function (token) { return token.slice(0, 1).toUpperCase() + token.slice(1); @@ -53,7 +55,7 @@ var getReadableName = function (name) { var getStorage = function () { var c = db._collection('_apps'); if (c === null) { - c = db._create('_apps', {isSystem: true, replicationFactor: 2, + c = db._create('_apps', {isSystem: true, replicationFactor: DEFAULT_REPLICATION_FACTOR_SYSTEM, distributeShardsLike: '_graphs', journalSize: 4 * 1024 * 1024}); c.ensureIndex({ type: 'hash', fields: [ 'mount' ], unique: true }); } diff --git a/js/server/bootstrap/modules/internal.js b/js/server/bootstrap/modules/internal.js index dfcc67470b..e105273305 100644 --- a/js/server/bootstrap/modules/internal.js +++ b/js/server/bootstrap/modules/internal.js @@ -346,4 +346,13 @@ exports.sendChunk = global.SYS_SEND_CHUNK; delete global.SYS_SEND_CHUNK; } + + // ////////////////////////////////////////////////////////////////////////////// + // / @brief default replication factor + // ////////////////////////////////////////////////////////////////////////////// + + if (global.SYS_DEFAULT_REPLICATION_FACTOR_SYSTEM) { + exports.DEFAULT_REPLICATION_FACTOR_SYSTEM = global.SYS_DEFAULT_REPLICATION_FACTOR_SYSTEM; + delete global.SYS_DEFAULT_REPLICATION_FACTOR_SYSTEM; + } }()); diff --git a/js/server/modules/@arangodb/statistics.js b/js/server/modules/@arangodb/statistics.js index 29561ad05c..f313f2dd6c 100644 --- a/js/server/modules/@arangodb/statistics.js +++ b/js/server/modules/@arangodb/statistics.js @@ -27,9 +27,10 @@ // / @author Copyright 2014, triAGENS GmbH, Cologne, Germany // ////////////////////////////////////////////////////////////////////////////// -var internal = require('internal'); -var cluster = require('@arangodb/cluster'); -var db = internal.db; +const internal = require('internal'); +const cluster = require('@arangodb/cluster'); +const db = internal.db; +const DEFAULT_REPLICATION_FACTOR_SYSTEM = internal.DEFAULT_REPLICATION_FACTOR_SYSTEM; // ////////////////////////////////////////////////////////////////////////////// // / @brief initialized @@ -51,7 +52,7 @@ function createStatisticsCollection (name) { try { r = db._create(name, { isSystem: true, waitForSync: false, - replicationFactor: 2, + replicationFactor: DEFAULT_REPLICATION_FACTOR_SYSTEM, journalSize: 8 * 1024 * 1024, distributeShardsLike: '_graphs' }); } catch (err) {} diff --git a/js/server/upgrade-database.js b/js/server/upgrade-database.js index 040ec82426..4802b95ab1 100644 --- a/js/server/upgrade-database.js +++ b/js/server/upgrade-database.js @@ -48,7 +48,7 @@ function upgrade () { // default replication factor for system collections - const DEFAULT_REPLICATION_FACTOR_SYSTEM = 2; + const DEFAULT_REPLICATION_FACTOR_SYSTEM = internal.DEFAULT_REPLICATION_FACTOR_SYSTEM; // system database only const DATABASE_SYSTEM = 1000;