From d35ebbe6a11b37a0c8bac6e5e433b948f543aa2f Mon Sep 17 00:00:00 2001 From: Matthew Von-Maszewski Date: Wed, 27 Dec 2017 10:36:42 -0500 Subject: [PATCH] move from devel to 3.3 the dynamic chooseTimeout() feature. (#4166) --- arangod/CMakeLists.txt | 1 + arangod/Cluster/ClusterFeature.cpp | 4 -- arangod/Cluster/ClusterFeature.h | 6 -- arangod/Cluster/ReplicationTimeoutFeature.cpp | 58 +++++++++++++++++ arangod/Cluster/ReplicationTimeoutFeature.h | 49 ++++++++++++++ arangod/MMFiles/MMFilesEngine.h | 3 + arangod/RestServer/arangod.cpp | 2 + arangod/RocksDBEngine/RocksDBEngine.h | 3 + arangod/StorageEngine/StorageEngine.h | 3 + arangod/Transaction/Methods.cpp | 65 +++++++------------ arangod/Transaction/Methods.h | 13 ++-- 11 files changed, 150 insertions(+), 57 deletions(-) create mode 100644 arangod/Cluster/ReplicationTimeoutFeature.cpp create mode 100644 arangod/Cluster/ReplicationTimeoutFeature.h diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index 619c91ad3b..1df7b1f299 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -204,6 +204,7 @@ SET(ARANGOD_SOURCES Cluster/FollowerInfo.cpp Cluster/DBServerAgencySync.cpp Cluster/HeartbeatThread.cpp + Cluster/ReplicationTimeoutFeature.cpp Cluster/RestAgencyCallbacksHandler.cpp Cluster/RestClusterHandler.cpp Cluster/ServerState.cpp diff --git a/arangod/Cluster/ClusterFeature.cpp b/arangod/Cluster/ClusterFeature.cpp index ed39518b8d..2eac922686 100644 --- a/arangod/Cluster/ClusterFeature.cpp +++ b/arangod/Cluster/ClusterFeature.cpp @@ -131,10 +131,6 @@ void ClusterFeature::collectOptions(std::shared_ptr options) { "replication factor for system collections", new UInt32Parameter(&_systemReplicationFactor)); - options->addOption("--cluster.synchronous-replication-timeout-factor", - "all synchronous replication timeouts are multiplied by this factor", - new DoubleParameter(&_syncReplTimeoutFactor)); - options->addHiddenOption("--cluster.create-waits-for-sync-replication", "active coordinator will wait for all replicas to create collection", new BooleanParameter(&_createWaitsForSyncReplication)); diff --git a/arangod/Cluster/ClusterFeature.h b/arangod/Cluster/ClusterFeature.h index 514ac83759..f1c2ffca0f 100644 --- a/arangod/Cluster/ClusterFeature.h +++ b/arangod/Cluster/ClusterFeature.h @@ -50,15 +50,10 @@ class ClusterFeature : public application_features::ApplicationFeature { return _agencyEndpoints; } - std::string agencyPrefix() { return _agencyPrefix; } - double syncReplTimeoutFactor() { - return _syncReplTimeoutFactor; - } - private: std::vector _agencyEndpoints; std::string _agencyPrefix; @@ -66,7 +61,6 @@ class ClusterFeature : public application_features::ApplicationFeature { std::string _myAddress; uint32_t _systemReplicationFactor = 2; bool _createWaitsForSyncReplication = true; - double _syncReplTimeoutFactor = 1.0; private: void reportRole(ServerState::RoleEnum); diff --git a/arangod/Cluster/ReplicationTimeoutFeature.cpp b/arangod/Cluster/ReplicationTimeoutFeature.cpp new file mode 100644 index 0000000000..6c3d943bbf --- /dev/null +++ b/arangod/Cluster/ReplicationTimeoutFeature.cpp @@ -0,0 +1,58 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Steemann +//////////////////////////////////////////////////////////////////////////////// + +#include "ReplicationTimeoutFeature.h" +#include "StorageEngine/EngineSelectorFeature.h" +#include "StorageEngine/StorageEngine.h" + +using namespace arangodb; +using namespace arangodb::options; + +double ReplicationTimeoutFeature::timeoutFactor = 1.0; +double ReplicationTimeoutFeature::timeoutPer4k = 0.1; +double ReplicationTimeoutFeature::lowerLimit = 0.5; + +ReplicationTimeoutFeature::ReplicationTimeoutFeature(application_features::ApplicationServer* server) + : ApplicationFeature(server, "ReplicationTimeout") { + setOptional(true); + requiresElevatedPrivileges(false); + startsAfter("EngineSelector"); + startsBefore("StorageEngine"); +} + +void ReplicationTimeoutFeature::collectOptions(std::shared_ptr options) { + options->addSection("cluster", "Configure the cluster"); + + options->addOption("--cluster.synchronous-replication-timeout-factor", + "all synchronous replication timeouts are multiplied by this factor", + new DoubleParameter(&timeoutFactor)); + + options->addHiddenOption("--cluster.synchronous-replication-timeout-per-4k", + "all synchronous replication timeouts are increased by this amount per 4096 bytes (in seconds)", + new DoubleParameter(&timeoutPer4k)); +} + +void ReplicationTimeoutFeature::prepare() { + // set minimum timeout. this depends on the selected storage engine + lowerLimit = EngineSelectorFeature::ENGINE->minimumSyncReplicationTimeout(); +} diff --git a/arangod/Cluster/ReplicationTimeoutFeature.h b/arangod/Cluster/ReplicationTimeoutFeature.h new file mode 100644 index 0000000000..463ded90ab --- /dev/null +++ b/arangod/Cluster/ReplicationTimeoutFeature.h @@ -0,0 +1,49 @@ +//////////////////////////////////////////////////////////////////////////////// +/// DISCLAIMER +/// +/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Jan Steemann +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ARANGOD_CLUSTER_REPLICATION_TIMEOUT_FEATURE_H +#define ARANGOD_CLUSTER_REPLICATION_TIMEOUT_FEATURE_H 1 + +#include "Basics/Common.h" + +#include "ApplicationFeatures/ApplicationFeature.h" + +namespace arangodb { + +class ReplicationTimeoutFeature : public application_features::ApplicationFeature { + public: + explicit ReplicationTimeoutFeature(application_features::ApplicationServer*); + + public: + void collectOptions(std::shared_ptr) override final; + void prepare() override final; + + public: + static double timeoutFactor; + static double timeoutPer4k; + static double lowerLimit; +}; + +} + +#endif diff --git a/arangod/MMFiles/MMFilesEngine.h b/arangod/MMFiles/MMFilesEngine.h index 289c0ec2fa..07244f4367 100644 --- a/arangod/MMFiles/MMFilesEngine.h +++ b/arangod/MMFiles/MMFilesEngine.h @@ -86,6 +86,9 @@ class MMFilesEngine final : public StorageEngine { // flush wal wait for collector void stop() override; + // minimum timeout for the synchronous replication + double minimumSyncReplicationTimeout() const override { return 0.5; } + bool supportsDfdb() const override { return true; } bool useRawDocumentPointers() override { return true; } diff --git a/arangod/RestServer/arangod.cpp b/arangod/RestServer/arangod.cpp index b3dc6da8e7..112d386c76 100644 --- a/arangod/RestServer/arangod.cpp +++ b/arangod/RestServer/arangod.cpp @@ -50,6 +50,7 @@ #include "Basics/ArangoGlobalContext.h" #include "Cache/CacheManagerFeature.h" #include "Cluster/ClusterFeature.h" +#include "Cluster/ReplicationTimeoutFeature.h" #include "GeneralServer/AuthenticationFeature.h" #include "GeneralServer/GeneralServerFeature.h" #include "Logger/LoggerBufferFeature.h" @@ -167,6 +168,7 @@ static int runServer(int argc, char** argv, ArangoGlobalContext &context) { server.addFeature(new PrivilegeFeature(&server)); server.addFeature(new RandomFeature(&server)); server.addFeature(new ReplicationFeature(&server)); + server.addFeature(new ReplicationTimeoutFeature(&server)); server.addFeature(new QueryRegistryFeature(&server)); server.addFeature(new SchedulerFeature(&server)); server.addFeature(new ScriptFeature(&server, &ret)); diff --git a/arangod/RocksDBEngine/RocksDBEngine.h b/arangod/RocksDBEngine/RocksDBEngine.h index ae28a139f2..6c5be436a1 100644 --- a/arangod/RocksDBEngine/RocksDBEngine.h +++ b/arangod/RocksDBEngine/RocksDBEngine.h @@ -88,6 +88,9 @@ class RocksDBEngine final : public StorageEngine { void stop() override; void unprepare() override; + // minimum timeout for the synchronous replication + double minimumSyncReplicationTimeout() const override { return 1.0; } + bool supportsDfdb() const override { return false; } bool useRawDocumentPointers() override { return false; } diff --git a/arangod/StorageEngine/StorageEngine.h b/arangod/StorageEngine/StorageEngine.h index 41b79f6f1c..b3aa2f5c78 100644 --- a/arangod/StorageEngine/StorageEngine.h +++ b/arangod/StorageEngine/StorageEngine.h @@ -107,6 +107,9 @@ class StorageEngine : public application_features::ApplicationFeature { // create storage-engine specific view virtual PhysicalView* createPhysicalView(LogicalView*, VPackSlice const&) = 0; + // minimum timeout for the synchronous replication + virtual double minimumSyncReplicationTimeout() const = 0; + // status functionality // -------------------- diff --git a/arangod/Transaction/Methods.cpp b/arangod/Transaction/Methods.cpp index f761e0a155..977287cd33 100644 --- a/arangod/Transaction/Methods.cpp +++ b/arangod/Transaction/Methods.cpp @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// -/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2014-2017 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,6 +37,7 @@ #include "Cluster/ClusterFeature.h" #include "Cluster/ClusterMethods.h" #include "Cluster/FollowerInfo.h" +#include "Cluster/ReplicationTimeoutFeature.h" #include "Cluster/ServerState.h" #include "Indexes/Index.h" #include "Logger/Logger.h" @@ -724,7 +725,7 @@ Result transaction::Methods::commit() { if (_state == nullptr || _state->status() != transaction::Status::RUNNING) { // transaction not created or not running - return Result(TRI_ERROR_TRANSACTION_INTERNAL); + return Result(TRI_ERROR_TRANSACTION_INTERNAL, "transaction not running on commit"); } ExecContext const* exe = ExecContext::CURRENT; @@ -751,7 +752,7 @@ Result transaction::Methods::commit() { Result transaction::Methods::abort() { if (_state == nullptr || _state->status() != transaction::Status::RUNNING) { // transaction not created or not running - return TRI_ERROR_TRANSACTION_INTERNAL; + return Result(TRI_ERROR_TRANSACTION_INTERNAL, "transaction not running on abort"); } CallbackInvoker invoker(this); @@ -769,16 +770,7 @@ Result transaction::Methods::abort() { /// @brief finish a transaction (commit or abort), based on the previous state Result transaction::Methods::finish(int errorNum) { - if (errorNum == TRI_ERROR_NO_ERROR) { - // there was no previous error, so we'll commit - return this->commit(); - } - - // there was a previous error, so we'll abort - this->abort(); - - // return original error number - return errorNum; + return finish(Result(errorNum)); } /// @brief finish a transaction (commit or abort), based on the previous state @@ -1350,33 +1342,21 @@ OperationResult transaction::Methods::insertCoordinator( /// @brief choose a timeout for synchronous replication, based on the /// number of documents we ship over -static double chooseTimeout(size_t count) { - static bool timeoutQueried = false; - static double timeoutFactor = 1.0; - static double lowerLimit = 0.5; - if (!timeoutQueried) { - // Multithreading is no problem here because these static variables - // are only ever set once in the lifetime of the server. - auto feature = application_features::ApplicationServer::getFeature("Cluster"); - timeoutFactor = feature->syncReplTimeoutFactor(); - timeoutQueried = true; - auto feature2 = application_features::ApplicationServer::getFeature("EngineSelector"); - if (feature2->engineName() == arangodb::RocksDBEngine::EngineName) { - lowerLimit = 1.0; - } - } - +static double chooseTimeout(size_t count, size_t totalBytes) { // We usually assume that a server can process at least 2500 documents // per second (this is a low estimate), and use a low limit of 0.5s // and a high timeout of 120s - double timeout = static_cast(count / 2500); - if (timeout < lowerLimit) { - return lowerLimit * timeoutFactor; - } else if (timeout > 120) { - return 120.0 * timeoutFactor; - } else { - return timeout * timeoutFactor; + double timeout = count / 2500.0; + + // Really big documents need additional adjustment. Using total size + // of all messages to handle worst case scenario of constrained resource + // processing all + timeout += (totalBytes / 4096) * ReplicationTimeoutFeature::timeoutPer4k; + + if (timeout < ReplicationTimeoutFeature::lowerLimit) { + return ReplicationTimeoutFeature::lowerLimit * ReplicationTimeoutFeature::timeoutFactor; } + return (std::min)(120.0, timeout) * ReplicationTimeoutFeature::timeoutFactor; } /// @brief create one or multiple documents in a collection, local @@ -1535,7 +1515,8 @@ OperationResult transaction::Methods::insertLocal( if (cc != nullptr) { // nullptr only happens on controlled shutdown size_t nrDone = 0; - size_t nrGood = cc->performRequests(requests, chooseTimeout(count), + size_t nrGood = cc->performRequests(requests, + chooseTimeout(count, body->size()*followers->size()), nrDone, Logger::REPLICATION, false); if (nrGood < followers->size()) { // If any would-be-follower refused to follow there must be a @@ -1883,7 +1864,8 @@ OperationResult transaction::Methods::modifyLocal( path, body); } size_t nrDone = 0; - size_t nrGood = cc->performRequests(requests, chooseTimeout(count), + size_t nrGood = cc->performRequests(requests, + chooseTimeout(count, body->size()*followers->size()), nrDone, Logger::REPLICATION, false); if (nrGood < followers->size()) { // If any would-be-follower refused to follow there must be a @@ -2161,7 +2143,8 @@ OperationResult transaction::Methods::removeLocal( body); } size_t nrDone = 0; - size_t nrGood = cc->performRequests(requests, chooseTimeout(count), + size_t nrGood = cc->performRequests(requests, + chooseTimeout(count, body->size()*followers->size()), nrDone, Logger::REPLICATION, false); if (nrGood < followers->size()) { // If any would-be-follower refused to follow there must be a @@ -2866,7 +2849,7 @@ bool transaction::Methods::isLocked(LogicalCollection* document, Result transaction::Methods::lockRecursive(TRI_voc_cid_t cid, AccessMode::Type type) { if (_state == nullptr || _state->status() != transaction::Status::RUNNING) { - return TRI_ERROR_TRANSACTION_INTERNAL; + return Result(TRI_ERROR_TRANSACTION_INTERNAL, "transaction not running on lock"); } TransactionCollection* trxColl = trxCollection(cid, type); TRI_ASSERT(trxColl != nullptr); @@ -2877,7 +2860,7 @@ Result transaction::Methods::lockRecursive(TRI_voc_cid_t cid, Result transaction::Methods::unlockRecursive(TRI_voc_cid_t cid, AccessMode::Type type) { if (_state == nullptr || _state->status() != transaction::Status::RUNNING) { - return TRI_ERROR_TRANSACTION_INTERNAL; + return Result(TRI_ERROR_TRANSACTION_INTERNAL, "transaction not running on unlock"); } TransactionCollection* trxColl = trxCollection(cid, type); TRI_ASSERT(trxColl != nullptr); diff --git a/arangod/Transaction/Methods.h b/arangod/Transaction/Methods.h index e04189ddc0..cca606ca26 100644 --- a/arangod/Transaction/Methods.h +++ b/arangod/Transaction/Methods.h @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// -/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany +/// Copyright 2014-2017 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,6 +38,12 @@ #include +#ifdef USE_ENTERPRISE + #define ENTERPRISE_VIRT virtual +#else + #define ENTERPRISE_VIRT +#endif + namespace arangodb { namespace basics { @@ -81,11 +87,6 @@ class TransactionState; class TransactionCollection; namespace transaction { -#ifdef USE_ENTERPRISE - #define ENTERPRISE_VIRT virtual -#else - #define ENTERPRISE_VIRT -#endif class Methods { friend class traverser::BaseEngine;