From f62042ef3a3313805a04a7f8dbee58462ac68af6 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Tue, 24 Jan 2017 12:11:57 +0100 Subject: [PATCH 1/4] Clean up code without changing behaviour. --- arangod/Cluster/ClusterInfo.cpp | 59 ++++++++------------------------- 1 file changed, 14 insertions(+), 45 deletions(-) diff --git a/arangod/Cluster/ClusterInfo.cpp b/arangod/Cluster/ClusterInfo.cpp index 284aa67ccc..691a0734ba 100644 --- a/arangod/Cluster/ClusterInfo.cpp +++ b/arangod/Cluster/ClusterInfo.cpp @@ -2360,35 +2360,19 @@ void ClusterInfo::loadCurrentDBServers() { std::vector ClusterInfo::getCurrentDBServers() { std::vector result; - int tries = 0; if (!_DBServersProt.isValid) { loadCurrentDBServers(); - tries++; } - while (true) { - { - // return a consistent state of servers - READ_LOCKER(readLocker, _DBServersProt.lock); + // return a consistent state of servers + READ_LOCKER(readLocker, _DBServersProt.lock); - result.reserve(_DBServers.size()); + result.reserve(_DBServers.size()); - for (auto& it : _DBServers) { - result.emplace_back(it.first); - } - - return result; - } - - if (++tries >= 2) { - break; - } - - // loadCurrentDBServers needs the write lock - loadCurrentDBServers(); + for (auto& it : _DBServers) { + result.emplace_back(it.first); } - // note that the result will be empty if we get here return result; } @@ -2560,35 +2544,20 @@ int ClusterInfo::getResponsibleShard(LogicalCollection* collInfo, std::vector ClusterInfo::getCurrentCoordinators() { std::vector result; - int tries = 0; if (!_coordinatorsProt.isValid) { loadCurrentCoordinators(); - tries++; - } - while (true) { - { - // return a consistent state of servers - READ_LOCKER(readLocker, _coordinatorsProt.lock); - - result.reserve(_coordinators.size()); - - for (auto& it : _coordinators) { - result.emplace_back(it.first); - } - - return result; - } - - if (++tries >= 2) { - break; - } - - // loadCurrentCoordinators needs the write lock - loadCurrentCoordinators(); } - // note that the 
result will be empty if we get here + // return a consistent state of servers + READ_LOCKER(readLocker, _coordinatorsProt.lock); + + result.reserve(_coordinators.size()); + + for (auto& it : _coordinators) { + result.emplace_back(it.first); + } + return result; } From e11c7c96c19e2f17c665e40f593227046a28db78 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Tue, 24 Jan 2017 12:30:07 +0100 Subject: [PATCH 2/4] Please static analysis. --- arangod/Agency/Node.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arangod/Agency/Node.cpp b/arangod/Agency/Node.cpp index 978829dc5f..1dab13d96b 100644 --- a/arangod/Agency/Node.cpp +++ b/arangod/Agency/Node.cpp @@ -139,6 +139,8 @@ std::string Node::uri() const { /// Move constructor Node::Node(Node&& other) : _node_name(std::move(other._node_name)), + _parent(nullptr), + _store(nullptr), _children(std::move(other._children)), _value(std::move(other._value)), _vecBuf(std::move(other._vecBuf)), From dc3c380904dae8af5de1c5a0d569a397176524c4 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Tue, 24 Jan 2017 12:30:32 +0100 Subject: [PATCH 3/4] Fix bug found by static analysis. 
--- arangod/Cluster/ClusterInfo.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arangod/Cluster/ClusterInfo.h b/arangod/Cluster/ClusterInfo.h index 1a0f7f48ea..0f4fa1634b 100644 --- a/arangod/Cluster/ClusterInfo.h +++ b/arangod/Cluster/ClusterInfo.h @@ -186,8 +186,7 @@ class CollectionInfoCurrent { bool getFlag(char const* name, ShardID const& shardID) const { auto it = _vpacks.find(shardID); if (it != _vpacks.end()) { - return arangodb::basics::VelocyPackHelper::getBooleanValue(it->second->slice(), "errorMessage", - ""); + return arangodb::basics::VelocyPackHelper::getBooleanValue(it->second->slice(), name, false); } return false; } From bdb3d1d8b94ee2a48c1e727274582ed58f22cedc Mon Sep 17 00:00:00 2001 From: jsteemann Date: Tue, 24 Jan 2017 12:44:21 +0100 Subject: [PATCH 4/4] added hidden parameter `--wal.flush-timeout` --- arangod/StorageEngine/MMFilesWalSlots.cpp | 10 ++++++++-- arangod/Wal/LogfileManager.cpp | 7 +++++-- arangod/Wal/LogfileManager.h | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arangod/StorageEngine/MMFilesWalSlots.cpp b/arangod/StorageEngine/MMFilesWalSlots.cpp index c00d87334a..3732f5d74c 100644 --- a/arangod/StorageEngine/MMFilesWalSlots.cpp +++ b/arangod/StorageEngine/MMFilesWalSlots.cpp @@ -488,11 +488,12 @@ void MMFilesWalSlots::getActiveTickRange(wal::Logfile* logfile, TRI_voc_tick_t& /// @brief close a logfile int MMFilesWalSlots::closeLogfile(MMFilesWalSlot::TickType& lastCommittedTick, bool& worked) { - int iterations = 0; bool hasWaited = false; worked = false; - while (++iterations < 1000) { + double const maxWait = 30.0; + double const end = TRI_microtime() + maxWait; + while (true) { { MUTEX_LOCKER(mutexLocker, _lock); @@ -593,6 +594,11 @@ int MMFilesWalSlots::closeLogfile(MMFilesWalSlot::TickType& lastCommittedTick, b if (mustWait) { guard.wait(10 * 1000); } + + if (TRI_microtime() >= end) { + // time's up! 
+ break; + } } return TRI_ERROR_ARANGO_NO_JOURNAL; diff --git a/arangod/Wal/LogfileManager.cpp b/arangod/Wal/LogfileManager.cpp index ea3c40d7dc..752ccc09f7 100644 --- a/arangod/Wal/LogfileManager.cpp +++ b/arangod/Wal/LogfileManager.cpp @@ -174,6 +174,9 @@ void LogfileManager::collectOptions(std::shared_ptr options) { "--wal.ignore-recovery-errors", "continue recovery even if re-applying operations fails", new BooleanParameter(&_ignoreRecoveryErrors)); + + options->addHiddenOption("--wal.flush-timeout", "flush timeout (in milliseconds)", + new UInt64Parameter(&_flushTimeout)); options->addOption("--wal.logfile-size", "size of each logfile (in bytes)", new UInt32Parameter(&_filesize)); @@ -1319,7 +1322,7 @@ int LogfileManager::getWriteableLogfile(uint32_t size, } size_t iterations = 0; - double const end = TRI_microtime() + 15.0; + double const end = TRI_microtime() + (_flushTimeout / 1000.0); while (true) { { @@ -1383,7 +1386,7 @@ int LogfileManager::getWriteableLogfile(uint32_t size, } TRI_ASSERT(result == nullptr); - LOG(ERR) << "unable to acquire writeable WAL logfile after 15 s"; + LOG(ERR) << "unable to acquire writeable WAL logfile after " << _flushTimeout << " ms"; return TRI_ERROR_LOCK_TIMEOUT; } diff --git a/arangod/Wal/LogfileManager.h b/arangod/Wal/LogfileManager.h index 0c7e1db086..b4ed3943a9 100644 --- a/arangod/Wal/LogfileManager.h +++ b/arangod/Wal/LogfileManager.h @@ -465,6 +465,7 @@ class LogfileManager final : public application_features::ApplicationFeature { uint32_t _historicLogfiles = 10; bool _ignoreLogfileErrors = false; bool _ignoreRecoveryErrors = false; + uint64_t _flushTimeout = 15000; uint32_t _filesize = 32 * 1024 * 1024; uint32_t _maxOpenLogfiles = 0; uint32_t _reserveLogfiles = 3;