1
0
Fork 0

check for duplicate server endpoints on cluster startup (#9863)

* check for duplicate server endpoints on cluster startup

* Update arangod/Cluster/ServerState.cpp
This commit is contained in:
Jan 2019-09-03 10:49:08 +02:00 committed by KVS85
parent f8c156a44f
commit 2fe0527d25
3 changed files with 104 additions and 32 deletions

View File

@ -1,6 +1,16 @@
v3.5.1 (XXXX-XX-XX)
-------------------
* Check for duplicate server endpoints registered in the agency in sub-keys of
`/Current/ServersRegistered`.
Duplicate endpoints will be registered if more than one arangod instance is
started with the same value for startup option `--cluster.my-address`. This can
happen unintentionally due to typos in the configuration, copy&paste remainders etc.
In case a duplicate endpoint is detected on startup, a warning will be written
to the log, indicating which other server has already "acquired" the same endpoint.
* Make graph operations in general-graph transaction-aware.
* Fixed adding an orphan collections as the first collection in a SmartGraph.

View File

@ -25,6 +25,8 @@
#include <algorithm>
#include <iomanip>
#include <regex>
#include <unordered_map>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
@ -45,9 +47,21 @@
#include "StorageEngine/EngineSelectorFeature.h"
#include "VocBase/ticks.h"
#include <velocypack/Iterator.h>
#include <velocypack/velocypack-aliases.h>
using namespace arangodb;
using namespace arangodb::basics;
namespace {
// whenever the format of the generated UUIDs changes, please make sure to
// adjust this regex too!
std::regex const uuidRegex("^(SNGL|CRDN|PRMR|AGNT)-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$");
}
static constexpr char const* currentServersRegisteredPref =
"/Current/ServersRegistered/";
////////////////////////////////////////////////////////////////////////////////
/// @brief single instance of ServerState - will live as long as the server is
/// running
@ -67,7 +81,6 @@ ServerState::ServerState()
_host(),
_state(STATE_UNDEFINED),
_initialized(false),
_foxxmaster(),
_foxxmasterQueueupdate(false) {
setRole(ROLE_UNDEFINED);
}
@ -158,6 +171,8 @@ std::string ServerState::roleToString(ServerState::RoleEnum role) {
}
std::string ServerState::roleToShortString(ServerState::RoleEnum role) {
// whenever anything here changes, please make sure to
// adjust ::uuidRegex too!
switch (role) {
case ROLE_UNDEFINED:
return "NONE";
@ -414,7 +429,55 @@ bool ServerState::integrateIntoCluster(ServerState::RoleEnum role,
<< roleToString(role) << " and our id is " << id;
// now overwrite the entry in /Current/ServersRegistered/<myId>
return registerAtAgencyPhase2(comm, hadPersistedId);
bool registered = registerAtAgencyPhase2(comm, hadPersistedId);
if (!registered) {
return false;
}
// now check the configuration of the different servers for duplicate endpoints
AgencyCommResult result = comm.getValues(currentServersRegisteredPref);
if (result.successful()) {
auto slicePath = AgencyCommManager::slicePath(currentServersRegisteredPref);
auto valueSlice = result.slice()[0].get(slicePath);
if (valueSlice.isObject()) {
// map from server UUID to endpoint
std::unordered_map<std::string, std::string> endpoints;
for (auto const& it : VPackObjectIterator(valueSlice)) {
std::string const serverId = it.key.copyString();
if (!isUuid(serverId)) {
continue;
}
if (!it.value.isObject()) {
continue;
}
VPackSlice endpointSlice = it.value.get("endpoint");
if (!endpointSlice.isString()) {
continue;
}
auto it2 = endpoints.emplace(endpointSlice.copyString(), serverId);
if (!it2.second && it2.first->first != serverId) {
// duplicate entry!
LOG_TOPIC("9a134", WARN, Logger::CLUSTER)
<< "found duplicate server entry for endpoint '"
<< endpointSlice.copyString() << "', already used by other server " << it2.first->second
<< ". it looks like this is a (mis)configuration issue";
// anyway, continue with startup
}
}
}
}
return true;
}
/// @brief whether or not "value" is a server UUID
bool ServerState::isUuid(std::string const& value) const {
// whenever the format of the generated UUIDs changes, please make sure to
// adjust ::uuidRegex too!
return std::regex_match(value, ::uuidRegex);
}
//////////////////////////////////////////////////////////////////////////////
@ -451,7 +514,7 @@ void mkdir(std::string const& path) {
}
}
std::string ServerState::getUuidFilename() {
std::string ServerState::getUuidFilename() const {
auto dbpath = application_features::ApplicationServer::getFeature<DatabasePathFeature>(
"DatabasePath");
TRI_ASSERT(dbpath != nullptr);
@ -481,6 +544,8 @@ bool ServerState::writePersistedId(std::string const& id) {
}
std::string ServerState::generatePersistedId(RoleEnum const& role) {
// whenever the format of the generated UUID changes, please make sure to
// adjust ::uuidRegex too!
std::string id =
roleToShortString(role) + "-" + to_string(boost::uuids::random_generator()());
writePersistedId(id);
@ -507,9 +572,6 @@ std::string ServerState::getPersistedId() {
FATAL_ERROR_EXIT();
}
static constexpr char const* currentServersRegisteredPref =
"/Current/ServersRegistered/";
/// @brief check equality of engines with other registered servers
bool ServerState::checkEngineEquality(AgencyComm& comm) {
std::string engineName = EngineSelectorFeature::engineName();
@ -540,7 +602,7 @@ bool ServerState::checkEngineEquality(AgencyComm& comm) {
/// @brief create an id for a specified role
//////////////////////////////////////////////////////////////////////////////
bool ServerState::registerAtAgencyPhase1(AgencyComm& comm, const ServerState::RoleEnum& role) {
bool ServerState::registerAtAgencyPhase1(AgencyComm& comm, ServerState::RoleEnum const& role) {
std::string const agencyListKey = roleToAgencyListKey(role);
std::string const latestIdKey = "Latest" + roleToAgencyKey(role) + "Id";
@ -675,12 +737,19 @@ bool ServerState::registerAtAgencyPhase1(AgencyComm& comm, const ServerState::Ro
bool ServerState::registerAtAgencyPhase2(AgencyComm& comm, bool const hadPersistedId) {
TRI_ASSERT(!_id.empty() && !_myEndpoint.empty());
while (!application_features::ApplicationServer::isStopping()) {
std::string serverRegistrationPath = currentServersRegisteredPref + _id;
std::string rebootIdPath = "/Current/ServersKnown/" + _id + "/rebootId";
std::string const serverRegistrationPath = currentServersRegisteredPref + _id;
std::string const rebootIdPath = "/Current/ServersKnown/" + _id + "/rebootId";
// If we generated a new UUID, this *must not* exist in the Agency, so we
// should fail to register.
std::vector<AgencyPrecondition> pre;
if (!hadPersistedId) {
pre.emplace_back(AgencyPrecondition(rebootIdPath, AgencyPrecondition::Type::EMPTY, true));
}
while (!application_features::ApplicationServer::isStopping()) {
VPackBuilder builder;
try {
{
VPackObjectBuilder b(&builder);
builder.add("endpoint", VPackValue(_myEndpoint));
builder.add("advertisedEndpoint", VPackValue(_advertisedEndpoint));
@ -688,16 +757,6 @@ bool ServerState::registerAtAgencyPhase2(AgencyComm& comm, bool const hadPersist
builder.add("version", VPackValue(rest::Version::getNumericServerVersion()));
builder.add("versionString", VPackValue(rest::Version::getServerVersion()));
builder.add("engine", VPackValue(EngineSelectorFeature::engineName()));
} catch (...) {
LOG_TOPIC("de625", FATAL, arangodb::Logger::CLUSTER) << "out of memory";
FATAL_ERROR_EXIT();
}
// If we generated a new UUID, this *must not* exist in the Agency, so we
// should fail to register.
std::vector<AgencyPrecondition> pre;
if (!hadPersistedId) {
pre.emplace_back(AgencyPrecondition(rebootIdPath, AgencyPrecondition::Type::EMPTY, true));
}
AgencyWriteTransaction trx(
@ -770,7 +829,7 @@ void ServerState::setId(std::string const& id) {
/// @brief get the short server id
////////////////////////////////////////////////////////////////////////////////
uint32_t ServerState::getShortId() {
uint32_t ServerState::getShortId() const {
return _shortId.load(std::memory_order_relaxed);
}
@ -791,7 +850,7 @@ uint64_t ServerState::getRebootId() const {
return _rebootId;
}
void ServerState::setRebootId(uint64_t const rebootId) {
void ServerState::setRebootId(uint64_t rebootId) {
TRI_ASSERT(rebootId > 0);
_rebootId = rebootId;
}

View File

@ -203,14 +203,14 @@ class ServerState {
void setId(std::string const&);
/// @brief get the short id
uint32_t getShortId();
uint32_t getShortId() const;
/// @brief set the server short id
void setShortId(uint32_t);
uint64_t getRebootId() const;
void setRebootId(uint64_t const rebootId);
void setRebootId(uint64_t rebootId);
/// @brief get the server endpoint
std::string getEndpoint();
@ -256,8 +256,8 @@ class ServerState {
/// @brief sets server mode and propagates new mode to agency
Result propagateClusterReadOnly(bool);
/// file where the server persists it's UUID
std::string getUuidFilename();
/// file where the server persists its UUID
std::string getUuidFilename() const;
private:
/// @brief atomically fetches the server role
@ -278,13 +278,16 @@ class ServerState {
ResultT<uint64_t> readRebootIdFromAgency(AgencyComm& comm);
/// @brief register at agency, might already be done
bool registerAtAgencyPhase1(AgencyComm&, const RoleEnum&);
bool registerAtAgencyPhase1(AgencyComm&, RoleEnum const&);
/// @brief write the Current/ServersRegistered entry
bool registerAtAgencyPhase2(AgencyComm&, bool const hadPersistedId);
bool registerAtAgencyPhase2(AgencyComm&, bool hadPersistedId);
void setFoxxmasterSinceNow();
/// @brief whether or not "value" is a server UUID
bool isUuid(std::string const& value) const;
private:
/// @brief server role
std::atomic<RoleEnum> _role;
@ -334,10 +337,10 @@ class ServerState {
/// @brief whether or not the cluster was initialized
bool _initialized;
std::string _foxxmaster;
bool _foxxmasterQueueupdate;
std::string _foxxmaster;
// @brief point in time since which this server is the Foxxmaster
TRI_voc_tick_t _foxxmasterSince;
};