mirror of https://gitee.com/bigwinds/arangodb
check for duplicate server endpoints on cluster startup (#9863)
* check for duplicate server endpoints on cluster startup * Update arangod/Cluster/ServerState.cpp
This commit is contained in:
parent
f8c156a44f
commit
2fe0527d25
10
CHANGELOG
10
CHANGELOG
|
@ -1,6 +1,16 @@
|
|||
v3.5.1 (XXXX-XX-XX)
|
||||
-------------------
|
||||
|
||||
* Check for duplicate server endpoints registered in the agency in sub-keys of
|
||||
`/Current/ServersRegistered`.
|
||||
|
||||
Duplicate endpoints will be registered if more than one arangod instance is
|
||||
started with the same value for startup option `--cluster.my-address`. This can
|
||||
happen unintentionally due to typos in the configuration, copy&paste remainders etc.
|
||||
|
||||
In case a duplicate endpoint is detected on startup, a warning will be written
|
||||
to the log, indicating which other server has already "acquired" the same endpoint.
|
||||
|
||||
* Make graph operations in general-graph transaction-aware.
|
||||
|
||||
* Fixed adding an orphan collections as the first collection in a SmartGraph.
|
||||
|
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
#include <regex>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
#include <boost/uuid/uuid_generators.hpp>
|
||||
|
@ -45,9 +47,21 @@
|
|||
#include "StorageEngine/EngineSelectorFeature.h"
|
||||
#include "VocBase/ticks.h"
|
||||
|
||||
#include <velocypack/Iterator.h>
|
||||
#include <velocypack/velocypack-aliases.h>
|
||||
|
||||
using namespace arangodb;
|
||||
using namespace arangodb::basics;
|
||||
|
||||
namespace {
|
||||
// whenever the format of the generated UUIDs changes, please make sure to
|
||||
// adjust this regex too!
|
||||
std::regex const uuidRegex("^(SNGL|CRDN|PRMR|AGNT)-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$");
|
||||
}
|
||||
|
||||
static constexpr char const* currentServersRegisteredPref =
|
||||
"/Current/ServersRegistered/";
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief single instance of ServerState - will live as long as the server is
|
||||
/// running
|
||||
|
@ -67,7 +81,6 @@ ServerState::ServerState()
|
|||
_host(),
|
||||
_state(STATE_UNDEFINED),
|
||||
_initialized(false),
|
||||
_foxxmaster(),
|
||||
_foxxmasterQueueupdate(false) {
|
||||
setRole(ROLE_UNDEFINED);
|
||||
}
|
||||
|
@ -158,6 +171,8 @@ std::string ServerState::roleToString(ServerState::RoleEnum role) {
|
|||
}
|
||||
|
||||
std::string ServerState::roleToShortString(ServerState::RoleEnum role) {
|
||||
// whenever anything here changes, please make sure to
|
||||
// adjust ::uuidRegex too!
|
||||
switch (role) {
|
||||
case ROLE_UNDEFINED:
|
||||
return "NONE";
|
||||
|
@ -414,7 +429,55 @@ bool ServerState::integrateIntoCluster(ServerState::RoleEnum role,
|
|||
<< roleToString(role) << " and our id is " << id;
|
||||
|
||||
// now overwrite the entry in /Current/ServersRegistered/<myId>
|
||||
return registerAtAgencyPhase2(comm, hadPersistedId);
|
||||
bool registered = registerAtAgencyPhase2(comm, hadPersistedId);
|
||||
if (!registered) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// now check the configuration of the different servers for duplicate endpoints
|
||||
AgencyCommResult result = comm.getValues(currentServersRegisteredPref);
|
||||
|
||||
if (result.successful()) {
|
||||
auto slicePath = AgencyCommManager::slicePath(currentServersRegisteredPref);
|
||||
auto valueSlice = result.slice()[0].get(slicePath);
|
||||
|
||||
if (valueSlice.isObject()) {
|
||||
// map from server UUID to endpoint
|
||||
std::unordered_map<std::string, std::string> endpoints;
|
||||
for (auto const& it : VPackObjectIterator(valueSlice)) {
|
||||
std::string const serverId = it.key.copyString();
|
||||
|
||||
if (!isUuid(serverId)) {
|
||||
continue;
|
||||
}
|
||||
if (!it.value.isObject()) {
|
||||
continue;
|
||||
}
|
||||
VPackSlice endpointSlice = it.value.get("endpoint");
|
||||
if (!endpointSlice.isString()) {
|
||||
continue;
|
||||
}
|
||||
auto it2 = endpoints.emplace(endpointSlice.copyString(), serverId);
|
||||
if (!it2.second && it2.first->first != serverId) {
|
||||
// duplicate entry!
|
||||
LOG_TOPIC("9a134", WARN, Logger::CLUSTER)
|
||||
<< "found duplicate server entry for endpoint '"
|
||||
<< endpointSlice.copyString() << "', already used by other server " << it2.first->second
|
||||
<< ". it looks like this is a (mis)configuration issue";
|
||||
// anyway, continue with startup
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// @brief whether or not "value" is a server UUID
|
||||
bool ServerState::isUuid(std::string const& value) const {
|
||||
// whenever the format of the generated UUIDs changes, please make sure to
|
||||
// adjust ::uuidRegex too!
|
||||
return std::regex_match(value, ::uuidRegex);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -451,7 +514,7 @@ void mkdir(std::string const& path) {
|
|||
}
|
||||
}
|
||||
|
||||
std::string ServerState::getUuidFilename() {
|
||||
std::string ServerState::getUuidFilename() const {
|
||||
auto dbpath = application_features::ApplicationServer::getFeature<DatabasePathFeature>(
|
||||
"DatabasePath");
|
||||
TRI_ASSERT(dbpath != nullptr);
|
||||
|
@ -481,6 +544,8 @@ bool ServerState::writePersistedId(std::string const& id) {
|
|||
}
|
||||
|
||||
std::string ServerState::generatePersistedId(RoleEnum const& role) {
|
||||
// whenever the format of the generated UUID changes, please make sure to
|
||||
// adjust ::uuidRegex too!
|
||||
std::string id =
|
||||
roleToShortString(role) + "-" + to_string(boost::uuids::random_generator()());
|
||||
writePersistedId(id);
|
||||
|
@ -507,9 +572,6 @@ std::string ServerState::getPersistedId() {
|
|||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
|
||||
static constexpr char const* currentServersRegisteredPref =
|
||||
"/Current/ServersRegistered/";
|
||||
|
||||
/// @brief check equality of engines with other registered servers
|
||||
bool ServerState::checkEngineEquality(AgencyComm& comm) {
|
||||
std::string engineName = EngineSelectorFeature::engineName();
|
||||
|
@ -540,7 +602,7 @@ bool ServerState::checkEngineEquality(AgencyComm& comm) {
|
|||
/// @brief create an id for a specified role
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool ServerState::registerAtAgencyPhase1(AgencyComm& comm, const ServerState::RoleEnum& role) {
|
||||
bool ServerState::registerAtAgencyPhase1(AgencyComm& comm, ServerState::RoleEnum const& role) {
|
||||
std::string const agencyListKey = roleToAgencyListKey(role);
|
||||
std::string const latestIdKey = "Latest" + roleToAgencyKey(role) + "Id";
|
||||
|
||||
|
@ -675,12 +737,19 @@ bool ServerState::registerAtAgencyPhase1(AgencyComm& comm, const ServerState::Ro
|
|||
bool ServerState::registerAtAgencyPhase2(AgencyComm& comm, bool const hadPersistedId) {
|
||||
TRI_ASSERT(!_id.empty() && !_myEndpoint.empty());
|
||||
|
||||
while (!application_features::ApplicationServer::isStopping()) {
|
||||
std::string serverRegistrationPath = currentServersRegisteredPref + _id;
|
||||
std::string rebootIdPath = "/Current/ServersKnown/" + _id + "/rebootId";
|
||||
std::string const serverRegistrationPath = currentServersRegisteredPref + _id;
|
||||
std::string const rebootIdPath = "/Current/ServersKnown/" + _id + "/rebootId";
|
||||
|
||||
// If we generated a new UUID, this *must not* exist in the Agency, so we
|
||||
// should fail to register.
|
||||
std::vector<AgencyPrecondition> pre;
|
||||
if (!hadPersistedId) {
|
||||
pre.emplace_back(AgencyPrecondition(rebootIdPath, AgencyPrecondition::Type::EMPTY, true));
|
||||
}
|
||||
|
||||
while (!application_features::ApplicationServer::isStopping()) {
|
||||
VPackBuilder builder;
|
||||
try {
|
||||
{
|
||||
VPackObjectBuilder b(&builder);
|
||||
builder.add("endpoint", VPackValue(_myEndpoint));
|
||||
builder.add("advertisedEndpoint", VPackValue(_advertisedEndpoint));
|
||||
|
@ -688,16 +757,6 @@ bool ServerState::registerAtAgencyPhase2(AgencyComm& comm, bool const hadPersist
|
|||
builder.add("version", VPackValue(rest::Version::getNumericServerVersion()));
|
||||
builder.add("versionString", VPackValue(rest::Version::getServerVersion()));
|
||||
builder.add("engine", VPackValue(EngineSelectorFeature::engineName()));
|
||||
} catch (...) {
|
||||
LOG_TOPIC("de625", FATAL, arangodb::Logger::CLUSTER) << "out of memory";
|
||||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
|
||||
// If we generated a new UUID, this *must not* exist in the Agency, so we
|
||||
// should fail to register.
|
||||
std::vector<AgencyPrecondition> pre;
|
||||
if (!hadPersistedId) {
|
||||
pre.emplace_back(AgencyPrecondition(rebootIdPath, AgencyPrecondition::Type::EMPTY, true));
|
||||
}
|
||||
|
||||
AgencyWriteTransaction trx(
|
||||
|
@ -770,7 +829,7 @@ void ServerState::setId(std::string const& id) {
|
|||
/// @brief get the short server id
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
uint32_t ServerState::getShortId() {
|
||||
uint32_t ServerState::getShortId() const {
|
||||
return _shortId.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
@ -791,7 +850,7 @@ uint64_t ServerState::getRebootId() const {
|
|||
return _rebootId;
|
||||
}
|
||||
|
||||
void ServerState::setRebootId(uint64_t const rebootId) {
|
||||
void ServerState::setRebootId(uint64_t rebootId) {
|
||||
TRI_ASSERT(rebootId > 0);
|
||||
_rebootId = rebootId;
|
||||
}
|
||||
|
|
|
@ -203,14 +203,14 @@ class ServerState {
|
|||
void setId(std::string const&);
|
||||
|
||||
/// @brief get the short id
|
||||
uint32_t getShortId();
|
||||
uint32_t getShortId() const;
|
||||
|
||||
/// @brief set the server short id
|
||||
void setShortId(uint32_t);
|
||||
|
||||
uint64_t getRebootId() const;
|
||||
|
||||
void setRebootId(uint64_t const rebootId);
|
||||
void setRebootId(uint64_t rebootId);
|
||||
|
||||
/// @brief get the server endpoint
|
||||
std::string getEndpoint();
|
||||
|
@ -256,8 +256,8 @@ class ServerState {
|
|||
/// @brief sets server mode and propagates new mode to agency
|
||||
Result propagateClusterReadOnly(bool);
|
||||
|
||||
/// file where the server persists it's UUID
|
||||
std::string getUuidFilename();
|
||||
/// file where the server persists its UUID
|
||||
std::string getUuidFilename() const;
|
||||
|
||||
private:
|
||||
/// @brief atomically fetches the server role
|
||||
|
@ -278,13 +278,16 @@ class ServerState {
|
|||
ResultT<uint64_t> readRebootIdFromAgency(AgencyComm& comm);
|
||||
|
||||
/// @brief register at agency, might already be done
|
||||
bool registerAtAgencyPhase1(AgencyComm&, const RoleEnum&);
|
||||
bool registerAtAgencyPhase1(AgencyComm&, RoleEnum const&);
|
||||
|
||||
/// @brief write the Current/ServersRegistered entry
|
||||
bool registerAtAgencyPhase2(AgencyComm&, bool const hadPersistedId);
|
||||
bool registerAtAgencyPhase2(AgencyComm&, bool hadPersistedId);
|
||||
|
||||
void setFoxxmasterSinceNow();
|
||||
|
||||
/// @brief whether or not "value" is a server UUID
|
||||
bool isUuid(std::string const& value) const;
|
||||
|
||||
private:
|
||||
/// @brief server role
|
||||
std::atomic<RoleEnum> _role;
|
||||
|
@ -334,10 +337,10 @@ class ServerState {
|
|||
/// @brief whether or not the cluster was initialized
|
||||
bool _initialized;
|
||||
|
||||
std::string _foxxmaster;
|
||||
|
||||
bool _foxxmasterQueueupdate;
|
||||
|
||||
std::string _foxxmaster;
|
||||
|
||||
// @brief point in time since which this server is the Foxxmaster
|
||||
TRI_voc_tick_t _foxxmasterSince;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue