mirror of https://gitee.com/bigwinds/arangodb
supervising on
This commit is contained in:
parent
439118fbfa
commit
e3b78a8f6e
|
@ -105,6 +105,16 @@ std::string Node::uri() const {
|
||||||
return path.str();
|
return path.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// @brief Move constructor: takes over other's name, children and value.
/// Only _node_name, _children and _value appear in the initializer list.
/// NOTE(review): confirm that all remaining members (e.g. a parent pointer)
/// have safe default initializers, since they are not set here.
Node::Node(Node&& other) :
|
||||||
|
_node_name(std::move(other._node_name)),
|
||||||
|
_children(std::move(other._children)),
|
||||||
|
_value(std::move(other._value)) {}
|
||||||
|
|
||||||
|
/// @brief Copy constructor: copies other's name, children and value.
/// Only _node_name, _children and _value appear in the initializer list.
/// NOTE(review): if _children stores shared pointers to child nodes, this
/// copy shares the children with other rather than cloning them -- confirm
/// that this is intended.
Node::Node(Node const& other) :
|
||||||
|
_node_name(other._node_name),
|
||||||
|
_children(other._children),
|
||||||
|
_value(other._value) {}
|
||||||
|
|
||||||
// Assignment of rhs slice
|
// Assignment of rhs slice
|
||||||
Node& Node::operator=(VPackSlice const& slice) {
|
Node& Node::operator=(VPackSlice const& slice) {
|
||||||
// 1. remove any existing time to live entry
|
// 1. remove any existing time to live entry
|
||||||
|
@ -119,17 +129,29 @@ Node& Node::operator=(VPackSlice const& slice) {
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Assignment of rhs node
|
||||||
|
Node& Node::operator=(Node&& rhs) {
|
||||||
|
// 1. remove any existing time to live entry
|
||||||
|
// 2. copy children map
|
||||||
|
// 3. copy from rhs to buffer pointer
|
||||||
|
// Must not copy rhs's _parent, _ttl, _observers
|
||||||
|
removeTimeToLive();
|
||||||
|
_node_name = std::move(rhs._node_name);
|
||||||
|
_children = std::move(rhs._children);
|
||||||
|
_value = std::move(rhs._value);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
// Assignment of rhs node
|
// Assignment of rhs node
|
||||||
Node& Node::operator=(Node const& rhs) {
|
Node& Node::operator=(Node const& rhs) {
|
||||||
// 1. remove any existing time to live entry
|
// 1. remove any existing time to live entry
|
||||||
// 2. clear children map
|
// 2. clear children map
|
||||||
// 3. copy from rhs to buffer pointer
|
// 3. copy from rhs to buffer pointer
|
||||||
// 4. inform all observers here and above
|
// Must not copy rhs's _parent, _ttl, _observers
|
||||||
// Must not copy rhs's _parent, _ttl, _observers
|
|
||||||
removeTimeToLive();
|
removeTimeToLive();
|
||||||
_node_name = rhs._node_name;
|
_node_name = rhs._node_name;
|
||||||
_value = rhs._value;
|
|
||||||
_children = rhs._children;
|
_children = rhs._children;
|
||||||
|
_value = rhs._value;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -88,6 +88,9 @@ class Node {
|
||||||
/// @brief Construct with name
|
/// @brief Construct with name
|
||||||
explicit Node(std::string const& name);
|
explicit Node(std::string const& name);
|
||||||
|
|
||||||
|
Node(Node const& other);
|
||||||
|
Node(Node&& other);
|
||||||
|
|
||||||
/// @brief Construct with name and introduce to tree under parent
|
/// @brief Construct with name and introduce to tree under parent
|
||||||
Node(std::string const& name, Node* parent);
|
Node(std::string const& name, Node* parent);
|
||||||
|
|
||||||
|
@ -105,6 +108,7 @@ class Node {
|
||||||
|
|
||||||
/// @brief Apply rhs to this node (deep copy of rhs)
|
/// @brief Apply rhs to this node (deep copy of rhs)
|
||||||
Node& operator=(Node const& node);
|
Node& operator=(Node const& node);
|
||||||
|
Node& operator=(Node&& node);
|
||||||
|
|
||||||
/// @brief Apply value slice to this node
|
/// @brief Apply value slice to this node
|
||||||
Node& operator=(arangodb::velocypack::Slice const&);
|
Node& operator=(arangodb::velocypack::Slice const&);
|
||||||
|
|
|
@ -99,6 +99,36 @@ inline static bool endpointPathFromUrl(std::string const& url,
|
||||||
// Create with name
|
// Create with name
|
||||||
Store::Store(std::string const& name) : Thread(name), _node(name, this) {}
|
Store::Store(std::string const& name) : Thread(name), _node(name, this) {}
|
||||||
|
|
||||||
|
/// @brief Copy constructor: the Thread base is constructed with the name of
/// other's node; agent, time table, observer/observed tables and the node
/// itself are copied from other.
/// NOTE(review): the name constructor builds _node with (name, this), while
/// here _node is copy-constructed -- confirm the copied node does not need a
/// reference back to this store.
Store::Store(Store const& other) :
|
||||||
|
Thread(other._node.name()), _agent(other._agent), _timeTable(other._timeTable),
|
||||||
|
_observerTable(other._observerTable), _observedTable(other._observedTable),
|
||||||
|
_node(other._node) {}
|
||||||
|
|
||||||
|
/// @brief Move constructor: the Thread base is constructed with the name of
/// other's node; agent, time table, observer/observed tables and the node
/// are moved out of other.
/// The Thread base is initialized before the members, so other._node.name()
/// is read before other._node is moved from.
Store::Store(Store&& other) :
|
||||||
|
Thread(other._node.name()), _agent(std::move(other._agent)),
|
||||||
|
_timeTable(std::move(other._timeTable)),
|
||||||
|
_observerTable(std::move(other._observerTable)),
|
||||||
|
_observedTable(std::move(other._observedTable)),
|
||||||
|
_node(std::move(other._node)) {}
|
||||||
|
|
||||||
|
/// @brief Copy assignment: copies agent, time table, observer/observed
/// tables and the node from rhs. The Thread base is not assigned.
/// NOTE(review): _node is assigned last; Node's copy assignment calls
/// removeTimeToLive() first, so the order relative to the _timeTable
/// assignment may matter -- keep the statement order unless verified.
Store& Store::operator=(Store const& rhs) {
|
||||||
|
_agent = rhs._agent;
|
||||||
|
_timeTable = rhs._timeTable;
|
||||||
|
_observerTable = rhs._observerTable;
|
||||||
|
_observedTable = rhs._observedTable;
|
||||||
|
_node = rhs._node;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Move assignment: takes over agent, time table, observer/observed
/// tables and the node from rhs. The Thread base is not assigned.
Store& Store::operator=(Store&& rhs) {
  // Self-move must leave the store untouched; moving the tables and the
  // node onto themselves would leave them in an unspecified state.
  if (this != &rhs) {
    _agent = std::move(rhs._agent);
    _timeTable = std::move(rhs._timeTable);
    _observerTable = std::move(rhs._observerTable);
    _observedTable = std::move(rhs._observedTable);
    // Kept last, as in the member order used by the other special members
    _node = std::move(rhs._node);
  }
  return *this;
}
|
||||||
|
|
||||||
// Default dtor
|
// Default dtor
|
||||||
Store::~Store() {}
|
Store::~Store() {}
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,18 @@ class Store : public arangodb::Thread {
|
||||||
/// @brief Destruct
|
/// @brief Destruct
|
||||||
virtual ~Store();
|
virtual ~Store();
|
||||||
|
|
||||||
|
/// @brief Copy constructor
|
||||||
|
Store (Store const& other);
|
||||||
|
|
||||||
|
/// @brief Move constructor
|
||||||
|
Store (Store&& other);
|
||||||
|
|
||||||
|
/// @brief Copy assignment
|
||||||
|
Store& operator= (Store const& rhs);
|
||||||
|
|
||||||
|
/// @brief Move assignment
|
||||||
|
Store& operator= (Store&& rhs);
|
||||||
|
|
||||||
/// @brief Apply entry in query
|
/// @brief Apply entry in query
|
||||||
std::vector<bool> apply(query_t const& query);
|
std::vector<bool> apply(query_t const& query);
|
||||||
|
|
||||||
|
|
|
@ -29,11 +29,23 @@
|
||||||
#include "Basics/ConditionLocker.h"
|
#include "Basics/ConditionLocker.h"
|
||||||
#include "VocBase/server.h"
|
#include "VocBase/server.h"
|
||||||
|
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
using namespace arangodb;
|
using namespace arangodb;
|
||||||
|
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
namespace consensus {
|
namespace consensus {
|
||||||
|
|
||||||
|
/// @brief Format a time point as "%Y-%m-%dT%H:%M:%SZ",
/// e.g. "2016-05-10T12:34:56Z".
/// @param t time point to format
/// @return the formatted timestamp, or an empty string if formatting failed
std::string printTimestamp(Supervision::TimePoint const& t) {
  time_t tt = std::chrono::system_clock::to_time_t(t);
  struct tm tb;
  TRI_gmtime(tt, &tb);
  // 20 characters of output plus the terminating NUL
  char buffer[21];
  // strftime returns the number of characters written, not counting the
  // terminating NUL, or 0 if the result did not fit. Using that count as
  // the string length keeps the NUL out of the returned string and avoids
  // reading indeterminate bytes on failure.
  size_t const len =
      ::strftime(buffer, sizeof(buffer), "%Y-%m-%dT%H:%M:%SZ", &tb);
  return std::string(buffer, len);
}
|
||||||
|
|
||||||
inline arangodb::consensus::write_ret_t makeReport(Agent* _agent,
|
inline arangodb::consensus::write_ret_t makeReport(Agent* _agent,
|
||||||
Builder const& report) {
|
Builder const& report) {
|
||||||
query_t envelope = std::make_shared<Builder>();
|
query_t envelope = std::make_shared<Builder>();
|
||||||
|
@ -48,17 +60,58 @@ inline arangodb::consensus::write_ret_t makeReport(Agent* _agent,
|
||||||
return _agent->write(envelope);
|
return _agent->write(envelope);
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string const pendingPrefix = "/arango/Supervision/Jobs/Pending/";
|
static std::string const pendingPrefix = "/Supervision/Jobs/Pending/";
|
||||||
static std::string const collectionsPrefix = "/arango/Plan/Collections/";
|
static std::string const collectionsPrefix = "/Plan/Collections/";
|
||||||
|
static std::string const toDoPrefix = "/Target/ToDo";
|
||||||
|
|
||||||
struct FailedServerJob {
|
struct MoveShard : public Job {
|
||||||
FailedServerJob(Node const& snapshot, Agent* agent, uint64_t jobId,
|
|
||||||
std::string const& failed) {
|
MoveShard (std::string const& creator, std::string const& database,
|
||||||
|
std::string const& collection, std::string const& shard,
|
||||||
|
std::string const& fromServer, std::string const& toServer,
|
||||||
|
uint64_t const& jobId, std::string const& agencyPrefix,
|
||||||
|
Agent* agent) {
|
||||||
|
|
||||||
|
todoEntry (creator, database, collection, shard, fromServer, toServer,
|
||||||
|
jobId, agencyPrefix, agent);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void todoEntry (std::string const& creator, std::string const& database,
|
||||||
|
std::string const& collection, std::string const& shard,
|
||||||
|
std::string const& fromServer, std::string const& toServer,
|
||||||
|
uint64_t const& jobId, std::string const& agencyPrefix,
|
||||||
|
Agent* agent) {
|
||||||
|
Builder todo;
|
||||||
|
todo.openArray(); todo.openObject();
|
||||||
|
todo.add(VPackValue(agencyPrefix + toDoPrefix + "/"
|
||||||
|
+ std::to_string(jobId)));
|
||||||
|
{
|
||||||
|
VPackObjectBuilder entry(&todo);
|
||||||
|
todo.add("creator", VPackValue(creator));
|
||||||
|
todo.add("type", VPackValue("moveShard"));
|
||||||
|
todo.add("database", VPackValue(database));
|
||||||
|
todo.add("collection", VPackValue(collection));
|
||||||
|
todo.add("shard", VPackValue(shard));
|
||||||
|
todo.add("fromServer", VPackValue(fromServer));
|
||||||
|
todo.add("toServer", VPackValue(toServer));
|
||||||
|
}
|
||||||
|
todo.close(); todo.close();
|
||||||
|
write_ret_t ret = makeReport(agent, todo);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FailedServer : public Job {
|
||||||
|
FailedServer(Node const& snapshot, Agent* agent, uint64_t jobId,
|
||||||
|
std::string const& failed, std::string agencyPrefix) {
|
||||||
// 1. find all shards in plan, where failed was leader.
|
// 1. find all shards in plan, where failed was leader.
|
||||||
// 2. swap positions in plan between failed and a random in sync follower
|
// 2. swap positions in plan between failed and a random in sync follower
|
||||||
|
|
||||||
Node::Children const& databases =
|
Node::Children const& databases =
|
||||||
snapshot("/arango/Plan/Collections").children();
|
snapshot("/Plan/Collections").children();
|
||||||
|
|
||||||
for (auto const& database : databases) {
|
for (auto const& database : databases) {
|
||||||
for (auto const& collptr : database.second->children()) {
|
for (auto const& collptr : database.second->children()) {
|
||||||
|
@ -72,22 +125,25 @@ struct FailedServerJob {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
reportJobInSupervision(jobId, shard, failed);
|
//MoveShard ()
|
||||||
planChanges(collptr, database, shard);
|
reportJobInSupervision(jobId, shard, failed, agencyPrefix);
|
||||||
|
planChanges(collptr, database, shard, agencyPrefix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void reportJobInSupervision(
|
void reportJobInSupervision(uint64_t jobId,
|
||||||
uint64_t jobId,
|
std::pair<std::string,
|
||||||
std::pair<std::string, std::shared_ptr<Node>> const& shard,
|
std::shared_ptr<Node>> const& shard,
|
||||||
std::string const& serverID) {
|
std::string const& serverID,
|
||||||
|
std::string const& agencyPrefix) {
|
||||||
|
|
||||||
std::string const& shardId = shard.first;
|
std::string const& shardId = shard.first;
|
||||||
VPackSlice const& dbservers = shard.second->slice();
|
VPackSlice const& dbservers = shard.second->slice();
|
||||||
std::string path =
|
std::string path = agencyPrefix + pendingPrefix
|
||||||
pendingPrefix + arangodb::basics::StringUtils::itoa(jobId);
|
+ arangodb::basics::StringUtils::itoa(jobId);
|
||||||
query_t envelope = std::make_shared<Builder>();
|
query_t envelope = std::make_shared<Builder>();
|
||||||
|
|
||||||
Builder report;
|
Builder report;
|
||||||
|
@ -113,17 +169,16 @@ struct FailedServerJob {
|
||||||
report.close();
|
report.close();
|
||||||
// makeReport(envelope, report);
|
// makeReport(envelope, report);
|
||||||
|
|
||||||
LOG(WARN) << report.toJson();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void planChanges(
|
void planChanges(
|
||||||
std::pair<std::string, std::shared_ptr<Node>> const& database,
|
std::pair<std::string, std::shared_ptr<Node>> const& database,
|
||||||
std::pair<std::string, std::shared_ptr<Node>> const& collection,
|
std::pair<std::string, std::shared_ptr<Node>> const& collection,
|
||||||
std::pair<std::string, std::shared_ptr<Node>> const& shard) {
|
std::pair<std::string, std::shared_ptr<Node>> const& shard,
|
||||||
std::string path = collectionsPrefix + database.first + "/" +
|
std::string const& agencyPrefix) {
|
||||||
|
std::string path = agencyPrefix + collectionsPrefix + database.first + "/" +
|
||||||
collection.first + "/shards/" + shard.first;
|
collection.first + "/shards/" + shard.first;
|
||||||
|
|
||||||
LOG(WARN) << path;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -131,6 +186,8 @@ struct FailedServerJob {
|
||||||
|
|
||||||
using namespace arangodb::consensus;
|
using namespace arangodb::consensus;
|
||||||
|
|
||||||
|
std::string Supervision::_agencyPrefix = "/arango";
|
||||||
|
|
||||||
Supervision::Supervision()
|
Supervision::Supervision()
|
||||||
: arangodb::Thread("Supervision"),
|
: arangodb::Thread("Supervision"),
|
||||||
_agent(nullptr),
|
_agent(nullptr),
|
||||||
|
@ -144,22 +201,14 @@ Supervision::~Supervision() { shutdown(); };
|
||||||
|
|
||||||
void Supervision::wakeUp() {
|
void Supervision::wakeUp() {
|
||||||
TRI_ASSERT(_agent != nullptr);
|
TRI_ASSERT(_agent != nullptr);
|
||||||
_snapshot = _agent->readDB().get("/");
|
_snapshot = _agent->readDB().get(_agencyPrefix);
|
||||||
_cv.signal();
|
_cv.signal();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string printTimestamp(Supervision::TimePoint const& t) {
|
static std::string const syncPrefix = "/Sync/ServerStates/";
|
||||||
time_t tt = std::chrono::system_clock::to_time_t(t);
|
static std::string const supervisionPrefix = "/Supervision/Health/";
|
||||||
struct tm tb;
|
static std::string const planDBServersPrefix = "/Plan/DBServers";
|
||||||
char buffer[21];
|
|
||||||
TRI_gmtime(tt, &tb);
|
|
||||||
size_t len = ::strftime(buffer, sizeof(buffer), "%Y-%m-%dT%H:%M:%SZ", &tb);
|
|
||||||
return std::string(buffer, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string const syncPrefix = "/arango/Sync/ServerStates/";
|
|
||||||
static std::string const supervisionPrefix = "/arango/Supervision/Health/";
|
|
||||||
static std::string const planDBServersPrefix = "/arango/Plan/DBServers";
|
|
||||||
std::vector<check_t> Supervision::checkDBServers() {
|
std::vector<check_t> Supervision::checkDBServers() {
|
||||||
std::vector<check_t> ret;
|
std::vector<check_t> ret;
|
||||||
Node::Children const& machinesPlanned =
|
Node::Children const& machinesPlanned =
|
||||||
|
@ -179,7 +228,7 @@ std::vector<check_t> Supervision::checkDBServers() {
|
||||||
report->openArray();
|
report->openArray();
|
||||||
report->openArray();
|
report->openArray();
|
||||||
report->openObject();
|
report->openObject();
|
||||||
report->add(supervisionPrefix + serverID,
|
report->add(_agencyPrefix + supervisionPrefix + serverID,
|
||||||
VPackValue(VPackValueType::Object));
|
VPackValue(VPackValueType::Object));
|
||||||
report->add("LastHearbeatReceived",
|
report->add("LastHearbeatReceived",
|
||||||
VPackValue(printTimestamp(it->second->myTimestamp)));
|
VPackValue(printTimestamp(it->second->myTimestamp)));
|
||||||
|
@ -194,8 +243,8 @@ std::vector<check_t> Supervision::checkDBServers() {
|
||||||
if (t.count() > _gracePeriod) { // Failure
|
if (t.count() > _gracePeriod) { // Failure
|
||||||
if (it->second->maintenance() == 0) {
|
if (it->second->maintenance() == 0) {
|
||||||
it->second->maintenance(TRI_NewTickServer());
|
it->second->maintenance(TRI_NewTickServer());
|
||||||
FailedServerJob fsj(_snapshot, _agent, it->second->maintenance(),
|
FailedServer fsj(_snapshot, _agent, it->second->maintenance(),
|
||||||
serverID);
|
serverID, _agencyPrefix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,7 +268,6 @@ std::vector<check_t> Supervision::checkDBServers() {
|
||||||
auto itr = _vitalSigns.begin();
|
auto itr = _vitalSigns.begin();
|
||||||
while (itr != _vitalSigns.end()) {
|
while (itr != _vitalSigns.end()) {
|
||||||
if (machinesPlanned.find(itr->first) == machinesPlanned.end()) {
|
if (machinesPlanned.find(itr->first) == machinesPlanned.end()) {
|
||||||
LOG(WARN) << itr->first << " shut down!";
|
|
||||||
itr = _vitalSigns.erase(itr);
|
itr = _vitalSigns.erase(itr);
|
||||||
} else {
|
} else {
|
||||||
++itr;
|
++itr;
|
||||||
|
@ -229,16 +277,12 @@ std::vector<check_t> Supervision::checkDBServers() {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Supervision::moveShard(std::string const& from, std::string const& to) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Supervision::doChecks(bool timedout) {
|
bool Supervision::doChecks(bool timedout) {
|
||||||
if (_agent == nullptr) {
|
if (_agent == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
_snapshot = _agent->readDB().get("/");
|
_snapshot = _agent->readDB().get(_agencyPrefix);
|
||||||
|
|
||||||
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Sanity checks";
|
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Sanity checks";
|
||||||
/*std::vector<check_t> ret = */checkDBServers();
|
/*std::vector<check_t> ret = */checkDBServers();
|
||||||
|
@ -247,27 +291,49 @@ bool Supervision::doChecks(bool timedout) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Supervision::run() {
|
void Supervision::run() {
|
||||||
|
|
||||||
CONDITION_LOCKER(guard, _cv);
|
CONDITION_LOCKER(guard, _cv);
|
||||||
TRI_ASSERT(_agent != nullptr);
|
TRI_ASSERT(_agent != nullptr);
|
||||||
bool timedout = false;
|
bool timedout = false;
|
||||||
|
|
||||||
while (!this->isStopping()) {
|
while (!this->isStopping()) {
|
||||||
|
|
||||||
|
// Get agency prefix after cluster init
|
||||||
|
if (_jobId == 0) {
|
||||||
|
if (!updateAgencyPrefix(10)) {
|
||||||
|
LOG_TOPIC(ERR, Logger::AGENCY)
|
||||||
|
<< "Cannot get prefix from Agency. Stopping supervision for good.";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get bunch of job IDs from agency for future jobs
|
||||||
|
if (_jobId == 0 || _jobId == _jobIdMax) {
|
||||||
|
if (!getUniqueIds()) {
|
||||||
|
LOG_TOPIC(ERR, Logger::AGENCY)
|
||||||
|
<< "Cannot get unique IDs from Agency. Stopping supervision for good.";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
MoveShard ("coordinator1", "_system", "41", "s42", "DBServer1",
|
||||||
|
"DBServer2", _jobId++, _agencyPrefix, _agent);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait unless leader
|
||||||
if (_agent->leading()) {
|
if (_agent->leading()) {
|
||||||
timedout = _cv.wait(_frequency * 1000000); // quarter second
|
timedout = _cv.wait(_frequency * 1000000); // quarter second
|
||||||
} else {
|
} else {
|
||||||
_cv.wait();
|
_cv.wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_jobId == 0 || _jobId == _jobIdMax) {
|
// Do supervision
|
||||||
if (!getUniqueIds()) {
|
|
||||||
LOG_TOPIC(ERR, Logger::AGENCY) << "Cannot get unique IDs from Agency. "
|
|
||||||
"Stopping supervision for good.";
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
doChecks(timedout);
|
doChecks(timedout);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start thread
|
// Start thread
|
||||||
|
@ -280,20 +346,37 @@ bool Supervision::start() {
|
||||||
bool Supervision::start(Agent* agent) {
|
bool Supervision::start(Agent* agent) {
|
||||||
_agent = agent;
|
_agent = agent;
|
||||||
_frequency = static_cast<long>(_agent->config().supervisionFrequency);
|
_frequency = static_cast<long>(_agent->config().supervisionFrequency);
|
||||||
_snapshot = _agent->readDB().get("/");
|
|
||||||
|
|
||||||
updateFromAgency();
|
|
||||||
|
|
||||||
return start();
|
return start();
|
||||||
}
|
}
|
||||||
|
|
||||||
#include <iostream>
|
// Get agency prefix fron agency
|
||||||
|
bool Supervision::updateAgencyPrefix (size_t nTries, int intervalSec) {
|
||||||
|
|
||||||
|
// Try nTries to get agency's prefix in intervals
|
||||||
|
for (size_t i = 0; i < nTries; i++) {
|
||||||
|
_snapshot = _agent->readDB().get("/");
|
||||||
|
if (_snapshot.children().size() > 0) {
|
||||||
|
_agencyPrefix = _snapshot.children().begin()->first;
|
||||||
|
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Agency prefix is " << _agencyPrefix;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
std::this_thread::sleep_for (std::chrono::seconds(intervalSec));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stand-alone agency
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string const syncLatest = "/Sync/LatestID";
|
||||||
|
// Get bunch of cluster's unique ids from agency
|
||||||
bool Supervision::getUniqueIds() {
|
bool Supervision::getUniqueIds() {
|
||||||
uint64_t latestId;
|
uint64_t latestId;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
latestId = std::stoul(
|
latestId = std::stoul(
|
||||||
_agent->readDB().get("/arango/Sync/LatestID").slice().toJson());
|
_agent->readDB().get(_agencyPrefix + "/Sync/LatestID").slice().toJson());
|
||||||
} catch (std::exception const& e) {
|
} catch (std::exception const& e) {
|
||||||
LOG(WARN) << e.what();
|
LOG(WARN) << e.what();
|
||||||
return false;
|
return false;
|
||||||
|
@ -304,11 +387,10 @@ bool Supervision::getUniqueIds() {
|
||||||
Builder uniq;
|
Builder uniq;
|
||||||
uniq.openArray();
|
uniq.openArray();
|
||||||
uniq.openObject();
|
uniq.openObject();
|
||||||
uniq.add("/arango/Sync/LatestID",
|
uniq.add(_agencyPrefix + syncLatest, VPackValue(latestId + 100000)); // new
|
||||||
VPackValue(latestId + 100000)); // new val
|
|
||||||
uniq.close();
|
uniq.close();
|
||||||
uniq.openObject();
|
uniq.openObject();
|
||||||
uniq.add("/arango/Sync/LatestID", VPackValue(latestId)); // precond
|
uniq.add(_agencyPrefix + syncLatest, VPackValue(latestId)); // precond
|
||||||
uniq.close();
|
uniq.close();
|
||||||
uniq.close();
|
uniq.close();
|
||||||
|
|
||||||
|
@ -321,7 +403,7 @@ bool Supervision::getUniqueIds() {
|
||||||
}
|
}
|
||||||
|
|
||||||
latestId = std::stoul(
|
latestId = std::stoul(
|
||||||
_agent->readDB().get("/arango/Sync/LatestID").slice().toJson());
|
_agent->readDB().get(_agencyPrefix + "/Sync/LatestID").slice().toJson());
|
||||||
}
|
}
|
||||||
|
|
||||||
return success;
|
return success;
|
||||||
|
@ -329,12 +411,13 @@ bool Supervision::getUniqueIds() {
|
||||||
|
|
||||||
void Supervision::updateFromAgency() {
|
void Supervision::updateFromAgency() {
|
||||||
auto const& jobsPending =
|
auto const& jobsPending =
|
||||||
_snapshot("/arango/Supervision/Jobs/Pending").children();
|
_snapshot("/Supervision/Jobs/Pending").children();
|
||||||
|
|
||||||
for (auto const& jobent : jobsPending) {
|
for (auto const& jobent : jobsPending) {
|
||||||
auto const& job = *(jobent.second);
|
auto const& job = *(jobent.second);
|
||||||
|
|
||||||
LOG(WARN) << job.name() << " " << job("failed").toJson() << job("");
|
LOG_TOPIC(WARN, Logger::AGENCY)
|
||||||
|
<< job.name() << " " << job("failed").toJson() << job("");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -343,4 +426,6 @@ void Supervision::beginShutdown() {
|
||||||
Thread::beginShutdown();
|
Thread::beginShutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
Store const& Supervision::store() const { return _agent->readDB(); }
|
Store const& Supervision::store() const {
|
||||||
|
return _agent->readDB();
|
||||||
|
}
|
||||||
|
|
|
@ -50,10 +50,6 @@ struct Job {
|
||||||
Job() {}
|
Job() {}
|
||||||
~Job() {}
|
~Job() {}
|
||||||
};
|
};
|
||||||
struct FailedServersJob : public Job {
|
|
||||||
FailedServersJob();
|
|
||||||
~FailedServersJob();
|
|
||||||
};
|
|
||||||
|
|
||||||
struct check_t {
|
struct check_t {
|
||||||
bool good;
|
bool good;
|
||||||
|
@ -128,6 +124,10 @@ class Supervision : public arangodb::Thread {
|
||||||
void wakeUp();
|
void wakeUp();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
/// @brief Update agency prefix from agency itself
|
||||||
|
bool updateAgencyPrefix (size_t nTries = 10, int intervalSec = 1);
|
||||||
|
|
||||||
/// @brief Move shard from one db server to other db server
|
/// @brief Move shard from one db server to other db server
|
||||||
bool moveShard(std::string const& from, std::string const& to);
|
bool moveShard(std::string const& from, std::string const& to);
|
||||||
|
|
||||||
|
@ -167,6 +167,8 @@ class Supervision : public arangodb::Thread {
|
||||||
long _gracePeriod;
|
long _gracePeriod;
|
||||||
long _jobId;
|
long _jobId;
|
||||||
long _jobIdMax;
|
long _jobIdMax;
|
||||||
|
|
||||||
|
static std::string _agencyPrefix;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -579,14 +579,13 @@ bool AgencyComm::tryInitializeStructure() {
|
||||||
builder.add(VPackValue("Target"));
|
builder.add(VPackValue("Target"));
|
||||||
{
|
{
|
||||||
VPackObjectBuilder c(&builder);
|
VPackObjectBuilder c(&builder);
|
||||||
addEmptyVPackObject("Coordinators", builder);
|
|
||||||
builder.add(VPackValue("Collections"));
|
builder.add(VPackValue("Collections"));
|
||||||
{
|
{
|
||||||
VPackObjectBuilder d(&builder);
|
VPackObjectBuilder d(&builder);
|
||||||
addEmptyVPackObject("_system", builder);
|
addEmptyVPackObject("_system", builder);
|
||||||
}
|
}
|
||||||
builder.add("Version", VPackValue(1));
|
addEmptyVPackObject("Coordinators", builder);
|
||||||
addEmptyVPackObject("MapLocalToID", builder);
|
addEmptyVPackObject("DBServers", builder);
|
||||||
builder.add(VPackValue("Databases"));
|
builder.add(VPackValue("Databases"));
|
||||||
{
|
{
|
||||||
VPackObjectBuilder d(&builder);
|
VPackObjectBuilder d(&builder);
|
||||||
|
@ -597,19 +596,20 @@ bool AgencyComm::tryInitializeStructure() {
|
||||||
builder.add("id", VPackValue("1"));
|
builder.add("id", VPackValue("1"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
addEmptyVPackObject("DBServers", builder);
|
|
||||||
builder.add("Lock", VPackValue("UNLOCKED"));
|
builder.add("Lock", VPackValue("UNLOCKED"));
|
||||||
|
addEmptyVPackObject("MapLocalToID", builder);
|
||||||
|
addEmptyVPackObject("Failed", builder);
|
||||||
|
addEmptyVPackObject("Finished", builder);
|
||||||
|
addEmptyVPackObject("Pending", builder);
|
||||||
|
addEmptyVPackObject("ToDo", builder);
|
||||||
|
builder.add("Version", VPackValue(1));
|
||||||
}
|
}
|
||||||
builder.add(VPackValue("Supervision"));
|
builder.add(VPackValue("Supervision"));
|
||||||
{
|
{
|
||||||
VPackObjectBuilder c(&builder);
|
VPackObjectBuilder c(&builder);
|
||||||
builder.add(VPackValue("Jobs"));
|
addEmptyVPackObject("Health", builder);
|
||||||
{
|
addEmptyVPackObject("Shards", builder);
|
||||||
VPackObjectBuilder d(&builder);
|
addEmptyVPackObject("DBServers", builder);
|
||||||
addEmptyVPackObject("Pending", builder);
|
|
||||||
addEmptyVPackObject("Finished", builder);
|
|
||||||
addEmptyVPackObject("Failed", builder);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
builder.add("InitDone", VPackValue(true));
|
builder.add("InitDone", VPackValue(true));
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
|
Loading…
Reference in New Issue