mirror of https://gitee.com/bigwinds/arangodb

Supervision Job for Active Failover (#5066)

parent 646db8ca0a
commit 45fbed497b

@@ -0,0 +1,299 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Simon Grätzer
////////////////////////////////////////////////////////////////////////////////

#include "ActiveFailoverJob.h"

#include "Agency/AgentInterface.h"
#include "Agency/Job.h"
#include "Agency/JobContext.h"
#include "Agency/Store.h"
#include "Cluster/ClusterHelpers.h"
#include "VocBase/voc-types.h"

using namespace arangodb;
using namespace arangodb::consensus;

ActiveFailoverJob::ActiveFailoverJob(Node const& snapshot, AgentInterface* agent,
                                     std::string const& jobId, std::string const& creator,
                                     std::string const& failed)
    : Job(NOTFOUND, snapshot, agent, jobId, creator),
      _server(failed) { }

ActiveFailoverJob::ActiveFailoverJob(Node const& snapshot, AgentInterface* agent,
                                     JOB_STATUS status, std::string const& jobId)
    : Job(status, snapshot, agent, jobId) {
  // Get job details from agency:
  std::string path = pos[status] + _jobId + "/";
  auto tmp_server = _snapshot.hasAsString(path + "server");
  auto tmp_creator = _snapshot.hasAsString(path + "creator");

  if (tmp_server.second && tmp_creator.second) {
    _server = tmp_server.first;
    _creator = tmp_creator.first;
  } else {
    std::stringstream err;
    err << "Failed to find job " << _jobId << " in agency.";
    LOG_TOPIC(ERR, Logger::SUPERVISION) << err.str();
    finish(tmp_server.first, "", false, err.str());
    _status = FAILED;
  }
}

ActiveFailoverJob::~ActiveFailoverJob() {}

void ActiveFailoverJob::run() {
  runHelper(_server, "");
}

bool ActiveFailoverJob::create(std::shared_ptr<VPackBuilder> envelope) {
  LOG_TOPIC(DEBUG, Logger::SUPERVISION)
      << "Todo: Handle failover for leader " + _server;

  bool selfCreate = (envelope == nullptr); // Do we create ourselves?

  if (selfCreate) {
    _jb = std::make_shared<Builder>();
  } else {
    _jb = envelope;
  }

  auto now = timepointToString(std::chrono::system_clock::now());
  { VPackArrayBuilder transaction(_jb.get());
    { VPackObjectBuilder operations(_jb.get());
      // Todo entry
      _jb->add(VPackValue(toDoPrefix + _jobId));
      { VPackObjectBuilder todo(_jb.get());
        _jb->add("creator", VPackValue(_creator));
        _jb->add("type", VPackValue("activeFailover"));
        _jb->add("server", VPackValue(_server));
        _jb->add("jobId", VPackValue(_jobId));
        _jb->add("timeCreated", VPackValue(now));
      } // todo

      // FailedServers entry []
      _jb->add(VPackValue(failedServersPrefix + "/" + _server));
      { VPackArrayBuilder failedServers(_jb.get()); }
    } // Operations

    // Preconditions
    { VPackObjectBuilder health(_jb.get());
      // Status should still be BAD
      addPreconditionServerHealth(*_jb, _server, Supervision::HEALTH_STATUS_BAD);
      // Target/FailedServers does not already include _server
      _jb->add(VPackValue(failedServersPrefix + "/" + _server));
      { VPackObjectBuilder old(_jb.get());
        _jb->add("oldEmpty", VPackValue(true)); }
      // Target/FailedServers is still as in the snapshot
      _jb->add(VPackValue(failedServersPrefix));
      { VPackObjectBuilder old(_jb.get());
        _jb->add("old", _snapshot(failedServersPrefix).toBuilder().slice()); }
    } // Preconditions
  } // transactions

  _status = TODO;

  if (!selfCreate) {
    return true;
  }

  write_ret_t res = singleWriteTransaction(_agent, *_jb);
  if (res.accepted && res.indices.size() == 1 && res.indices[0]) {
    return true;
  }

  _status = NOTFOUND;

  LOG_TOPIC(INFO, Logger::SUPERVISION) << "Failed to insert job " + _jobId;
  return false;
}
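
// The transaction built above has the canonical agency shape: a single
// [operations, preconditions] pair. The operations place the ToDo entry
// under /Target/ToDo/<jobId> and an empty array under
// /Target/FailedServers/<server>; the preconditions require that the
// server's health is still BAD and that /Target/FailedServers has not
// changed since the snapshot was taken.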

bool ActiveFailoverJob::start() {
  // If anything throws here, the run() method catches it and finishes
  // the job.

  // Fail job, if Health back to not FAILED
  if (checkServerHealth(_snapshot, _server) != Supervision::HEALTH_STATUS_FAILED) {
    std::string reason = "Server " + _server + " is no longer failed. " +
                         "Not starting ActiveFailoverJob job";
    LOG_TOPIC(INFO, Logger::SUPERVISION) << reason;
    return finish(_server, "", true, reason); // move to /Target/Finished
  }

  auto leader = _snapshot.hasAsSlice(asyncReplLeader);
  if (!leader.second || leader.first.compareString(_server) != 0) {
    std::string reason = "Server " + _server + " is not the current replication leader";
    LOG_TOPIC(INFO, Logger::SUPERVISION) << reason;
    return finish(_server, "", true, reason); // move to /Target/Finished
  }

  // Abort job blocking server if abortable
  auto jobId = _snapshot.hasAsString(blockedServersPrefix + _server);
  if (jobId.second && !abortable(_snapshot, jobId.first)) {
    return false;
  } else if (jobId.second) {
    JobContext(PENDING, jobId.first, _snapshot, _agent).abort();
  }

  // Todo entry
  Builder todo;
  { VPackArrayBuilder t(&todo);
    if (_jb == nullptr) {
      try {
        _snapshot(toDoPrefix + _jobId).toBuilder(todo);
      } catch (std::exception const&) {
        LOG_TOPIC(INFO, Logger::SUPERVISION)
            << "Failed to get key " + toDoPrefix + _jobId + " from agency snapshot";
        return false;
      }
    } else {
      todo.add(_jb->slice()[0].get(toDoPrefix + _jobId));
    }} // Todo entry

  std::string newLeader = findBestFollower();
  if (newLeader.empty() || _server == newLeader) {
    LOG_TOPIC(INFO, Logger::SUPERVISION) << "No server available, will retry job later";
    return false; // job will retry later
  }
  LOG_TOPIC(INFO, Logger::SUPERVISION) << "Selected '" << newLeader << "' as leader";

  // Enter pending, remove todo
  Builder pending;
  { VPackArrayBuilder listOfTransactions(&pending);

    { VPackObjectBuilder operations(&pending);
      addPutJobIntoSomewhere(pending, "Finished", todo.slice()[0]);
      addRemoveJobFromSomewhere(pending, "ToDo", _jobId);
      pending.add(asyncReplLeader, VPackValue(newLeader));
    } // mutation part of transaction done

    // Preconditions
    { VPackObjectBuilder precondition(&pending);
      // Failed condition persists
      addPreconditionServerHealth(pending, _server, Supervision::HEALTH_STATUS_FAILED);
      // Destination server still in good condition
      addPreconditionServerHealth(pending, newLeader, Supervision::HEALTH_STATUS_GOOD);
      // Destination server should not be blocked by another job
      addPreconditionServerNotBlocked(pending, newLeader);
      // AsyncReplication leader must be the failed server
      addPreconditionUnchanged(pending, asyncReplLeader, leader.first);
    } // precondition done

  } // array for transaction done

  // Transact to agency
  write_ret_t res = singleWriteTransaction(_agent, pending);

  if (res.accepted && res.indices.size() == 1 && res.indices[0]) {
    _status = FINISHED;
    LOG_TOPIC(INFO, Logger::SUPERVISION)
        << "Finished: ActiveFailoverJob server " << _server << " failover to " << newLeader;
    return true;
  }

  LOG_TOPIC(INFO, Logger::SUPERVISION) << "Precondition failed for ActiveFailoverJob " + _jobId;
  return false;
}

JOB_STATUS ActiveFailoverJob::status() {
  if (_status != PENDING) {
    return _status;
  }

  TRI_ASSERT(false); // PENDING is not an option for this job, since it
                     // travels directly from ToDo to Finished or Failed
  return _status;
}

arangodb::Result ActiveFailoverJob::abort() {
  // We can assume that the job is in ToDo or not there:
  if (_status == NOTFOUND || _status == FINISHED || _status == FAILED) {
    return Result(TRI_ERROR_SUPERVISION_GENERAL_FAILURE,
                  "Failed aborting activeFailover job beyond pending stage");
  }

  Result result;
  // Can now only be TODO or PENDING
  if (_status == TODO) {
    finish("", "", false, "job aborted");
    return result;
  }

  TRI_ASSERT(false); // cannot happen, since job moves directly to FINISHED
  return result;
}

typedef std::pair<std::string, TRI_voc_tick_t> ServerTick;
/// Try to select the follower most in-sync with failed leader
std::string ActiveFailoverJob::findBestFollower() {
  std::vector<std::string> as = healthyServers(_snapshot);

  // blocked; (not sure if this can even happen)
  try {
    for (auto const& srv : _snapshot(blockedServersPrefix).children()) {
      as.erase(std::remove(as.begin(), as.end(), srv.first), as.end());
    }
  } catch (...) {}

  std::vector<ServerTick> ticks;
  try { // collect tick values from transient state
    query_t trx = std::make_unique<VPackBuilder>();
    {
      VPackArrayBuilder transactions(trx.get());
      VPackArrayBuilder operations(trx.get());
      trx->add(VPackValue("/" + Job::agencyPrefix + asyncReplTransientPrefix));
    }
    trans_ret_t res = _agent->transient(std::move(trx));

    if (res.accepted) {
      VPackSlice resp = res.result->slice();
      if (!resp.isArray() || resp.length() == 0) {
        return "";
      }
      VPackSlice obj = resp.at(0).get({ Job::agencyPrefix, "AsyncReplication" });
      for (VPackObjectIterator::ObjectPair pair : VPackObjectIterator(obj)) {
        std::string srvUUID = pair.key.copyString();
        if (std::find(as.begin(), as.end(), srvUUID) == as.end()) {
          continue; // skip inaccessible servers
        }

        VPackSlice leader = pair.value.get("leader"); // broken leader
        VPackSlice lastTick = pair.value.get("lastTick");
        if (leader.isString() && leader.compareString(_server) == 0 &&
            lastTick.isNumber()) {
          ticks.emplace_back(std::move(srvUUID), lastTick.getUInt());
        }
      }
    }
  } catch (...) {}

  std::sort(ticks.begin(), ticks.end(), [&](ServerTick const& a,
                                            ServerTick const& b) {
    return a.second > b.second;
  });
  if (!ticks.empty()) {
    return ticks[0].first;
  }
  return ""; // fallback to any available server
}
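
The selection rule above reduces to: gather (server, lastTick) pairs from the transient /AsyncReplication entries, keep only healthy, unblocked followers of the failed leader, sort by tick descending, and take the front. A minimal standalone sketch of that ordering — the server names and tick values here are made up for illustration:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using ServerTick = std::pair<std::string, uint64_t>;

int main() {
  // hypothetical followers with the ticks they last applied
  std::vector<ServerTick> ticks = {
      {"SNGL-aaa", 1041}, {"SNGL-bbb", 1057}, {"SNGL-ccc", 998}};

  // same ordering as findBestFollower(): highest lastTick wins
  std::sort(ticks.begin(), ticks.end(),
            [](ServerTick const& a, ServerTick const& b) {
              return a.second > b.second;
            });

  std::cout << "new leader: " << ticks.front().first << std::endl; // SNGL-bbb
  return 0;
}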

@@ -0,0 +1,60 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Simon Grätzer
////////////////////////////////////////////////////////////////////////////////

#ifndef ARANGOD_CONSENSUS_ACTIVE_FAILOVER_JOB_H
#define ARANGOD_CONSENSUS_ACTIVE_FAILOVER_JOB_H 1

#include "Job.h"
#include "Supervision.h"

namespace arangodb {
namespace consensus {

struct ActiveFailoverJob final : public Job {
  ActiveFailoverJob(Node const& snapshot, AgentInterface* agent, std::string const& jobId,
                    std::string const& creator,
                    std::string const& failed);

  ActiveFailoverJob(Node const& snapshot, AgentInterface* agent,
                    JOB_STATUS status, std::string const& jobId);

  virtual ~ActiveFailoverJob();

  virtual JOB_STATUS status() override final;
  virtual void run() override final;
  virtual bool create(std::shared_ptr<VPackBuilder> envelope = nullptr)
      override final;
  virtual bool start() override final;
  virtual Result abort() override final;

 private:
  std::string findBestFollower();

 private:
  std::string _server;
};
}
}

#endif

@@ -43,10 +43,11 @@ std::string const planColPrefix = "/Plan/Collections/";
std::string const curColPrefix = "/Current/Collections/";
std::string const blockedServersPrefix = "/Supervision/DBServers/";
std::string const blockedShardsPrefix = "/Supervision/Shards/";
std::string const serverStatePrefix = "/Sync/ServerStates/";
std::string const planVersion = "/Plan/Version";
std::string const plannedServers = "/Plan/DBServers";
std::string const healthPrefix = "/Supervision/Health/";
std::string const asyncReplLeader = "/Plan/AsyncReplication/Leader";
std::string const asyncReplTransientPrefix = "/AsyncReplication/";

} // namespace arangodb::consensus
} // namespace arangodb

@@ -239,6 +240,18 @@ std::vector<std::string> Job::availableServers(Node const& snapshot) {
}

/// @brief Get servers from Supervision with health status GOOD
std::vector<std::string> Job::healthyServers(arangodb::consensus::Node const& snapshot) {
  std::vector<std::string> ret;
  for (auto const& srv : snapshot(healthPrefix).children()) {
    auto healthState = srv.second->hasAsString("Status");
    if (healthState.second && healthState.first == Supervision::HEALTH_STATUS_GOOD) {
      ret.emplace_back(srv.first);
    }
  }
  return ret;
}

template<typename T> std::vector<size_t> idxsort (const std::vector<T> &v) {

  std::vector<size_t> idx(v.size());

@@ -413,8 +426,9 @@ bool Job::abortable(Node const& snapshot, std::string const& jobId) {
   }
   auto const& tmp_type = job.first.hasAsString("type");
 
-  std::string type = tmp_type.first;
-  if (!tmp_type.second || type == "failedServer" || type == "failedLeader") {
+  std::string const& type = tmp_type.first;
+  if (!tmp_type.second || type == "failedServer" || type == "failedLeader" ||
+      type == "activeFailover") {
     return false;
   } else if (type == "addFollower" || type == "moveShard" ||
              type == "cleanOutServer") {

@@ -549,12 +563,9 @@ void Job::addReleaseShard(Builder& trx, std::string const& shard) {
 std::string Job::checkServerHealth(Node const& snapshot,
                                    std::string const& server) {
   auto status = snapshot.hasAsString(healthPrefix + server + "/Status");
 
   if (!status.second) {
     return "UNCLEAR";
   }
-  if (status.first != "GOOD") {
-    return "UNHEALTHY";
-  }
-  return "GOOD";
+  return status.first;
 }
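
// (checkServerHealth now returns the stored status string, so callers such as
// ActiveFailoverJob::start() compare directly against the
// Supervision::HEALTH_STATUS_* constants)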

@@ -53,10 +53,11 @@ extern std::string const planColPrefix;
extern std::string const curColPrefix;
extern std::string const blockedServersPrefix;
extern std::string const blockedShardsPrefix;
extern std::string const serverStatePrefix;
extern std::string const planVersion;
extern std::string const plannedServers;
extern std::string const healthPrefix;
extern std::string const asyncReplLeader;
extern std::string const asyncReplTransientPrefix;

struct Job {

@@ -128,6 +129,9 @@ struct Job {
  /// @brief Get servers from plan, which are not failed or cleaned out
  static std::vector<std::string> availableServers(
      const arangodb::consensus::Node&);

  /// @brief Get servers from Supervision with health status GOOD
  static std::vector<std::string> healthyServers(arangodb::consensus::Node const&);

  static std::vector<shard_t> clones(
      Node const& snap, std::string const& db, std::string const& col,

@@ -169,12 +173,12 @@ struct Job {
   static void addReleaseServer(Builder& trx, std::string const& server);
   static void addReleaseShard(Builder& trx, std::string const& shard);
   static void addPreconditionServerNotBlocked(Builder& pre, std::string const& server);
-  static void addPreconditionServerHealth(Builder& pre, std::string const& server, std::string const& health);
+  static void addPreconditionServerHealth(Builder& pre, std::string const& server,
+                                          std::string const& health);
   static void addPreconditionShardNotBlocked(Builder& pre, std::string const& shard);
   static void addPreconditionUnchanged(Builder& pre,
                                        std::string const& key, Slice value);
   static std::string checkServerHealth(Node const& snapshot, std::string const& server);
 };
 
 inline arangodb::consensus::write_ret_t singleWriteTransaction(

@@ -263,8 +267,7 @@ inline arangodb::consensus::trans_ret_t generalTransaction(
 }
 
 inline arangodb::consensus::trans_ret_t transient(AgentInterface* _agent,
-                                                  Builder const& transaction,
-                                                  bool waitForCommit = true) {
+                                                  Builder const& transaction) {
   query_t envelope = std::make_shared<Builder>();
 
   Slice trx = transaction.slice();

@@ -288,7 +291,6 @@ inline arangodb::consensus::trans_ret_t transient(AgentInterface* _agent,
        << "Supervision failed to build transaction for transient: " << e.what();
  }

  return _agent->transient(envelope);
}

@@ -23,6 +23,7 @@

#include "JobContext.h"

#include "Agency/ActiveFailoverJob.h"
#include "Agency/AddFollower.h"
#include "Agency/CleanOutServer.h"
#include "Agency/FailedFollower.h"

@@ -59,6 +60,8 @@ JobContext::JobContext (JOB_STATUS status, std::string id, Node const& snapshot,
    _job = std::make_unique<AddFollower>(snapshot, agent, status, id);
  } else if (type == "removeFollower") {
    _job = std::make_unique<RemoveFollower>(snapshot, agent, status, id);
  } else if (type == "activeFailover") {
    _job = std::make_unique<ActiveFailoverJob>(snapshot, agent, status, id);
  } else {
    LOG_TOPIC(ERR, Logger::AGENCY) <<
      "Failed to run supervision job " << type << " with id " << id;

@@ -98,6 +98,7 @@ bool MoveShard::create(std::shared_ptr<VPackBuilder> envelope) {
  std::string now(timepointToString(std::chrono::system_clock::now()));

#ifdef ARANGODB_ENABLE_MAINTAINER_MODE
  // DBservers
  std::string planPath =
      planColPrefix + _database + "/" + _collection + "/shards/" + _shard;

@@ -105,6 +106,7 @@ bool MoveShard::create(std::shared_ptr<VPackBuilder> envelope) {
  Slice plan = _snapshot.hasAsSlice(planPath).first;
  TRI_ASSERT(plan.isArray());
  TRI_ASSERT(plan[0].isString());
#endif

  if (selfCreate) {
    _jb->openArray();

@@ -1164,7 +1164,6 @@ Slice Node::getArray() const {
  return Slice(_vecBuf.data());
}

void Node::clear() {
  _children.clear();
  _ttl = std::chrono::system_clock::time_point();

@@ -272,7 +272,6 @@ class Node {
  /// @return second is true if url exists, first populated if second true
  std::pair<Slice, bool> hasAsArray(std::string const &) const;

  //
  // These two operator() functions could be "protected" once
  // unit tests updated.

@@ -25,6 +25,7 @@

#include <thread>

#include "Agency/ActiveFailoverJob.h"
#include "Agency/AddFollower.h"
#include "Agency/Agent.h"
#include "Agency/CleanOutServer.h"

@@ -173,7 +174,6 @@ static std::string const targetShortID = "/Target/MapUniqueToShortID/";
 static std::string const currentServersRegisteredPrefix =
     "/Current/ServersRegistered";
 static std::string const foxxmaster = "/Current/Foxxmaster";
-static std::string const asyncReplLeader = "/Plan/AsyncReplication/Leader";
 
 void Supervision::upgradeOne(Builder& builder) {
   _lock.assertLockedByCurrentThread();

@@ -282,7 +282,6 @@ void handleOnStatusDBServer(
    uint64_t const& jobId, std::shared_ptr<VPackBuilder>& envelope) {

  std::string failedServerPath = failedServersPrefix + "/" + serverID;

  // New condition GOOD:
  if (transisted.status == Supervision::HEALTH_STATUS_GOOD) {
    if (snapshot.has(failedServerPath)) {

@@ -306,7 +305,6 @@ void handleOnStatusDBServer(
            "supervision", serverID).create(envelope);
    }
  }
}

@@ -329,18 +327,32 @@ void handleOnStatusCoordinator(
 
 void handleOnStatusSingle(
     Agent* agent, Node const& snapshot, HealthRecord& persisted,
-    HealthRecord& transisted, std::string const& serverID) {
-  // if the current leader server failed => reset the value to ""
-  if (transisted.status == Supervision::HEALTH_STATUS_FAILED) {
-
-    if (snapshot.hasAsString(asyncReplLeader).first == serverID) {
-      VPackBuilder create;
-      { VPackArrayBuilder tx(&create);
-        { VPackObjectBuilder d(&create);
-          create.add(asyncReplLeader, VPackValue("")); }}
-      singleWriteTransaction(agent, create);
+    HealthRecord& transisted, std::string const& serverID,
+    uint64_t const& jobId, std::shared_ptr<VPackBuilder>& envelope) {
+
+  std::string failedServerPath = failedServersPrefix + "/" + serverID;
+  // New condition GOOD:
+  if (transisted.status == Supervision::HEALTH_STATUS_GOOD) {
+    if (snapshot.has(failedServerPath)) {
+      envelope = std::make_shared<VPackBuilder>();
+      { VPackArrayBuilder a(envelope.get());
+        { VPackObjectBuilder operations (envelope.get());
+          envelope->add(VPackValue(failedServerPath));
+          { VPackObjectBuilder ccc(envelope.get());
+            envelope->add("op", VPackValue("delete")); }}}
+    }
+  } else if ( // New state: FAILED persisted: GOOD (-> BAD)
+    persisted.status == Supervision::HEALTH_STATUS_GOOD &&
+    transisted.status != Supervision::HEALTH_STATUS_GOOD) {
+    transisted.status = Supervision::HEALTH_STATUS_BAD;
+  } else if ( // New state: FAILED persisted: BAD (-> Job)
+    persisted.status == Supervision::HEALTH_STATUS_BAD &&
+    transisted.status == Supervision::HEALTH_STATUS_FAILED ) {
+    if (!snapshot.has(failedServerPath)) {
+      envelope = std::make_shared<VPackBuilder>();
+      ActiveFailoverJob(snapshot, agent, std::to_string(jobId),
+                        "supervision", serverID).create(envelope);
     }
   }
 }
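
The three branches above form a small per-server state machine: an observed GOOD clears any stale /Target/FailedServers entry, a first failure only demotes the persisted health to BAD, and only a failure observed while BAD is already persisted triggers an ActiveFailoverJob. A condensed, self-contained model of that decision — an illustration, not the supervision code itself:

#include <iostream>
#include <string>

// persisted = health status stored in the agency,
// observed  = status derived from the freshest heartbeat information
std::string nextAction(std::string const& persisted, std::string const& observed) {
  if (observed == "GOOD") {
    return "clear /Target/FailedServers entry (if present)";
  }
  if (persisted == "GOOD") {
    return "demote persisted status to BAD, decide next round";
  }
  if (persisted == "BAD" && observed == "FAILED") {
    return "create ActiveFailoverJob";
  }
  return "no action";
}

int main() {
  std::cout << nextAction("GOOD", "FAILED") << "\n"; // first round: demote to BAD
  std::cout << nextAction("BAD", "FAILED") << "\n";  // second round: start the job
  return 0;
}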

@@ -356,7 +368,8 @@ void handleOnStatus(
     handleOnStatusCoordinator(
         agent, snapshot, persisted, transisted, serverID);
   } else if (serverID.compare(0,4,"SNGL") == 0) {
-    handleOnStatusSingle(agent, snapshot, persisted, transisted, serverID);
+    handleOnStatusSingle(agent, snapshot, persisted, transisted,
+                         serverID, jobId, envelope);
   } else {
     LOG_TOPIC(ERR, Logger::SUPERVISION)
         << "Unknown server type. No supervision action taken. " << serverID;

@@ -104,7 +104,7 @@ void auth::TokenCache::invalidateBasicCache() {
 auth::TokenCache::Entry auth::TokenCache::checkAuthenticationBasic(
     std::string const& secret) {
   if (_userManager == nullptr) { // server does not support users
-    LOG_TOPIC(TRACE, Logger::AUTHENTICATION) << "Basic auth not supported";
+    LOG_TOPIC(WARN, Logger::AUTHENTICATION) << "Basic auth not supported";
     return auth::TokenCache::Entry();
   }

@@ -185,7 +185,7 @@ auth::TokenCache::Entry auth::TokenCache::checkAuthenticationJWT(
   std::vector<std::string> const parts = StringUtils::split(jwt, '.');
   if (parts.size() != 3) {
-    LOG_TOPIC(TRACE, arangodb::Logger::FIXME)
+    LOG_TOPIC(TRACE, arangodb::Logger::AUTHENTICATION)
         << "Secret contains " << parts.size() << " parts";
     return auth::TokenCache::Entry();
   }

@@ -52,10 +52,10 @@ class TokenCache {
   friend class auth::TokenCache;
 
  public:
-  Entry() : _authenticated(false), _expiry(0) {}
+  explicit Entry() : _authenticated(false), _expiry(0) {}
 
   explicit Entry(std::string const& username, bool a, double t)
-    : _username(username), _authenticated(a), _expiry(t) {}
+      : _username(username), _authenticated(a), _expiry(t) {}
 
   std::string const& username() const { return _username; }
   bool authenticated() const { return _authenticated; }

@@ -81,7 +81,7 @@ class TokenCache {
   /// set new jwt secret, regenerate _jetToken
   void setJwtSecret(std::string const&);
   std::string jwtSecret() const;
-  /// Get the jwt token, which should be used for communicatin
+  /// Get the jwt token, which should be used for communication
   std::string const& jwtToken() const noexcept {
     TRI_ASSERT(!_jwtToken.empty());
     return _jwtToken;

@@ -140,6 +140,7 @@ SET(ARANGOD_SOURCES
  Actions/ActionFeature.cpp
  Actions/RestActionHandler.cpp
  Actions/actions.cpp
  Agency/ActiveFailoverJob.cpp
  Agency/AddFollower.cpp
  Agency/AgencyComm.cpp
  Agency/AgencyFeature.cpp

@@ -554,6 +554,8 @@ void HeartbeatThread::runSingleServer() {
      LOG_TOPIC(TRACE, Logger::HEARTBEAT) << "Current leader: " << _myId;
      if (applier->isActive()) {
        applier->stopAndJoin();
        // preemptively remove the transient entry from the agency
        _agency.setTransient(transientPath, VPackSlice::emptyObjectSlice(), 0);
      }

      // ensure everyone has server access

@@ -590,8 +592,22 @@ void HeartbeatThread::runSingleServer() {
         // wait for everything to calm down for good measure
         std::this_thread::sleep_for(std::chrono::seconds(10));
       }
 
+      TRI_voc_tick_t lastTick = 0; // we always want to set lastTick
+      auto sendTransient = [&]() {
+        VPackBuilder builder;
+        builder.openObject();
+        builder.add("leader", leader);
+        builder.add("lastTick", VPackValue(lastTick));
+        builder.close();
+        double ttl = std::chrono::duration_cast<std::chrono::seconds>(_interval).count() * 5.0;
+        _agency.setTransient(transientPath, builder.slice(), ttl);
+      };
+      TRI_DEFER(sendTransient());
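      // (the transient {leader, lastTick} entry written here is what
      // ActiveFailoverJob::findBestFollower() later reads back; with a ttl of
      // five heartbeat intervals it expires on its own once a server stops
      // reporting)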
 
-      if (applier->endpoint() != endpoint) { // configure applier for new endpoint
+      if (applier->isActive() && applier->endpoint() == endpoint) {
+        lastTick = applier->lastTick();
+      } else if (applier->endpoint() != endpoint) { // configure applier for new endpoint
         if (applier->isActive()) {
           applier->stopAndJoin();
         }

@@ -358,7 +358,7 @@ rest::ResponseCode GeneralCommTask::canAccessPath(
       !StringUtils::isPrefix(path, ApiUser)) {
     events::NotAuthorized(request);
     result = rest::ResponseCode::UNAUTHORIZED;
-    LOG_TOPIC(TRACE, Logger::AUTHENTICATION) << "Access forbidden to " << path;
+    LOG_TOPIC(TRACE, Logger::AUTHORIZATION) << "Access forbidden to " << path;
   }
 
   // mop: inside the authenticateRequest() request->user will be populated

@@ -390,7 +390,7 @@ rest::ResponseCode GeneralCommTask::canAccessPath(
         // simon: upgrade rights for Foxx apps. FIXME
         result = rest::ResponseCode::OK;
         vc->forceSuperuser();
-        LOG_TOPIC(TRACE, Logger::AUTHENTICATION) << "Upgrading rights for " << path;
+        LOG_TOPIC(TRACE, Logger::AUTHORIZATION) << "Upgrading rights for " << path;
       }
     }
   }

@@ -89,9 +89,12 @@ RestStatus RestAuthHandler::execute() {
   _username = usernameSlice.copyString();
   std::string const password = passwordSlice.copyString();
 
-  AuthenticationFeature* af = AuthenticationFeature::instance();
-  TRI_ASSERT(af != nullptr);
-  if (af->userManager()->checkPassword(_username, password)) {
+  auth::UserManager* um = AuthenticationFeature::instance()->userManager();
+  if (um == nullptr) {
+    std::string msg = "This server does not support users";
+    LOG_TOPIC(ERR, Logger::AUTHENTICATION) << msg;
+    generateError(rest::ResponseCode::UNAUTHORIZED, TRI_ERROR_HTTP_UNAUTHORIZED, msg);
+  } else if (um->checkPassword(_username, password)) {
     VPackBuilder resultBuilder;
     {
       VPackObjectBuilder b(&resultBuilder);

@@ -101,13 +104,12 @@ RestStatus RestAuthHandler::execute() {
 
     _isValid = true;
     generateDocument(resultBuilder.slice(), true, &VPackOptions::Defaults);
-    return RestStatus::DONE;
   } else {
     // mop: rfc 2616 10.4.2 (if credentials wrong 401)
     generateError(rest::ResponseCode::UNAUTHORIZED,
                   TRI_ERROR_HTTP_UNAUTHORIZED, "Wrong credentials");
-    return RestStatus::DONE;
   }
+  return RestStatus::DONE;
 }
 
 RestStatus RestAuthHandler::badRequest() {

@@ -953,13 +953,14 @@ Result RestReplicationHandler::processRestoreCollection(
   ExecContext const* exe = ExecContext::CURRENT;
   if (name[0] != '_' && exe != nullptr && !exe->isSuperuser() &&
       ServerState::instance()->isSingleServer()) {
-    AuthenticationFeature* af = AuthenticationFeature::instance();
-
-    af->userManager()->updateUser(exe->user(), [&](auth::User& entry) {
-      entry.grantCollection(_vocbase.name(), col->name(), auth::Level::RW);
-
-      return TRI_ERROR_NO_ERROR;
-    });
+    auth::UserManager* um = AuthenticationFeature::instance()->userManager();
+    TRI_ASSERT(um != nullptr); // should not get here
+    if (um != nullptr) {
+      um->updateUser(exe->user(), [&](auth::User& entry) {
+        entry.grantCollection(_vocbase.name(), col->name(), auth::Level::RW);
+        return TRI_ERROR_NO_ERROR;
+      });
+    }
   }
 
   return Result();

@@ -1124,12 +1125,15 @@ Result RestReplicationHandler::processRestoreCollectionCoordinator(
 
     ExecContext const* exe = ExecContext::CURRENT;
     if (name[0] != '_' && exe != nullptr && !exe->isSuperuser()) {
-      AuthenticationFeature* af = AuthenticationFeature::instance();
-      af->userManager()->updateUser(ExecContext::CURRENT->user(),
-                                    [&](auth::User& entry) {
-        entry.grantCollection(dbName, col->name(), auth::Level::RW);
-        return TRI_ERROR_NO_ERROR;
-      });
+      auth::UserManager* um = AuthenticationFeature::instance()->userManager();
+      TRI_ASSERT(um != nullptr); // should not get here
+      if (um != nullptr) {
+        um->updateUser(ExecContext::CURRENT->user(),
+                       [&](auth::User& entry) {
+          entry.grantCollection(dbName, col->name(), auth::Level::RW);
+          return TRI_ERROR_NO_ERROR;
+        });
+      }
     }
   } catch (basics::Exception const& ex) {
     // Error, report it.

@@ -1324,8 +1328,12 @@ Result RestReplicationHandler::processRestoreUsersBatch(
   auto queryResult = query.execute(queryRegistry);
 
+  // neither agency nor dbserver should get here
   AuthenticationFeature* af = AuthenticationFeature::instance();
-  af->userManager()->outdate();
+  TRI_ASSERT(af->userManager() != nullptr);
+  if (af->userManager() != nullptr) {
+    af->userManager()->outdate();
+  }
   af->tokenCache()->invalidateBasicCache();
 
   return Result{queryResult.code};

@@ -46,8 +46,10 @@ RestUsersHandler::RestUsersHandler(GeneralRequest* request,
 RestStatus RestUsersHandler::execute() {
   RequestType const type = _request->requestType();
   AuthenticationFeature* af = AuthenticationFeature::instance();
-  if (af == nullptr) { // nullptr happens only during shutdown
-    return RestStatus::FAIL;
+  if (af == nullptr || af->userManager() == nullptr) {
+    // nullptr may happen during shutdown, or on Agency
+    generateError(ResponseCode::BAD, TRI_ERROR_NOT_IMPLEMENTED);
+    return RestStatus::DONE;
   }
 
   switch (type) {

@@ -50,58 +50,54 @@ VocbaseContext::~VocbaseContext() {
   _vocbase.release();
 }
 
-/*static*/ VocbaseContext* VocbaseContext::create(
-    GeneralRequest* req, TRI_vocbase_t& vocbase
-) {
+VocbaseContext* VocbaseContext::create(GeneralRequest* req, TRI_vocbase_t& vocbase) {
   // _vocbase has already been refcounted for us
   TRI_ASSERT(!vocbase.isDangling());
 
   AuthenticationFeature* auth = AuthenticationFeature::instance();
   TRI_ASSERT(auth != nullptr);
   if (auth == nullptr) {
     return nullptr;
-  }
-
-  if (!auth->isActive()) {
+  } else if (!auth->isActive()) {
     return new VocbaseContext(req, vocbase, /*isInternal*/ false,
                               /*sysLevel*/ auth::Level::RW,
                               /*dbLevel*/ auth::Level::RW);
   }
 
-  if (req->authenticated()) {
-    // superusers will have an empty username. This MUST be invalid
-    // for users authenticating with name / password
-    if (req->user().empty()) {
-      if (req->authenticationMethod() != AuthenticationMethod::JWT) {
-        std::string msg = "only jwt can be used to authenticate as superuser";
-        LOG_TOPIC(WARN, Logger::AUTHENTICATION) << msg;
-        THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, msg);
-      }
-
-      return new VocbaseContext(req, vocbase, /*isInternal*/ true,
-                                /*sysLevel*/ auth::Level::RW,
-                                /*dbLevel*/ auth::Level::RW);
-    }
-
-    auth::UserManager* um = auth->userManager();
-    if (um == nullptr) {
-      LOG_TOPIC(ERR, Logger::AUTHENTICATION) << "Server does not support users";
-      return nullptr;
-    }
-
-    auth::Level dbLvl = um->databaseAuthLevel(req->user(), req->databaseName());
-    auth::Level sysLvl = dbLvl;
-    if (req->databaseName() != TRI_VOC_SYSTEM_DATABASE) {
-      sysLvl = um->databaseAuthLevel(req->user(), TRI_VOC_SYSTEM_DATABASE);
-    }
-
+  if (!req->authenticated()) {
     return new VocbaseContext(req, vocbase, /*isInternal*/ false,
-                              /*sysLevel*/ sysLvl,
-                              /*dbLevel*/ dbLvl);
+                              /*sysLevel*/ auth::Level::NONE,
+                              /*dbLevel*/ auth::Level::NONE);
   }
 
+  // superusers will have an empty username. This MUST be invalid
+  // for users authenticating with name / password
+  if (req->user().empty()) {
+    if (req->authenticationMethod() != AuthenticationMethod::JWT) {
+      std::string msg = "only jwt can be used to authenticate as superuser";
+      LOG_TOPIC(WARN, Logger::AUTHENTICATION) << msg;
+      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, msg);
+    }
+    return new VocbaseContext(req, vocbase, /*isInternal*/ true,
+                              /*sysLevel*/ auth::Level::RW,
+                              /*dbLevel*/ auth::Level::RW);
+  }
+
+  auth::UserManager* um = auth->userManager();
+  if (um == nullptr) {
+    LOG_TOPIC(WARN, Logger::AUTHENTICATION) << "Server does not support users";
+    return nullptr;
+  }
+
+  auth::Level dbLvl = um->databaseAuthLevel(req->user(), req->databaseName());
+  auth::Level sysLvl = dbLvl;
+  if (req->databaseName() != TRI_VOC_SYSTEM_DATABASE) {
+    sysLvl = um->databaseAuthLevel(req->user(), TRI_VOC_SYSTEM_DATABASE);
+  }
+
-  return new VocbaseContext(req, vocbase, /*isInternal*/ false,
-                            /*sysLevel*/ auth::Level::NONE,
-                            /*dbLevel*/ auth::Level::NONE);
+  return new VocbaseContext(req, vocbase, /*isInternal*/ false,
+                            /*sysLevel*/ sysLvl,
+                            /*dbLevel*/ dbLvl);
 }
 
 void VocbaseContext::forceSuperuser() {

@@ -198,13 +198,14 @@ Result Collections::create(TRI_vocbase_t* vocbase, std::string const& name,
 
   // do not grant rights on system collections
   // in case of success we grant the creating user RW access
-  if (name[0] != '_' && exe != nullptr && !exe->isSuperuser()) {
+  auth::UserManager* um = AuthenticationFeature::instance()->userManager();
+  if (name[0] != '_' && um != nullptr && exe != nullptr && !exe->isSuperuser()) {
     // this should not fail, we can not get here without database RW access
-    af->userManager()->updateUser(
-        ExecContext::CURRENT->user(), [&](auth::User& entry) {
-          entry.grantCollection(vocbase->name(), name, auth::Level::RW);
-          return TRI_ERROR_NO_ERROR;
-        });
+    um->updateUser(
+        ExecContext::CURRENT->user(), [&](auth::User& entry) {
+          entry.grantCollection(vocbase->name(), name, auth::Level::RW);
+          return TRI_ERROR_NO_ERROR;
+        });
   }
 
   // reload otherwise collection might not be in yet

@@ -215,10 +216,10 @@ Result Collections::create(TRI_vocbase_t* vocbase, std::string const& name,
 
   // do not grant rights on system collections
   // in case of success we grant the creating user RW access
-  if (name[0] != '_' && exe != nullptr && !exe->isSuperuser() &&
-      ServerState::instance()->isSingleServerOrCoordinator()) {
+  auth::UserManager* um = AuthenticationFeature::instance()->userManager();
+  if (name[0] != '_' && um != nullptr && exe != nullptr && !exe->isSuperuser()) {
     // this should not fail, we can not get here without database RW access
-    af->userManager()->updateUser(
+    um->updateUser(
         ExecContext::CURRENT->user(), [&](auth::User& u) {
           u.grantCollection(vocbase->name(), name, auth::Level::RW);
           return TRI_ERROR_NO_ERROR;

@@ -480,8 +481,7 @@ Result Collections::drop(TRI_vocbase_t* vocbase, LogicalCollection* coll,
   if (!exec->canUseDatabase(vocbase->name(), auth::Level::RW) ||
       !exec->canUseCollection(coll->name(), auth::Level::RW)) {
     return Result(TRI_ERROR_FORBIDDEN,
-                  "Insufficient rights to drop "
-                  "collection " +
+                  "Insufficient rights to drop collection " +
                       coll->name());
   } else if (!exec->isSuperuser() && !ServerState::writeOpsEnabled()) {
     THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_ARANGO_READ_ONLY,

@@ -510,9 +510,9 @@ Result Collections::drop(TRI_vocbase_t* vocbase, LogicalCollection* coll,
     }
   }
 
-  if (res.ok() && ServerState::instance()->isSingleServerOrCoordinator()) {
-    AuthenticationFeature* af = AuthenticationFeature::instance();
-    af->userManager()->enumerateUsers([&](auth::User& entry) -> bool {
+  auth::UserManager* um = AuthenticationFeature::instance()->userManager();
+  if (res.ok() && um != nullptr) {
+    um->enumerateUsers([&](auth::User& entry) -> bool {
       return entry.removeCollection(dbname, collName);
     });
   }

@@ -80,12 +80,13 @@ std::vector<std::string> Databases::list(std::string const& user) {
   if (ServerState::instance()->isCoordinator()) {
 
     AuthenticationFeature* af = AuthenticationFeature::instance();
+    auth::UserManager* um = af->userManager();
     std::vector<std::string> names;
     std::vector<std::string> dbs =
         databaseFeature->getDatabaseNamesCoordinator();
     for (std::string const& db : dbs) {
-      if (!af->isActive() ||
-          af->userManager()->databaseAuthLevel(user, db) > auth::Level::NONE) {
+      if (!af->isActive() || (um != nullptr &&
+          um->databaseAuthLevel(user, db) > auth::Level::NONE)) {
         names.push_back(db);
       }
     }

@@ -141,7 +142,7 @@ arangodb::Result Databases::info(TRI_vocbase_t* vocbase, VPackBuilder& result) {
 arangodb::Result Databases::create(std::string const& dbName,
                                    VPackSlice const& inUsers,
                                    VPackSlice const& inOptions) {
-  AuthenticationFeature* af = AuthenticationFeature::instance();
+  auth::UserManager* um = AuthenticationFeature::instance()->userManager();
   ExecContext const* exec = ExecContext::CURRENT;
   if (exec != nullptr) {
     if (!exec->isAdminUser()) {

@@ -262,9 +263,9 @@ arangodb::Result Databases::create(std::string const& dbName,
     TRI_ASSERT(vocbase->name() == dbName);
 
     // we need to add the permissions before running the upgrade script
-    if (ExecContext::CURRENT != nullptr) {
+    if (ExecContext::CURRENT != nullptr && um != nullptr) {
       // ignore errors here Result r =
-      af->userManager()->updateUser(
+      um->updateUser(
           ExecContext::CURRENT->user(), [&](auth::User& entry) {
            entry.grantDatabase(dbName, auth::Level::RW);
            entry.grantCollection(dbName, "*", auth::Level::RW);

@@ -295,10 +296,9 @@ arangodb::Result Databases::create(std::string const& dbName,
   TRI_DEFER(vocbase->release());
 
   // we need to add the permissions before running the upgrade script
-  if (ServerState::instance()->isSingleServer() &&
-      ExecContext::CURRENT != nullptr) {
+  if (ExecContext::CURRENT != nullptr && um != nullptr) {
     // ignore errors here Result r =
-    af->userManager()->updateUser(
+    um->updateUser(
         ExecContext::CURRENT->user(), [&](auth::User& entry) {
           entry.grantDatabase(dbName, auth::Level::RW);
           entry.grantCollection(dbName, "*", auth::Level::RW);

@@ -410,10 +410,9 @@ arangodb::Result Databases::drop(TRI_vocbase_t* systemVocbase,
   }
 
   Result res;
-  AuthenticationFeature* af = AuthenticationFeature::instance();
-  if (ServerState::instance()->isCoordinator() ||
-      !ServerState::instance()->isRunningInCluster()) {
-    res = af->userManager()->enumerateUsers([&](auth::User& entry) -> bool {
+  auth::UserManager* um = AuthenticationFeature::instance()->userManager();
+  if (um != nullptr) {
+    res = um->enumerateUsers([&](auth::User& entry) -> bool {
       return entry.removeDatabase(dbName);
     });
   }

@@ -122,7 +122,10 @@ bool UpgradeTasks::addDefaultUserOther(TRI_vocbase_t* vocbase,
    return false;
  }
  auth::UserManager* um = AuthenticationFeature::instance()->userManager();
  TRI_ASSERT(um != nullptr);
  if (um == nullptr) {
    return true; // server does not support users
  }

  for (VPackSlice slice : VPackArrayIterator(users)) {
    std::string user = VelocyPackHelper::getStringValue(slice, "username",
                                                        StaticStrings::Empty);

@@ -1053,11 +1053,12 @@ function startInstanceCluster (instanceInfo, protocol, options,
   // we need to find the leading server
   if (options.activefailover) {
     internal.wait(5.0);
-    let opts = makeAuthorizationHeaders(authOpts);
-    opts['headers']['content-type'] = 'application/json';
-    opts['method'] = 'POST';
-    let reply = download(agencyUrl + '/_api/agency/read',
-                         '[["/arango/Plan/AsyncReplication/Leader"]]', opts);
+    let opts = {
+      method: 'POST',
+      jwt: crypto.jwtEncode(authOpts['server.jwt-secret'], {'server_id': 'none', 'iss': 'arangodb'}, 'HS256'),
+      headers: {'content-type': 'application/json' }
+    };
+    let reply = download(agencyUrl + '/_api/agency/read', '[["/arango/Plan/AsyncReplication/Leader"]]', opts);
 
     if (!reply.error && reply.code === 200) {
       let res = JSON.parse(reply.body);

@@ -28,7 +28,8 @@
 const functionsDocumentation = {
   'resilience': 'resilience tests',
   'client_resilience': 'client resilience tests',
-  'cluster_sync': 'cluster sync tests'
+  'cluster_sync': 'cluster sync tests',
+  'active_failover': 'active failover tests'
 };
 const optionsDocumentation = [
 ];

@@ -85,10 +86,36 @@ function clusterSync (options) {
  return tu.performTests(options, testCases, 'cluster_sync', tu.runThere);
}

// //////////////////////////////////////////////////////////////////////////////
// / @brief TEST: active failover
// //////////////////////////////////////////////////////////////////////////////

function activeFailover (options) {
  if (options.cluster) {
    return {
      'active_failover': {
        'status': true,
        'message': 'skipped because of cluster',
        'skipped': true
      }
    };
  }

  let testCases = tu.scanTestPath('js/client/tests/active-failover');
  options.activefailover = true;
  options.singles = 4;
  return tu.performTests(options, testCases, 'client_resilience', tu.runInArangosh, {
    'server.authentication': 'true',
    'server.jwt-secret': 'haxxmann'
  });
}

function setup (testFns, defaultFns, opts, fnDocs, optionsDoc) {
  testFns['resilience'] = resilience;
  testFns['client_resilience'] = clientResilience;
  testFns['cluster_sync'] = clusterSync;
  testFns['active_failover'] = activeFailover;
  for (var attrname in functionsDocumentation) { fnDocs[attrname] = functionsDocumentation[attrname]; }
  for (var i = 0; i < optionsDocumentation.length; i++) { optionsDoc.push(optionsDocumentation[i]); }
}

@@ -0,0 +1,486 @@
/*jshint strict: false, sub: true */
/*global print, assertTrue, assertEqual */
'use strict';

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2014 triagens GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Andreas Streichardt
////////////////////////////////////////////////////////////////////////////////

const jsunity = require('jsunity');
const internal = require('internal');
const fs = require('fs');

const arangosh = require('@arangodb/arangosh');
const crypto = require('@arangodb/crypto');
const request = require("@arangodb/request");
const tasks = require("@arangodb/tasks");

const arango = internal.arango;
const compareTicks = require("@arangodb/replication").compareTicks;
const wait = internal.wait;
const db = internal.db;

const suspendExternal = internal.suspendExternal;
const continueExternal = internal.continueExternal;

const jwtSecret = 'haxxmann';
const jwtSuperuser = crypto.jwtEncode(jwtSecret, {
  "server_id": "test",
  "iss": "arangodb",
  "exp": Math.floor(Date.now() / 1000) + 3600
}, 'HS256');
const jwtRoot = crypto.jwtEncode(jwtSecret, {
  "preferred_username": "root",
  "iss": "arangodb",
  "exp": Math.floor(Date.now() / 1000) + 3600
}, 'HS256');

if (!internal.env.hasOwnProperty('INSTANCEINFO')) {
  throw new Error('env.INSTANCEINFO was not set by caller!');
}
const instanceinfo = JSON.parse(internal.env.INSTANCEINFO);

const cname = "UnitTestActiveFailover";

/*try {
  let globals = JSON.parse(process.env.ARANGOSH_GLOBALS);
  Object.keys(globals).forEach(g => {
    global[g] = globals[g];
  });
} catch (e) {
}*/

function getUrl(endpoint) {
  return endpoint.replace(/^tcp:/, 'http:').replace(/^ssl:/, 'https:');
}

function baseUrl() {
  return getUrl(arango.getEndpoint());
};

function connectToServer(leader) {
  arango.reconnect(leader, "_system", "root", "");
  db._flushCache();
};

// getClusterEndpoints works with any server
function getClusterEndpoints() {
  //let jwt = crypto.jwtEncode(options['server.jwt-secret'], {'server_id': 'none', 'iss': 'arangodb'}, 'HS256');
  var res = request.get({
    url: baseUrl() + "/_api/cluster/endpoints",
    auth: {
      bearer: jwtRoot,
    }
  });
  assertTrue(res instanceof request.Response);
  assertTrue(res.hasOwnProperty('statusCode'), JSON.stringify(res));
  assertTrue(res.statusCode === 200, JSON.stringify(res));
  assertTrue(res.hasOwnProperty('json'));
  assertTrue(res.json.hasOwnProperty('endpoints'));
  assertTrue(res.json.endpoints instanceof Array);
  assertTrue(res.json.endpoints.length > 0);
  return res.json.endpoints.map(e => e.endpoint);
}

function getLoggerState(endpoint) {
  var res = request.get({
    url: getUrl(endpoint) + "/_db/_system/_api/replication/logger-state",
    auth: {
      bearer: jwtRoot,
    }
  });
  assertTrue(res instanceof request.Response);
  assertTrue(res.hasOwnProperty('statusCode') && res.statusCode === 200);
  assertTrue(res.hasOwnProperty('json'));
  return arangosh.checkRequestResult(res.json);
}

function getApplierState(endpoint) {
  var res = request.get({
    url: getUrl(endpoint) + "/_db/_system/_api/replication/applier-state?global=true",
    auth: {
      bearer: jwtRoot,
    }
  });
  assertTrue(res instanceof request.Response);
  assertTrue(res.hasOwnProperty('statusCode') && res.statusCode === 200);
  assertTrue(res.hasOwnProperty('json'));
  return arangosh.checkRequestResult(res.json);
}

// check the servers are in sync with the leader
function checkInSync(leader, servers, ignore) {
  print("Checking in-sync state with lead: ", leader);
  let check = (endpoint) => {
    if (endpoint === leader || endpoint === ignore) {
      return true;
    }

    let applier = getApplierState(endpoint);
    return applier.state.running && applier.endpoint === leader &&
           (compareTicks(applier.state.lastAppliedContinuousTick, leaderTick) >= 0 ||
            compareTicks(applier.state.lastProcessedContinuousTick, leaderTick) >= 0);
  };

  const leaderTick = getLoggerState(leader).state.lastLogTick;

  let loop = 100;
  while (loop-- > 0) {
    if (servers.every(check)) {
      print("All followers are in sync");
      return true;
    }
    wait(1.0);
  }
  print("Timeout waiting for followers");
  return false;
}
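
// (polls for up to ~100s: a follower counts as in sync once its applier is
// running, points at the leader, and has caught up to the leader's last log tick)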

function checkData(server) {
  print("Checking data of ", server);
  let res = request.get({
    url: getUrl(server) + "/_api/collection/" + cname + "/count",
    auth: {
      bearer: jwtRoot,
    }
  });

  assertTrue(res instanceof request.Response);
  //assertTrue(res.hasOwnProperty('statusCode'));
  assertTrue(res.statusCode === 200);
  return res.json.count;
}

function readAgencyValue(path) {
  let agents = instanceinfo.arangods.filter(arangod => arangod.role === "agent");
  assertTrue(agents.length > 0, "No agents present");
  print("Querying agency... (", path, ")");
  var res = request.post({
    url: agents[0].url + "/_api/agency/read",
    auth: {
      bearer: jwtSuperuser,
    },
    body: JSON.stringify([[path]])
  });
  assertTrue(res instanceof request.Response);
  assertTrue(res.hasOwnProperty('statusCode'), JSON.stringify(res));
  assertEqual(res.statusCode, 200, JSON.stringify(res));
  assertTrue(res.hasOwnProperty('json'));
  //print("Agency response ", res.json);
  return arangosh.checkRequestResult(res.json);
}

// resolve leader from agency
function leaderInAgency() {
  let i = 10;
  do {
    let res = readAgencyValue("/arango/Plan/AsyncReplication/Leader");
    let uuid = res[0].arango.Plan.AsyncReplication.Leader;
    if (uuid && uuid.length > 0) {
      res = readAgencyValue("/arango/Supervision/Health");
      return res[0].arango.Supervision.Health[uuid].Endpoint;
    }
    internal.wait(1.0);
  } while (i-- > 0);
  throw "Unable to resolve leader from agency";
}

function checkForFailover(leader) {
  print("Waiting for failover of ", leader);

  let oldLeaderUUID = "";
  let i = 5; // 5 * 5s == 25s
  do {
    let res = readAgencyValue("/arango/Supervision/Health");
    let srvHealth = res[0].arango.Supervision.Health;
    Object.keys(srvHealth).forEach(key => {
      let srv = srvHealth[key];
      if (srv['Endpoint'] === leader && srv.Status === 'FAILED') {
        print("Server ", key, "( ", leader, " ) is marked FAILED");
        oldLeaderUUID = key;
      }
    });
    if (oldLeaderUUID !== "") {
      break;
    }
    internal.wait(5.0);
  } while (i-- > 0);

  // now wait for new leader to appear
  let nextLeaderUUID = "";
  do {
    let res = readAgencyValue("/arango/Plan/AsyncReplication/Leader");
    nextLeaderUUID = res[0].arango.Plan.AsyncReplication.Leader;
    if (nextLeaderUUID !== oldLeaderUUID) {
      res = readAgencyValue("/arango/Supervision/Health");
      return res[0].arango.Supervision.Health[nextLeaderUUID].Endpoint;
    }
    internal.wait(5.0);
  } while (i-- > 0);
  print("Timing out, current leader value: ", nextLeaderUUID);
  throw "No failover occurred";
}

// Testsuite that quickly checks some of the basic premises of
// the active failover functionality. It is designed as a quicker
// variant of the node resilience tests (for active failover).
// Things like Foxx resilience are not tested
function ActiveFailoverSuite() {
  let servers = getClusterEndpoints();
  assertTrue(servers.length >= 4, "This test expects four single instances");
  let firstLeader = servers[0];
  let suspended = [];
  let currentLead = leaderInAgency();

  return {
    setUp: function () {
      let col = db._create(cname);
      assertTrue(checkInSync(currentLead, servers));

      for (let i = 0; i < 10000; i++) {
        col.save({ attr: i});
      }
    },

    tearDown: function () {
      //db._collection(cname).drop();
      //serverTeardown();

      suspended.forEach(arangod => {
        print("Resuming: ", arangod.endpoint);
        assertTrue(continueExternal(arangod.pid));
      });

      currentLead = leaderInAgency();
      print("connecting shell to leader ", currentLead);
      connectToServer(currentLead);
      if (db._collection(cname)) {
        db._drop(cname);
      }

      assertTrue(checkInSync(currentLead, servers));

      let endpoints = getClusterEndpoints();
      assertTrue(endpoints.length === servers.length);
      assertTrue(endpoints[0] === currentLead);
    },

    // Basic test if followers get in sync
    testFollowerInSync: function () {
      assertEqual(servers[0], currentLead);

      let col = db._collection(cname);
      assertEqual(col.count(), 10000);
      assertTrue(checkInSync(currentLead, servers));
      assertEqual(checkData(currentLead), 10000);
    },

    // Simple failover case: Leader is suspended, slave needs to
    // take over within a reasonable amount of time
    testFailover: function () {
      assertTrue(checkInSync(currentLead, servers));
      assertEqual(checkData(currentLead), 10000);

      suspended = instanceinfo.arangods.filter(arangod => arangod.endpoint === currentLead);
      suspended.forEach(arangod => {
        print("Suspending Leader: ", arangod.endpoint);
        assertTrue(suspendExternal(arangod.pid));
      });

      let oldLead = currentLead;
      // await failover and check that followers get in sync
      currentLead = checkForFailover(currentLead);
      assertTrue(currentLead !== oldLead);
      print("Failover to new leader : ", currentLead);

      internal.wait(2.5); // settle down, heartbeat interval is 1s
      assertEqual(checkData(currentLead), 10000);
      print("New leader has correct data");

      // check the remaining followers get in sync
      assertTrue(checkInSync(currentLead, servers, oldLead));

      // restart the old leader
      suspended.forEach(arangod => {
        print("Resuming: ", arangod.endpoint);
        assertTrue(continueExternal(arangod.pid));
      });
      suspended = [];

      assertTrue(checkInSync(currentLead, servers));
    },

    // More complex case: We want to get the most up to date follower.
    // Insert a number of documents, suspend n-1 followers for a few seconds.
    // We then suspend the leader and expect a specific follower to take over
    testFollowerSelection: function () {
      assertTrue(checkInSync(currentLead, servers));
      assertEqual(checkData(currentLead), 10000);

      // we assume the second leader is still the leader
      let endpoints = getClusterEndpoints();
      assertTrue(endpoints.length === servers.length);
      assertTrue(endpoints[0] === currentLead);

      print("Starting data creation task on ", currentLead, " (expect it to fail later)");
      connectToServer(currentLead);
      /// this task should stop once the server becomes a slave
      var task = tasks.register({
        name: "UnitTestsFailover",
        command: `
          const db = require('@arangodb').db;
          let col = db._collection("UnitTestActiveFailover");
          let cc = col.count();
          for (let i = 0; i < 1000000; i++) {
            col.save({attr: i + cc});
          }`
      });

      internal.wait(2.5);

      // pick a random follower
      let nextLead = endpoints[2]; // could be any one of them
      // suspend remaining followers
      print("Suspending followers, except one");
      suspended = instanceinfo.arangods.filter(arangod => arangod.role !== 'agent' &&
                                               arangod.endpoint !== currentLead &&
                                               arangod.endpoint !== nextLead);
      suspended.forEach(arangod => {
        print("Suspending: ", arangod.endpoint);
        assertTrue(suspendExternal(arangod.pid));
|
||||
});
|
||||
|
||||
// check our leader stays intact, while remaining followers fail
|
||||
let i = 20;
|
||||
//let expected = servers.length - suspended.length; // should be 2
|
||||
do {
|
||||
endpoints = getClusterEndpoints();
|
||||
assertEqual(endpoints[0], currentLead, "Unwanted leadership failover");
|
||||
internal.wait(1.0); // Health status may take some time to change
|
||||
} while (endpoints.length !== 2 && i-- > 0);
|
||||
assertTrue(i > 0, "timed-out waiting for followers to fail");
|
||||
assertEqual(endpoints.length, 2);
|
||||
assertEqual(endpoints[1], nextLead); // this server must become new leader
|
||||
|
||||
// resume followers
|
||||
print("Resuming followers");
|
||||
suspended.forEach(arangod => {
|
||||
print("Resuming: ", arangod.endpoint);
|
||||
assertTrue(continueExternal(arangod.pid));
|
||||
});
|
||||
suspended = [];
|
||||
|
||||
let upper = checkData(currentLead);
|
||||
print("Leader inserted ", upper, " documents so far");
|
||||
print("Suspending leader ", currentLead);
|
||||
instanceinfo.arangods.forEach(arangod => {
|
||||
if (arangod.endpoint === currentLead) {
|
||||
print("Suspending: ", arangod.endpoint);
|
||||
suspended.push(arangod);
|
||||
assertTrue(suspendExternal(arangod.pid));
|
||||
}
|
||||
});
|
||||
|
||||
// await failover and check that follower get in sync
|
||||
let oldLead = currentLead;
|
||||
currentLead = checkForFailover(currentLead);
|
||||
assertTrue(currentLead === nextLead, "Did not fail to best in-sync follower");
|
||||
|
||||
internal.wait(2.5); // settle down, heartbeat interval is 1s
|
||||
let cc = checkData(currentLead);
|
||||
// we expect to find documents within an acceptable range
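      // lower bound: the 10000 documents from setUp must have been replicated;
      // upper bound: at most the count measured on the old leader plus a small
      // margin for writes that were still in flight when it was suspended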
      assertTrue(10000 <= cc && cc <= upper + 500, "Leader has too few or too many documents");
      print("Number of documents is in acceptable range");

      assertTrue(checkInSync(currentLead, servers, oldLead));
      print("Remaining followers are in sync");

      // Resuming stopped second leader
      print("Resuming server that still thinks it is leader (ArangoError 1004 is expected)");
      suspended.forEach(arangod => {
        print("Resuming: ", arangod.endpoint);
        assertTrue(continueExternal(arangod.pid));
      });
      suspended = [];

      assertTrue(checkInSync(currentLead, servers));
    },

    // try to failback to the original leader
    testFailback: function() {
      if (currentLead === firstLeader) {
        return; // nevermind then
      }

      assertTrue(checkInSync(currentLead, servers));
      assertEqual(checkData(currentLead), 10000);

      print("Suspending followers, except original leader");
      suspended = instanceinfo.arangods.filter(arangod => arangod.role !== 'agent' &&
                                                          arangod.endpoint !== firstLeader);
      suspended.forEach(arangod => {
        print("Suspending: ", arangod.endpoint);
        assertTrue(suspendExternal(arangod.pid));
      });

      // await failover and check that followers get in sync
      let oldLead = currentLead;
      currentLead = checkForFailover(currentLead);
      assertTrue(currentLead === firstLeader, "Did not fail over to original leader");

      suspended.forEach(arangod => {
        print("Resuming: ", arangod.endpoint);
        assertTrue(continueExternal(arangod.pid));
      });
      suspended = [];

      assertTrue(checkInSync(currentLead, servers));
      assertEqual(checkData(currentLead), 10000);
    }

    // Try to cleanup everything that was created
    /*testCleanup: function () {

      let res = readAgencyValue("/arango/Plan/AsyncReplication/Leader");
      assertTrue(res !== null);
      let uuid = res[0].arango.Plan.AsyncReplication.Leader;
      res = readAgencyValue("/arango/Supervision/Health");
      let lead = res[0].arango.Supervision.Health[uuid].Endpoint;

      connectToServer(lead);
      db._drop(cname);

      assertTrue(checkInSync(lead, servers));
    }*/

  };
}

////////////////////////////////////////////////////////////////////////////////
/// @brief executes the test suite
////////////////////////////////////////////////////////////////////////////////

jsunity.run(ActiveFailoverSuite);

return jsunity.done();

@ -295,7 +295,7 @@ function RequestSuite () {
      expect(obj.path).to.equal(path);
      expect(obj).to.have.property('headers');
      expect(obj.headers).to.have.property('authorization');
      expect(obj.headers.authorization).to.equal('Bearer ' + auth.bearer);
      expect(obj.headers.authorization).to.equal('bearer ' + auth.bearer);
    },

////////////////////////////////////////////////////////////////////////////////

@ -166,17 +166,6 @@ function request (req) {
    });
  }

  if (req.auth) {
    headers.authorization = (
      req.auth.bearer ?
      'Bearer ' + req.auth.bearer :
      'Basic ' + new Buffer(
        req.auth.username + ':' +
        req.auth.password
      ).toString('base64')
    );
  }

  let options = {
    method: (req.method || 'get').toUpperCase(),
    headers: headers,

@ -197,6 +186,16 @@ function request (req) {
  if (req.sslProtocol) {
    options.sslProtocol = req.sslProtocol;
  }
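  // req.auth is now mapped onto the options understood by internal.download:
  // a bearer token is passed on as a JWT, otherwise basic auth credentials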
  if (is.existy(req.auth)) {
    if (is.existy(req.auth.jwt)) {
      options.jwt = req.auth.jwt;
    } else if (is.existy(req.auth.bearer)) {
      options.jwt = req.auth.bearer;
    } else if (is.existy(req.auth.username)) {
      options.username = req.auth.username;
      options.password = req.auth.password || "";
    }
  }
  let result = internal.download(path, body, options);

  return new IncomingResponse(result, req.encoding, req.json);

@ -124,17 +124,6 @@ function clusterRequest(req) {
    });
  }

  if (req.auth) {
    headers['authorization'] = ( // eslint-disable-line dot-notation
      req.auth.bearer ?
      'Bearer ' + req.auth.bearer :
      'Basic ' + new Buffer(
        req.auth.username + ':' +
        req.auth.password
      ).toString('base64')
    );
  }

  let options = {
    method: (req.method || 'get').toUpperCase(),
    headers: headers,

@ -152,6 +141,16 @@ function clusterRequest(req) {
  } else {
    options.maxRedirects = 10;
  }
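  // same auth translation as in request(), but for internal.clusterDownload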
  if (is.existy(req.auth)) {
    if (is.existy(req.auth.jwt)) {
      options.jwt = req.auth.jwt;
    } else if (is.existy(req.auth.bearer)) {
      options.jwt = req.auth.bearer;
    } else if (is.existy(req.auth.username)) {
      options.username = req.auth.username;
      options.password = req.auth.password || "";
    }
  }
  let result = internal.clusterDownload(path, body, options);
  return new Response(result, req.encoding, req.json);
}

@ -31,6 +31,7 @@ var errors = arangodb.errors;
var db = arangodb.db;

var replication = require("@arangodb/replication");
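// compareTicks (comparison of string-encoded uint64 ticks) now comes from
// the replication module instead of a local copy (removed below)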
let compareTicks = replication.compareTicks;
var console = require("console");
var internal = require("internal");
var masterEndpoint = arango.getEndpoint();

@ -58,28 +59,6 @@ const collectionCount = function(name) {
  return db._collection(name).count();
};

const compareTicks = function(l, r) {
  var i;
  if (l === null) {
    l = "0";
  }
  if (r === null) {
    r = "0";
  }
  if (l.length !== r.length) {
    return l.length - r.length < 0 ? -1 : 1;
  }

  // length is equal
  for (i = 0; i < l.length; ++i) {
    if (l[i] !== r[i]) {
      return l[i] < r[i] ? -1 : 1;
    }
  }

  return 0;
};

const compare = function(masterFunc, masterFunc2, slaveFuncOngoing, slaveFuncFinal, applierConfiguration) {
  var state = {};

@ -665,6 +665,7 @@ void JS_Download(v8::FunctionCallbackInfo<v8::Value> const& args) {
  bool returnBodyOnError = false;
  int maxRedirects = 5;
  uint64_t sslProtocol = TLS_V12;
  std::string jwtToken, username, password;

  if (args.Length() > 2) {
    if (!args[2]->IsObject()) {

@ -757,6 +758,15 @@ void JS_Download(v8::FunctionCallbackInfo<v8::Value> const& args) {
      returnBodyOnError = TRI_ObjectToBoolean(
          options->Get(TRI_V8_ASCII_STRING(isolate, "returnBodyOnError")));
    }

    if (options->Has(TRI_V8_ASCII_STRING(isolate, "jwt"))) {
      jwtToken = TRI_ObjectToString(options->Get(TRI_V8_ASCII_STRING(isolate, "jwt")));
    } else if (options->Has(TRI_V8_ASCII_STRING(isolate, "username"))) {
      username = TRI_ObjectToString(options->Get(TRI_V8_ASCII_STRING(isolate, "username")));
      if (options->Has(TRI_V8_ASCII_STRING(isolate, "password"))) {
        password = TRI_ObjectToString(options->Get(TRI_V8_ASCII_STRING(isolate, "password")));
      }
    }
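    // note: a "jwt" option takes precedence; username/password are only
    // consulted when no JWT token was supplied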
  }

  // outfile

@ -870,6 +880,11 @@ void JS_Download(v8::FunctionCallbackInfo<v8::Value> const& args) {
  params.setSupportDeflate(false);
  // security by obscurity won't work. Github requires a useragent nowadays.
  params.setExposeArangoDB(true);
  if (!jwtToken.empty()) {
    params.setJwt(jwtToken);
  } else if (!username.empty()) {
    params.setUserNamePassword("/", username, password);
  }
  SimpleHttpClient client(connection.get(), params);

  v8::Handle<v8::Object> result = v8::Object::New(isolate);

@ -0,0 +1,415 @@
////////////////////////////////////////////////////////////////////////////////
/// @brief test case for ActiveFailover job
///
/// @file
///
/// DISCLAIMER
///
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Simon Grätzer
////////////////////////////////////////////////////////////////////////////////
#include "catch.hpp"
#include "fakeit.hpp"

#include "Agency/ActiveFailoverJob.h"
#include "Agency/AgentInterface.h"
#include "Agency/Node.h"
#include "lib/Basics/StringUtils.h"
#include "lib/Random/RandomGenerator.h"
#include <iostream>
#include <velocypack/Collection.h>
#include <velocypack/Parser.h>
#include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h>

using namespace arangodb;
using namespace arangodb::basics;
using namespace arangodb::consensus;
using namespace fakeit;

namespace arangodb {
namespace tests {
namespace active_failover_test {

const std::string PREFIX = "arango";
const std::string LEADER = "SNGL-leader";
const std::string FOLLOWER1 = "SNGL-follower1"; // tick 10, STATE GOOD
const std::string FOLLOWER2 = "SNGL-follower2"; // tick 1, STATE GOOD
const std::string FOLLOWER3 = "SNGL-follower3"; // tick 9, STATE GOOD
const std::string FOLLOWER4 = "SNGL-follower4"; // tick 100, STATE BAD
const std::string FOLLOWER5 = "SNGL-follower5"; // tick 1000, STATE GOOD wrong leader


const char *agency =
#include "ActiveFailoverTest.json"
;

const char *transient =
#include "ActiveFailoverTestTransient.json"
;


Node createNodeFromBuilder(Builder const& builder) {

  Builder opBuilder;
  { VPackObjectBuilder a(&opBuilder);
    opBuilder.add("new", builder.slice()); }
  Node node("");
  node.handle<SET>(opBuilder.slice());
  return node;

}

Builder createBuilder(char const* c) {

  Options options;
  options.checkAttributeUniqueness = true;
  VPackParser parser(&options);
  parser.parse(c);

  Builder builder;
  builder.add(parser.steal()->slice());
  return builder;

}

typedef std::function<std::unique_ptr<Builder>(
  Slice const&, std::string const&)> TestStructType;

inline static std::string typeName (Slice const& slice) {
  return std::string(slice.typeName());
}

TEST_CASE("ActiveFailover", "[agency][supervision]") {

  arangodb::RandomGenerator::initialize(arangodb::RandomGenerator::RandomType::MERSENNE);

  Builder base = createBuilder(agency);
  // baseStructure.toBuilder(builder);

  std::string jobId = "1";

  write_ret_t fakeWriteResult {true, "", std::vector<bool> {true}, std::vector<index_t> {1}};
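  // canned agency write result: accepted and applied at raft log index 1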

  SECTION("creating a job should create a job in todo") {
    Mock<AgentInterface> mockAgent;

    write_ret_t fakeWriteResult {true, "", std::vector<bool> {true}, std::vector<index_t> {1}};
    When(Method(mockAgent, write)).AlwaysDo([&](query_t const& q, bool d) -> write_ret_t {
      INFO(q->slice().toJson());
      auto expectedJobKey = "/arango/Target/ToDo/" + jobId;
      REQUIRE(typeName(q->slice()) == "array" );
      REQUIRE(q->slice().length() == 1);
      REQUIRE(typeName(q->slice()[0]) == "array");
      // operations + preconditions
      REQUIRE(q->slice()[0].length() == 2);
      REQUIRE(typeName(q->slice()[0][0]) == "object");
      REQUIRE(q->slice()[0][0].length() == 2); // should do an entry in todo and failedservers
      REQUIRE(typeName(q->slice()[0][0].get(expectedJobKey)) == "object");

      auto job = q->slice()[0][0].get(expectedJobKey);
      REQUIRE(typeName(job.get("creator")) == "string");
      REQUIRE(typeName(job.get("type")) == "string");
      CHECK(job.get("type").copyString() == "activeFailover");
      REQUIRE(typeName(job.get("server")) == "string");
      CHECK(job.get("server").copyString() == LEADER);
      CHECK(typeName(job.get("jobId")) == "string");
      CHECK(job.get("jobId").copyString() == jobId);
      CHECK(typeName(job.get("timeCreated")) == "string");

      return fakeWriteResult;
    });

    When(Method(mockAgent, waitFor)).AlwaysReturn(AgentInterface::raft_commit_t::OK);

    auto& agent = mockAgent.get();
    Node snapshot = createNodeFromBuilder(base);
    ActiveFailoverJob job(snapshot(PREFIX), &agent, jobId, "tests", LEADER);

    REQUIRE(job.create());
    Verify(Method(mockAgent, write));
  }

  SECTION("The state is already 'GOOD' and 'Target/FailedServers' is still as in the snapshot. Violate: GOOD") {

    const char* tt = R"=({"arango":{"Supervision":{"Health":{"SNGL-leader":{"Status":"GOOD"}}}}})=";
    VPackBuilder overw = createBuilder(tt);
    VPackBuilder mod = VPackCollection::merge(base.slice(), overw.slice(), true);

    Mock<AgentInterface> mockAgent;
    When(Method(mockAgent, write)).AlwaysDo([&](query_t const& q, bool d) -> write_ret_t {
      INFO(q->slice().toJson());
      REQUIRE(typeName(q->slice()) == "array" );
      REQUIRE(q->slice().length() == 1);
      REQUIRE(typeName(q->slice()[0]) == "array");
      // operations + preconditions
      REQUIRE(q->slice()[0].length() == 2);
      REQUIRE(typeName(q->slice()[0][0]) == "object");
      REQUIRE(typeName(q->slice()[0][1]) == "object");

      auto writes = q->slice()[0][0];
      REQUIRE(typeName(writes.get("/arango/Target/ToDo/1")) == "object");
      REQUIRE(writes.get("/arango/Target/ToDo/1").get("server").copyString() == LEADER);

      auto precond = q->slice()[0][1];
      REQUIRE(typeName(precond.get("/arango/Supervision/Health/SNGL-leader/Status")) == "object");
      REQUIRE(precond.get("/arango/Supervision/Health/SNGL-leader/Status").get("old").copyString() == "BAD");
      REQUIRE(typeName(precond.get("/arango/Target/FailedServers").get("old")) == "object");

      return write_ret_t{false, "", std::vector<bool> {false}, std::vector<index_t> {0}};
    });

    When(Method(mockAgent, waitFor)).AlwaysReturn(AgentInterface::raft_commit_t::OK);
    auto& agent = mockAgent.get();
    Node snapshot = createNodeFromBuilder(mod);
    ActiveFailoverJob job(snapshot(PREFIX), &agent, jobId, "unittest", LEADER);

    REQUIRE_FALSE(job.create());
    REQUIRE(job.status() == JOB_STATUS::NOTFOUND);
    Verify(Method(mockAgent,write));
  } // SECTION

  SECTION("Server is healthy again, job finishes") {

    const char* health = R"=({"arango":{"Supervision":{"Health":{"SNGL-leader":{"Status":"GOOD"}}},
                              "Target":{"ToDo":{"1":{"jobId":"1","type":"activeFailover"}}}}})=";
    VPackBuilder mod = VPackCollection::merge(base.slice(), createBuilder(health).slice(), true);

    Mock<AgentInterface> mockAgent;
    When(Method(mockAgent, write)).Do([&](query_t const& q, bool d) -> write_ret_t {
      REQUIRE(typeName(q->slice()) == "array" );
      REQUIRE(q->slice().length() == 1);
      REQUIRE(typeName(q->slice()[0]) == "array");
      // operations + preconditions
      REQUIRE(q->slice()[0].length() == 2);
      REQUIRE(typeName(q->slice()[0][0]) == "object");
      REQUIRE(typeName(q->slice()[0][1]) == "object");

      auto writes = q->slice()[0][0];
      REQUIRE(typeName(writes.get("/arango/Target/ToDo/1")) == "object");
      REQUIRE(writes.get("/arango/Target/ToDo/1").get("server").copyString() == LEADER);

      auto precond = q->slice()[0][1];
      REQUIRE(typeName(precond.get("/arango/Supervision/Health/SNGL-leader/Status")) == "object");
      REQUIRE(precond.get("/arango/Supervision/Health/SNGL-leader/Status").get("old").copyString() == "BAD");
      REQUIRE(typeName(precond.get("/arango/Target/FailedServers").get("old")) == "object");

      return fakeWriteResult;
    });
    When(Method(mockAgent, waitFor)).AlwaysReturn(AgentInterface::raft_commit_t::OK);
    auto& agent = mockAgent.get();
    Node snapshot = createNodeFromBuilder(mod); // snapshot contains GOOD leader

    ActiveFailoverJob job(snapshot(PREFIX), &agent, jobId, "unittest", LEADER);
    REQUIRE(job.create()); // we already put the TODO entry in the snapshot for finish
    REQUIRE(job.status() == JOB_STATUS::TODO);
    Verify(Method(mockAgent,write)).Exactly(1);

    When(Method(mockAgent, write)).Do([&](query_t const& q, bool d) -> write_ret_t {
      // check that the job finishes now, without changing leader
      VPackSlice writes = q->slice()[0][0];
      REQUIRE(typeName(writes.get("/arango/Target/ToDo/1").get("op")) == "string");
      REQUIRE(typeName(writes.get("/arango/Target/Finished/1")) == "object");
      REQUIRE_FALSE(writes.hasKey("/arango" + asyncReplLeader)); // no change to leader
      return fakeWriteResult;
    });

    REQUIRE(job.start());
    REQUIRE(job.status() == JOB_STATUS::FINISHED);
    Verify(Method(mockAgent,write)).Exactly(2);

  } // SECTION

  SECTION("Current leader is different from server in job, job finishes") {

    const char* health = R"=({"arango":{"Plan":{"AsyncReplication":{"Leader":"SNGL-follower1"}},
                              "Target":{"ToDo":{"1":{"jobId":"1","type":"activeFailover"}}}}})=";
    VPackBuilder mod = VPackCollection::merge(base.slice(), createBuilder(health).slice(), true);

    Mock<AgentInterface> mockAgent;
    When(Method(mockAgent, write)).Do([&](query_t const& q, bool d) -> write_ret_t {
      REQUIRE(typeName(q->slice()) == "array" );
      REQUIRE(q->slice().length() == 1);
      REQUIRE(typeName(q->slice()[0]) == "array");
      // operations + preconditions
      REQUIRE(q->slice()[0].length() == 2);
      REQUIRE(typeName(q->slice()[0][0]) == "object");
      REQUIRE(typeName(q->slice()[0][1]) == "object");

      auto writes = q->slice()[0][0];
      REQUIRE(typeName(writes.get("/arango/Target/ToDo/1")) == "object");
      REQUIRE(writes.get("/arango/Target/ToDo/1").get("server").copyString() == LEADER);

      auto precond = q->slice()[0][1];
      REQUIRE(typeName(precond.get("/arango/Supervision/Health/SNGL-leader/Status")) == "object");
      REQUIRE(precond.get("/arango/Supervision/Health/SNGL-leader/Status").get("old").copyString() == "BAD");
      REQUIRE(typeName(precond.get("/arango/Target/FailedServers").get("old")) == "object");

      return fakeWriteResult;
    });
    When(Method(mockAgent, waitFor)).AlwaysReturn(AgentInterface::raft_commit_t::OK);
    auto& agent = mockAgent.get();
    Node snapshot = createNodeFromBuilder(mod); // snapshot contains different leader

    ActiveFailoverJob job(snapshot(PREFIX), &agent, jobId, "unittest", LEADER);
    REQUIRE(job.create()); // we already put the TODO entry in the snapshot for finish
    REQUIRE(job.status() == JOB_STATUS::TODO);
    Verify(Method(mockAgent,write)).Exactly(1);

    When(Method(mockAgent, write)).Do([&](query_t const& q, bool d) -> write_ret_t {
      // check that the job finishes now, without changing leader
      VPackSlice writes = q->slice()[0][0];
      REQUIRE(typeName(writes.get("/arango/Target/ToDo/1").get("op")) == "string");
      REQUIRE(typeName(writes.get("/arango/Target/Finished/1")) == "object");
      REQUIRE_FALSE(writes.hasKey("/arango" + asyncReplLeader)); // no change to leader
      return fakeWriteResult;
    });

    REQUIRE(job.start());
    REQUIRE(job.status() == JOB_STATUS::FINISHED);
    Verify(Method(mockAgent,write)).Exactly(2);

  } // SECTION

  SECTION("no in-sync follower found, job retries") {

    // follower follows wrong leader
    const char* noInSync = R"=({"arango":{"AsyncReplication":{"SNGL-follower1":{"leader":"abc","lastTick":9}}}})=";
    trans_ret_t fakeTransient {true, "", 1, 0, std::make_shared<Builder>(createBuilder(noInSync))};
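    // the transient store only knows follower1, and it replicates from "abc"
    // instead of the failed leader, so no in-sync follower qualifies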

    Mock<AgentInterface> mockAgent;
    When(Method(mockAgent, write)).Do([&](query_t const& q, bool d) -> write_ret_t {
      REQUIRE(typeName(q->slice()) == "array" );
      REQUIRE(q->slice().length() == 1);
      REQUIRE(typeName(q->slice()[0]) == "array");
      // operations + preconditions
      REQUIRE(q->slice()[0].length() == 2);
      REQUIRE(typeName(q->slice()[0][0]) == "object");
      REQUIRE(typeName(q->slice()[0][1]) == "object");

      auto writes = q->slice()[0][0];
      REQUIRE(typeName(writes.get("/arango/Target/ToDo/1")) == "object");
      REQUIRE(writes.get("/arango/Target/ToDo/1").get("server").copyString() == LEADER);

      auto precond = q->slice()[0][1];
      REQUIRE(typeName(precond.get("/arango/Supervision/Health/SNGL-leader/Status")) == "object");
      REQUIRE(precond.get("/arango/Supervision/Health/SNGL-leader/Status").get("old").copyString() == "BAD");
      REQUIRE(typeName(precond.get("/arango/Target/FailedServers").get("old")) == "object");

      return fakeWriteResult;
    });
    When(Method(mockAgent, waitFor)).AlwaysReturn(AgentInterface::raft_commit_t::OK);
    auto& agent = mockAgent.get();
    Node snapshot = createNodeFromBuilder(base);

    ActiveFailoverJob job(snapshot(PREFIX), &agent, jobId, "unittest", LEADER);
    REQUIRE(job.create()); // we already put the TODO entry in the snapshot for finish
    REQUIRE(job.status() == JOB_STATUS::TODO);
    Verify(Method(mockAgent,write)).Exactly(1);

    When(Method(mockAgent, transient)).Return(fakeTransient);
    When(Method(mockAgent, write)).Do([&](query_t const& q, bool d) -> write_ret_t {
      // check that the job fails now
      auto writes = q->slice()[0][0];
      REQUIRE(std::string(writes.get("/arango/Target/ToDo/1").get("op").typeName()) == "string");
      CHECK(std::string(writes.get("/arango/Target/Failed/1").typeName()) == "object");
      return fakeWriteResult;
    });

    REQUIRE_FALSE(job.start());
    // job status stays on TODO and can retry later
    REQUIRE(job.status() == JOB_STATUS::TODO);
    Verify(Method(mockAgent,transient)).Exactly(Once);
    Verify(Method(mockAgent,write)).Exactly(Once); // finish is not called

  } // SECTION

  SECTION("follower with best tick value used, job succeeds") {

    // 2 in-sync followers, follower1 should be used
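    // follower1 has the highest lastTick (10) among healthy followers that
    // replicate from the failed leader; follower4 is BAD and follower5
    // follows the wrong leader, so follower1 must be chosen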
    trans_ret_t fakeTransient {true, "", 1, 0, std::make_shared<Builder>(createBuilder(transient))};

    Mock<AgentInterface> mockAgent;
    When(Method(mockAgent, write)).Do([&](query_t const& q, bool d) -> write_ret_t {
      REQUIRE(typeName(q->slice()) == "array" );
      REQUIRE(q->slice().length() == 1);
      REQUIRE(typeName(q->slice()[0]) == "array");
      // operations + preconditions
      REQUIRE(q->slice()[0].length() == 2);
      REQUIRE(typeName(q->slice()[0][0]) == "object");
      REQUIRE(typeName(q->slice()[0][1]) == "object");

      auto writes = q->slice()[0][0];
      REQUIRE(typeName(writes.get("/arango/Target/ToDo/1")) == "object");
      REQUIRE(writes.get("/arango/Target/ToDo/1").get("server").copyString() == LEADER);

      auto precond = q->slice()[0][1];
      REQUIRE(typeName(precond.get("/arango/Supervision/Health/SNGL-leader/Status")) == "object");
      REQUIRE(precond.get("/arango/Supervision/Health/SNGL-leader/Status").get("old").copyString() == "BAD");
      REQUIRE(typeName(precond.get("/arango/Target/FailedServers").get("old")) == "object");

      return fakeWriteResult;
    });
    When(Method(mockAgent, waitFor)).AlwaysReturn(AgentInterface::raft_commit_t::OK);
    auto& agent = mockAgent.get();
    Node snapshot = createNodeFromBuilder(base);

    ActiveFailoverJob job(snapshot(PREFIX), &agent, jobId, "unittest", LEADER);
    REQUIRE(job.create()); // we already put the TODO entry in the snapshot for finish
    REQUIRE(job.status() == JOB_STATUS::TODO);
    Verify(Method(mockAgent,write)).Exactly(1);

    When(Method(mockAgent, transient)).Return(fakeTransient);
    When(Method(mockAgent, write)).Do([&](query_t const& q, bool d) -> write_ret_t {
      REQUIRE(typeName(q->slice()) == "array" );
      REQUIRE(q->slice().length() == 1);
      REQUIRE(typeName(q->slice()[0]) == "array");
      // operations + preconditions
      REQUIRE(q->slice()[0].length() == 2);
      REQUIRE(typeName(q->slice()[0][0]) == "object");
      REQUIRE(typeName(q->slice()[0][1]) == "object");

      INFO(q->toString());

      // check that the job succeeds now
      auto writes = q->slice()[0][0];
      REQUIRE(typeName(writes.get("/arango/Target/ToDo/1").get("op")) == "string");
      REQUIRE(typeName(writes.get("/arango/Target/Finished/1")) == "object");
      REQUIRE(typeName(writes.get("/arango/Plan/AsyncReplication/Leader")) == "string");
      REQUIRE(writes.get("/arango/Plan/AsyncReplication/Leader").copyString() == FOLLOWER1);

      auto precond = q->slice()[0][1];
      REQUIRE(typeName(precond.get("/arango/Supervision/Health/SNGL-leader/Status")) == "object");
      REQUIRE(precond.get("/arango/Supervision/Health/SNGL-leader/Status").get("old").copyString() == "FAILED");
      REQUIRE(precond.get("/arango/Supervision/Health/SNGL-follower1/Status").get("old").copyString() == "GOOD");
      REQUIRE(precond.get("/arango/Plan/AsyncReplication/Leader").get("old").copyString() == LEADER);

      return fakeWriteResult;
    });

    REQUIRE(job.start());
    // job moves to FINISHED after the leader switch
    REQUIRE(job.status() == JOB_STATUS::FINISHED);
    Verify(Method(mockAgent,transient)).Exactly(1);
    Verify(Method(mockAgent,write)).Exactly(2);

  } // SECTION

};

}}}

@ -0,0 +1,63 @@
R"=(
{
  "arango": {
    "Current": {
      "Version": 1,
      "DBServers": {},
      "Singles": {
        "SNGL-follower1": "none",
        "SNGL-follower2": "none",
        "SNGL-follower3": "none",
        "SNGL-follower4": "none",
        "SNGL-follower5": "none",
        "SNGL-leader": "none"
      }
    },
    "Plan": {
      "Version": 1,
      "DBServers": {},
      "AsyncReplication": {
        "Leader": "SNGL-leader"
      },
      "Singles": {
        "SNGL-follower1": "none",
        "SNGL-follower2": "none",
        "SNGL-follower3": "none",
        "SNGL-follower4": "none",
        "SNGL-follower5": "none",
        "SNGL-leader": "none"
      }
    },
    "Supervision": {
      "DBServers": {},
      "Health": {
        "SNGL-follower1": {
          "Status": "GOOD"
        },
        "SNGL-follower2": {
          "Status": "GOOD"
        },
        "SNGL-follower3": {
          "Status": "GOOD"
        },
        "SNGL-follower4": {
          "Status": "BAD"
        },
        "SNGL-follower5": {
          "Status": "GOOD"
        },
        "SNGL-leader": {
          "Status": "FAILED"
        }
      },
      "Shards": {}
    },
    "Target": {
      "FailedServers": {},
      "Failed": {},
      "Finished": {},
      "ToDo": {}
    }
  }
}
)="

@ -0,0 +1,28 @@
R"=(
[{
  "arango": {
    "AsyncReplication": {
      "SNGL-follower1": {
        "lastTick": 10,
        "leader": "SNGL-leader"
      },
      "SNGL-follower2": {
        "lastTick": 1,
        "leader": "SNGL-leader"
      },
      "SNGL-follower3": {
        "lastTick": 9,
        "leader": "SNGL-leader"
      },
      "SNGL-follower4": {
        "lastTick": 100,
        "leader": "SNGL-leader"
      },
      "SNGL-follower5": {
        "lastTick": 1000,
        "leader": "SNGL-follower2"
      }
    }
  }
}]
)="

@ -157,7 +157,7 @@ TEST_CASE("AddFollower", "[agency][supervision]") {

  SECTION("<collection> still exists, if missing, job is finished, move to "
          "Target/Finished") {

    TestStructType createTestStructure = [&](
      Slice const& s, std::string const& path) {

@ -27,7 +27,6 @@
#include "catch.hpp"
#include "fakeit.hpp"

#include "Agency/AddFollower.h"
#include "Agency/FailedFollower.h"
#include "Agency/MoveShard.h"
#include "Agency/AgentInterface.h"

@ -326,13 +326,12 @@ SECTION("if the leader is healthy again we fail the job") {
      auto writes = q->slice()[0][0]; \
      REQUIRE(std::string(writes.get("/arango/Target/ToDo/1").get("op").typeName()) == "string"); \
      CHECK(std::string(writes.get("/arango/Target/Failed/1").typeName()) == "object");

      return fakeWriteResult;
    });
    When(Method(mockAgent, waitFor)).AlwaysReturn();
    AgentInterface &agent = mockAgent.get();
    auto failedLeader = FailedLeader(
      agency("arango"),
      agency(PREFIX),
      &agent,
      JOB_STATUS::TODO,
      jobId

@ -60,6 +60,7 @@ endif ()
add_executable(
  arangodbtests
  ${CMAKE_SOURCE_DIR}/arangod/RestServer/FlushFeature.cpp
  Agency/ActiveFailoverTest.cpp
  Agency/AddFollowerTest.cpp
  Agency/CleanOutServerTest.cpp
  Agency/FailedFollowerTest.cpp