1
0
Fork 0

Merge branch 'devel' of github.com:arangodb/arangodb into devel

This commit is contained in:
hkernbach 2016-05-19 17:45:07 +02:00
commit 2ea71da526
17 changed files with 680 additions and 155 deletions

View File

@ -105,6 +105,16 @@ std::string Node::uri() const {
return path.str();
}
Node::Node(Node&& other) :
_node_name(std::move(other._node_name)),
_children(std::move(other._children)),
_value(std::move(other._value)) {}
Node::Node(Node const& other) :
_node_name(other._node_name),
_children(other._children),
_value(other._value) {}
// Assignment of rhs slice
Node& Node::operator=(VPackSlice const& slice) {
// 1. remove any existing time to live entry
@ -119,17 +129,29 @@ Node& Node::operator=(VPackSlice const& slice) {
return *this;
}
// Assignment of rhs node
Node& Node::operator=(Node&& rhs) {
// 1. remove any existing time to live entry
// 2. copy children map
// 3. copy from rhs to buffer pointer
// Must not copy rhs's _parent, _ttl, _observers
removeTimeToLive();
_node_name = std::move(rhs._node_name);
_children = std::move(rhs._children);
_value = std::move(rhs._value);
return *this;
}
// Assignment of rhs node
Node& Node::operator=(Node const& rhs) {
// 1. remove any existing time to live entry
// 2. clear children map
// 3. copy from rhs to buffer pointer
// 4. inform all observers here and above
// Must not copy rhs's _parent, _ttl, _observers
// 3. move from rhs to buffer pointer
// Must not move rhs's _parent, _ttl, _observers
removeTimeToLive();
_node_name = rhs._node_name;
_value = rhs._value;
_children = rhs._children;
_value = rhs._value;
return *this;
}

View File

@ -88,6 +88,9 @@ class Node {
/// @brief Construct with name
explicit Node(std::string const& name);
Node(Node const& other);
Node(Node&& other);
/// @brief Construct with name and introduce to tree under parent
Node(std::string const& name, Node* parent);
@ -105,6 +108,7 @@ class Node {
/// @brief Apply rhs to this node (deep copy of rhs)
Node& operator=(Node const& node);
Node& operator=(Node&& node);
/// @brief Apply value slice to this node
Node& operator=(arangodb::velocypack::Slice const&);

View File

@ -99,6 +99,36 @@ inline static bool endpointPathFromUrl(std::string const& url,
// Create with name
Store::Store(std::string const& name) : Thread(name), _node(name, this) {}
Store::Store(Store const& other) :
Thread(other._node.name()), _agent(other._agent), _timeTable(other._timeTable),
_observerTable(other._observerTable), _observedTable(other._observedTable),
_node(other._node) {}
Store::Store(Store&& other) :
Thread(other._node.name()), _agent(std::move(other._agent)),
_timeTable(std::move(other._timeTable)),
_observerTable(std::move(other._observerTable)),
_observedTable(std::move(other._observedTable)),
_node(std::move(other._node)) {}
Store& Store::operator=(Store const& rhs) {
_agent = rhs._agent;
_timeTable = rhs._timeTable;
_observerTable = rhs._observerTable;
_observedTable = rhs._observedTable;
_node = rhs._node;
return *this;
}
Store& Store::operator=(Store&& rhs) {
_agent = std::move(rhs._agent);
_timeTable = std::move(rhs._timeTable);
_observerTable = std::move(rhs._observerTable);
_observedTable = std::move(rhs._observedTable);
_node = std::move(rhs._node);
return *this;
}
// Default ctor
Store::~Store() {}

View File

@ -40,6 +40,18 @@ class Store : public arangodb::Thread {
/// @brief Destruct
virtual ~Store();
/// @brief Copy constructor
Store (Store const& other);
/// @brief Move constructor
Store (Store&& other);
// @brief Copy assignent
Store& operator= (Store const& rhs);
// @brief Move assigment
Store& operator= (Store&& rhs);
/// @brief Apply entry in query
std::vector<bool> apply(query_t const& query);

View File

@ -29,11 +29,23 @@
#include "Basics/ConditionLocker.h"
#include "VocBase/server.h"
#include <thread>
using namespace arangodb;
namespace arangodb {
namespace consensus {
std::string printTimestamp(Supervision::TimePoint const& t) {
time_t tt = std::chrono::system_clock::to_time_t(t);
struct tm tb;
size_t const len (21);
char buffer[len];
TRI_gmtime(tt, &tb);
::strftime(buffer, sizeof(buffer), "%Y-%m-%dT%H:%M:%SZ", &tb);
return std::string(buffer, len);
}
inline arangodb::consensus::write_ret_t makeReport(Agent* _agent,
Builder const& report) {
query_t envelope = std::make_shared<Builder>();
@ -48,17 +60,58 @@ inline arangodb::consensus::write_ret_t makeReport(Agent* _agent,
return _agent->write(envelope);
}
static std::string const pendingPrefix = "/arango/Supervision/Jobs/Pending/";
static std::string const collectionsPrefix = "/arango/Plan/Collections/";
static std::string const pendingPrefix = "/Supervision/Jobs/Pending/";
static std::string const collectionsPrefix = "/Plan/Collections/";
static std::string const toDoPrefix = "/Target/ToDo";
struct FailedServerJob {
FailedServerJob(Node const& snapshot, Agent* agent, uint64_t jobId,
std::string const& failed) {
struct MoveShard : public Job {
MoveShard (std::string const& creator, std::string const& database,
std::string const& collection, std::string const& shard,
std::string const& fromServer, std::string const& toServer,
uint64_t const& jobId, std::string const& agencyPrefix,
Agent* agent) {
todoEntry (creator, database, collection, shard, fromServer, toServer,
jobId, agencyPrefix, agent);
}
void todoEntry (std::string const& creator, std::string const& database,
std::string const& collection, std::string const& shard,
std::string const& fromServer, std::string const& toServer,
uint64_t const& jobId, std::string const& agencyPrefix,
Agent* agent) {
Builder todo;
todo.openArray(); todo.openObject();
todo.add(VPackValue(agencyPrefix + toDoPrefix + "/"
+ std::to_string(jobId)));
{
VPackObjectBuilder entry(&todo);
todo.add("creator", VPackValue(creator));
todo.add("type", VPackValue("moveShard"));
todo.add("database", VPackValue(database));
todo.add("collection", VPackValue(collection));
todo.add("shard", VPackValue(shard));
todo.add("fromServer", VPackValue(fromServer));
todo.add("toServer", VPackValue(toServer));
}
todo.close(); todo.close();
write_ret_t ret = makeReport(agent, todo);
}
};
struct FailedServer : public Job {
FailedServer(Node const& snapshot, Agent* agent, uint64_t jobId,
std::string const& failed, std::string agencyPrefix) {
// 1. find all shards in plan, where failed was leader.
// 2. swap positions in plan between failed and a random in sync follower
Node::Children const& databases =
snapshot("/arango/Plan/Collections").children();
snapshot("/Plan/Collections").children();
for (auto const& database : databases) {
for (auto const& collptr : database.second->children()) {
@ -72,22 +125,25 @@ struct FailedServerJob {
continue;
}
reportJobInSupervision(jobId, shard, failed);
planChanges(collptr, database, shard);
//MoveShard ()
reportJobInSupervision(jobId, shard, failed, agencyPrefix);
planChanges(collptr, database, shard, agencyPrefix);
}
}
}
}
}
void reportJobInSupervision(
uint64_t jobId,
std::pair<std::string, std::shared_ptr<Node>> const& shard,
std::string const& serverID) {
void reportJobInSupervision(uint64_t jobId,
std::pair<std::string,
std::shared_ptr<Node>> const& shard,
std::string const& serverID,
std::string const& agencyPrefix) {
std::string const& shardId = shard.first;
VPackSlice const& dbservers = shard.second->slice();
std::string path =
pendingPrefix + arangodb::basics::StringUtils::itoa(jobId);
std::string path = agencyPrefix + pendingPrefix
+ arangodb::basics::StringUtils::itoa(jobId);
query_t envelope = std::make_shared<Builder>();
Builder report;
@ -113,17 +169,16 @@ struct FailedServerJob {
report.close();
// makeReport(envelope, report);
LOG(WARN) << report.toJson();
}
void planChanges(
std::pair<std::string, std::shared_ptr<Node>> const& database,
std::pair<std::string, std::shared_ptr<Node>> const& collection,
std::pair<std::string, std::shared_ptr<Node>> const& shard) {
std::string path = collectionsPrefix + database.first + "/" +
std::pair<std::string, std::shared_ptr<Node>> const& shard,
std::string const& agencyPrefix) {
std::string path = agencyPrefix + collectionsPrefix + database.first + "/" +
collection.first + "/shards/" + shard.first;
LOG(WARN) << path;
}
};
}
@ -131,6 +186,8 @@ struct FailedServerJob {
using namespace arangodb::consensus;
std::string Supervision::_agencyPrefix = "/arango";
Supervision::Supervision()
: arangodb::Thread("Supervision"),
_agent(nullptr),
@ -144,22 +201,14 @@ Supervision::~Supervision() { shutdown(); };
void Supervision::wakeUp() {
TRI_ASSERT(_agent != nullptr);
_snapshot = _agent->readDB().get("/");
_snapshot = _agent->readDB().get(_agencyPrefix);
_cv.signal();
}
std::string printTimestamp(Supervision::TimePoint const& t) {
time_t tt = std::chrono::system_clock::to_time_t(t);
struct tm tb;
char buffer[21];
TRI_gmtime(tt, &tb);
size_t len = ::strftime(buffer, sizeof(buffer), "%Y-%m-%dT%H:%M:%SZ", &tb);
return std::string(buffer, len);
}
static std::string const syncPrefix = "/Sync/ServerStates/";
static std::string const supervisionPrefix = "/Supervision/Health/";
static std::string const planDBServersPrefix = "/Plan/DBServers";
static std::string const syncPrefix = "/arango/Sync/ServerStates/";
static std::string const supervisionPrefix = "/arango/Supervision/Health/";
static std::string const planDBServersPrefix = "/arango/Plan/DBServers";
std::vector<check_t> Supervision::checkDBServers() {
std::vector<check_t> ret;
Node::Children const& machinesPlanned =
@ -179,7 +228,7 @@ std::vector<check_t> Supervision::checkDBServers() {
report->openArray();
report->openArray();
report->openObject();
report->add(supervisionPrefix + serverID,
report->add(_agencyPrefix + supervisionPrefix + serverID,
VPackValue(VPackValueType::Object));
report->add("LastHearbeatReceived",
VPackValue(printTimestamp(it->second->myTimestamp)));
@ -194,8 +243,8 @@ std::vector<check_t> Supervision::checkDBServers() {
if (t.count() > _gracePeriod) { // Failure
if (it->second->maintenance() == 0) {
it->second->maintenance(TRI_NewTickServer());
FailedServerJob fsj(_snapshot, _agent, it->second->maintenance(),
serverID);
FailedServer fsj(_snapshot, _agent, it->second->maintenance(),
serverID, _agencyPrefix);
}
}
@ -219,7 +268,6 @@ std::vector<check_t> Supervision::checkDBServers() {
auto itr = _vitalSigns.begin();
while (itr != _vitalSigns.end()) {
if (machinesPlanned.find(itr->first) == machinesPlanned.end()) {
LOG(WARN) << itr->first << " shut down!";
itr = _vitalSigns.erase(itr);
} else {
++itr;
@ -229,16 +277,12 @@ std::vector<check_t> Supervision::checkDBServers() {
return ret;
}
bool Supervision::moveShard(std::string const& from, std::string const& to) {
return true;
}
bool Supervision::doChecks(bool timedout) {
if (_agent == nullptr) {
return false;
}
_snapshot = _agent->readDB().get("/");
_snapshot = _agent->readDB().get(_agencyPrefix);
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Sanity checks";
/*std::vector<check_t> ret = */checkDBServers();
@ -247,27 +291,49 @@ bool Supervision::doChecks(bool timedout) {
}
void Supervision::run() {
CONDITION_LOCKER(guard, _cv);
TRI_ASSERT(_agent != nullptr);
bool timedout = false;
while (!this->isStopping()) {
// Get agency prefix after cluster init
if (_jobId == 0) {
if (!updateAgencyPrefix(10)) {
LOG_TOPIC(ERR, Logger::AGENCY)
<< "Cannot get prefix from Agency. Stopping supervision for good.";
break;
}
}
// Get bunch of job IDs from agency for future jobs
if (_jobId == 0 || _jobId == _jobIdMax) {
if (!getUniqueIds()) {
LOG_TOPIC(ERR, Logger::AGENCY)
<< "Cannot get unique IDs from Agency. Stopping supervision for good.";
break;
}
MoveShard ("coordinator1", "_system", "41", "s42", "DBServer1",
"DBServer2", _jobId++, _agencyPrefix, _agent);
}
// Wait unless leader
if (_agent->leading()) {
timedout = _cv.wait(_frequency * 1000000); // quarter second
} else {
_cv.wait();
}
if (_jobId == 0 || _jobId == _jobIdMax) {
if (!getUniqueIds()) {
LOG_TOPIC(ERR, Logger::AGENCY) << "Cannot get unique IDs from Agency. "
"Stopping supervision for good.";
break;
}
}
// Do supervision
doChecks(timedout);
}
}
// Start thread
@ -280,20 +346,37 @@ bool Supervision::start() {
bool Supervision::start(Agent* agent) {
_agent = agent;
_frequency = static_cast<long>(_agent->config().supervisionFrequency);
_snapshot = _agent->readDB().get("/");
updateFromAgency();
return start();
}
#include <iostream>
// Get agency prefix fron agency
bool Supervision::updateAgencyPrefix (size_t nTries, int intervalSec) {
// Try nTries to get agency's prefix in intervals
for (size_t i = 0; i < nTries; i++) {
_snapshot = _agent->readDB().get("/");
if (_snapshot.children().size() > 0) {
_agencyPrefix = _snapshot.children().begin()->first;
LOG_TOPIC(DEBUG, Logger::AGENCY) << "Agency prefix is " << _agencyPrefix;
return true;
}
std::this_thread::sleep_for (std::chrono::seconds(intervalSec));
}
// Stand-alone agency
return false;
}
static std::string const syncLatest = "/Sync/LatestID";
// Get bunch of cluster's unique ids from agency
bool Supervision::getUniqueIds() {
uint64_t latestId;
try {
latestId = std::stoul(
_agent->readDB().get("/arango/Sync/LatestID").slice().toJson());
_agent->readDB().get(_agencyPrefix + "/Sync/LatestID").slice().toJson());
} catch (std::exception const& e) {
LOG(WARN) << e.what();
return false;
@ -304,11 +387,10 @@ bool Supervision::getUniqueIds() {
Builder uniq;
uniq.openArray();
uniq.openObject();
uniq.add("/arango/Sync/LatestID",
VPackValue(latestId + 100000)); // new val
uniq.add(_agencyPrefix + syncLatest, VPackValue(latestId + 100000)); // new
uniq.close();
uniq.openObject();
uniq.add("/arango/Sync/LatestID", VPackValue(latestId)); // precond
uniq.add(_agencyPrefix + syncLatest, VPackValue(latestId)); // precond
uniq.close();
uniq.close();
@ -321,7 +403,7 @@ bool Supervision::getUniqueIds() {
}
latestId = std::stoul(
_agent->readDB().get("/arango/Sync/LatestID").slice().toJson());
_agent->readDB().get(_agencyPrefix + "/Sync/LatestID").slice().toJson());
}
return success;
@ -329,12 +411,13 @@ bool Supervision::getUniqueIds() {
void Supervision::updateFromAgency() {
auto const& jobsPending =
_snapshot("/arango/Supervision/Jobs/Pending").children();
_snapshot("/Supervision/Jobs/Pending").children();
for (auto const& jobent : jobsPending) {
auto const& job = *(jobent.second);
LOG(WARN) << job.name() << " " << job("failed").toJson() << job("");
LOG_TOPIC(WARN, Logger::AGENCY)
<< job.name() << " " << job("failed").toJson() << job("");
}
}
@ -343,4 +426,6 @@ void Supervision::beginShutdown() {
Thread::beginShutdown();
}
Store const& Supervision::store() const { return _agent->readDB(); }
Store const& Supervision::store() const {
return _agent->readDB();
}

View File

@ -50,10 +50,6 @@ struct Job {
Job() {}
~Job() {}
};
struct FailedServersJob : public Job {
FailedServersJob();
~FailedServersJob();
};
struct check_t {
bool good;
@ -128,6 +124,10 @@ class Supervision : public arangodb::Thread {
void wakeUp();
private:
/// @brief Update agency prefix from agency itself
bool updateAgencyPrefix (size_t nTries = 10, int intervalSec = 1);
/// @brief Move shard from one db server to other db server
bool moveShard(std::string const& from, std::string const& to);
@ -167,6 +167,8 @@ class Supervision : public arangodb::Thread {
long _gracePeriod;
long _jobId;
long _jobIdMax;
static std::string _agencyPrefix;
};
}
}

View File

@ -579,14 +579,13 @@ bool AgencyComm::tryInitializeStructure() {
builder.add(VPackValue("Target"));
{
VPackObjectBuilder c(&builder);
addEmptyVPackObject("Coordinators", builder);
builder.add(VPackValue("Collections"));
{
VPackObjectBuilder d(&builder);
addEmptyVPackObject("_system", builder);
}
builder.add("Version", VPackValue(1));
addEmptyVPackObject("MapLocalToID", builder);
addEmptyVPackObject("Coordinators", builder);
addEmptyVPackObject("DBServers", builder);
builder.add(VPackValue("Databases"));
{
VPackObjectBuilder d(&builder);
@ -597,19 +596,20 @@ bool AgencyComm::tryInitializeStructure() {
builder.add("id", VPackValue("1"));
}
}
addEmptyVPackObject("DBServers", builder);
builder.add("Lock", VPackValue("UNLOCKED"));
addEmptyVPackObject("MapLocalToID", builder);
addEmptyVPackObject("Failed", builder);
addEmptyVPackObject("Finished", builder);
addEmptyVPackObject("Pending", builder);
addEmptyVPackObject("ToDo", builder);
builder.add("Version", VPackValue(1));
}
builder.add(VPackValue("Supervision"));
{
VPackObjectBuilder c(&builder);
builder.add(VPackValue("Jobs"));
{
VPackObjectBuilder d(&builder);
addEmptyVPackObject("Pending", builder);
addEmptyVPackObject("Finished", builder);
addEmptyVPackObject("Failed", builder);
}
addEmptyVPackObject("Health", builder);
addEmptyVPackObject("Shards", builder);
addEmptyVPackObject("DBServers", builder);
}
builder.add("InitDone", VPackValue(true));
} catch (...) {

View File

@ -55,55 +55,25 @@ Dispatcher::~Dispatcher() {
/// @brief adds the standard queue
////////////////////////////////////////////////////////////////////////////////
void Dispatcher::addStandardQueue(size_t nrThreads, size_t maxSize) {
void Dispatcher::addStandardQueue(size_t nrThreads, size_t nrExtraThreads,
size_t maxSize) {
TRI_ASSERT(_queues[STANDARD_QUEUE] == nullptr);
_queues[STANDARD_QUEUE] =
new DispatcherQueue(_scheduler, this, STANDARD_QUEUE,
CreateDispatcherThread, nrThreads, maxSize);
CreateDispatcherThread, nrThreads, nrExtraThreads, maxSize);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief adds the AQL queue (used for the cluster)
////////////////////////////////////////////////////////////////////////////////
void Dispatcher::addAQLQueue(size_t nrThreads, size_t maxSize) {
void Dispatcher::addAQLQueue(size_t nrThreads, size_t nrExtraThreads,
size_t maxSize) {
TRI_ASSERT(_queues[AQL_QUEUE] == nullptr);
_queues[AQL_QUEUE] = new DispatcherQueue(
_scheduler, this, AQL_QUEUE, CreateDispatcherThread, nrThreads, maxSize);
}
////////////////////////////////////////////////////////////////////////////////
/// @brief starts a new named queue
///
/// This is not thread safe. Only used during initialization.
////////////////////////////////////////////////////////////////////////////////
int Dispatcher::addExtraQueue(size_t identifier, size_t nrThreads,
size_t maxSize) {
if (identifier == 0) {
return TRI_ERROR_QUEUE_ALREADY_EXISTS;
}
size_t n = identifier + (SYSTEM_QUEUE_SIZE - 1);
if (_queues.size() <= n) {
_queues.resize(n + 1, nullptr);
}
if (_queues[n] != nullptr) {
return TRI_ERROR_QUEUE_ALREADY_EXISTS;
}
if (_stopping != 0) {
return TRI_ERROR_DISPATCHER_IS_STOPPING;
}
_queues[n] = new DispatcherQueue(_scheduler, this, n, CreateDispatcherThread,
nrThreads, maxSize);
return TRI_ERROR_NO_ERROR;
_scheduler, this, AQL_QUEUE, CreateDispatcherThread, nrThreads, nrExtraThreads, maxSize);
}
////////////////////////////////////////////////////////////////////////////////

View File

@ -78,19 +78,13 @@ class Dispatcher {
/// @brief adds a new queue
//////////////////////////////////////////////////////////////////////////////
void addStandardQueue(size_t nrThreads, size_t maxSize);
void addStandardQueue(size_t nrThreads, size_t nrExtraThreads, size_t maxSize);
//////////////////////////////////////////////////////////////////////////////
/// @brief adds a new AQL queue
//////////////////////////////////////////////////////////////////////////////
void addAQLQueue(size_t nrThreads, size_t maxSize);
//////////////////////////////////////////////////////////////////////////////
/// @brief starts a new named queue
//////////////////////////////////////////////////////////////////////////////
int addExtraQueue(size_t identifier, size_t nrThreads, size_t maxSize);
void addAQLQueue(size_t nrThreads, size_t nrExtraThreads, size_t maxSize);
//////////////////////////////////////////////////////////////////////////////
/// @brief adds a new job

View File

@ -43,6 +43,7 @@ DispatcherFeature::DispatcherFeature(
application_features::ApplicationServer* server)
: ApplicationFeature(server, "Dispatcher"),
_nrStandardThreads(0),
_nrExtraThreads(0),
_nrAqlThreads(0),
_queueSize(16384),
_dispatcher(nullptr) {
@ -64,11 +65,15 @@ void DispatcherFeature::collectOptions(
options->addSection("server", "Server features");
options->addOption("--server.threads",
"number of threads for basic operations",
"number of threads for basic operations (0 = automatic)",
new UInt64Parameter(&_nrStandardThreads));
options->addHiddenOption("--server.extra-threads",
"number of extra threads that can additionally be created when all regular threads are blocked and the client requests thread creation",
new UInt64Parameter(&_nrExtraThreads));
options->addHiddenOption("--server.aql-threads",
"number of threads for basic operations",
"number of threads for basic operations (0 = automatic)",
new UInt64Parameter(&_nrAqlThreads));
options->addHiddenOption("--server.maximal-queue-size",
@ -95,6 +100,10 @@ void DispatcherFeature::validateOptions(std::shared_ptr<ProgramOptions>) {
TRI_ASSERT(_nrAqlThreads >= 1);
if (_nrExtraThreads == 0) {
_nrExtraThreads = _nrStandardThreads;
}
if (_queueSize <= 128) {
LOG(FATAL)
<< "invalid value for `--server.maximal-queue-size', need at least 128";
@ -161,14 +170,18 @@ void DispatcherFeature::buildStandardQueue() {
LOG_TOPIC(DEBUG, Logger::STARTUP) << "setting up a standard queue with "
<< _nrStandardThreads << " threads";
_dispatcher->addStandardQueue(static_cast<size_t>(_nrStandardThreads), static_cast<size_t>(_queueSize));
_dispatcher->addStandardQueue(static_cast<size_t>(_nrStandardThreads),
static_cast<size_t>(_nrExtraThreads),
static_cast<size_t>(_queueSize));
}
void DispatcherFeature::buildAqlQueue() {
LOG_TOPIC(DEBUG, Logger::STARTUP) << "setting up the AQL standard queue with "
<< _nrAqlThreads << " threads";
_dispatcher->addAQLQueue(static_cast<size_t>(_nrAqlThreads), static_cast<size_t>(_queueSize));
_dispatcher->addAQLQueue(static_cast<size_t>(_nrAqlThreads),
static_cast<size_t>(_nrExtraThreads),
static_cast<size_t>(_queueSize));
}
void DispatcherFeature::setProcessorAffinity(std::vector<size_t> const& cores) {

View File

@ -52,6 +52,7 @@ class DispatcherFeature final
private:
uint64_t _nrStandardThreads;
uint64_t _nrExtraThreads;
uint64_t _nrAqlThreads;
uint64_t _queueSize;

View File

@ -38,9 +38,10 @@ using namespace arangodb::rest;
DispatcherQueue::DispatcherQueue(Scheduler* scheduler, Dispatcher* dispatcher,
size_t id,
Dispatcher::newDispatcherThread_fptr creator,
size_t nrThreads, size_t maxSize)
size_t nrThreads, size_t nrExtra, size_t maxSize)
: _id(id),
_nrThreads(nrThreads),
_nrExtra(nrExtra),
_maxSize(maxSize),
_waitLock(),
_readyJobs(maxSize),
@ -125,7 +126,7 @@ int DispatcherQueue::addJob(std::unique_ptr<Job>& job, bool startThread) {
}
// if all threads are blocked, start a new one - we ignore race conditions
else if (startThread || notEnoughThreads()) {
else if (notEnoughThreads()) {
startQueueThread(startThread);
}
@ -443,7 +444,12 @@ bool DispatcherQueue::notEnoughThreads() {
size_t nrRunning = _nrRunning.load(std::memory_order_relaxed);
size_t nrBlocked = (size_t)_nrBlocked.load(std::memory_order_relaxed);
return nrRunning <= _nrThreads - 1 || nrRunning <= nrBlocked;
if (nrRunning + nrBlocked >= _nrThreads + _nrExtra) {
// we have reached the absolute maximum capacity
return false;
}
return nrRunning <= (_nrThreads + _nrExtra - 1) || nrRunning <= nrBlocked;
}
////////////////////////////////////////////////////////////////////////////////

View File

@ -56,7 +56,7 @@ class DispatcherQueue {
DispatcherQueue(Scheduler*, Dispatcher*, size_t id,
Dispatcher::newDispatcherThread_fptr, size_t nrThreads,
size_t maxSize);
size_t nrExtra, size_t maxSize);
~DispatcherQueue();
@ -154,11 +154,21 @@ class DispatcherQueue {
/// @brief total number of threads
///
/// This number is fixed. It is the number of pre-configured threads from the
/// configuration file and is the initial number of threads started. The
/// dispatcher queues will try to have at least this many running threads.
/// configuration file and is the average number of threads running under
/// normal condition. Note that at server start not all threads will be
/// started instantly, as threads will be created on demand.
//////////////////////////////////////////////////////////////////////////////
size_t const _nrThreads;
//////////////////////////////////////////////////////////////////////////////
/// @brief total number of extra/overhead threads
///
/// This number is fixed. It is the maximum number of extra threads that can
/// be created if _nrThreads threads have already been created
//////////////////////////////////////////////////////////////////////////////
size_t const _nrExtra;
//////////////////////////////////////////////////////////////////////////////
/// @brief maximum queue size (number of jobs)

View File

@ -38,12 +38,7 @@ var cluster = require("@arangodb/cluster");
////////////////////////////////////////////////////////////////////////////////
function databasePrefix (req, url) {
if (req.hasOwnProperty('compatibility') && req.compatibility < 10400) {
// pre 1.4-style location response (e.g. /_api/collection/xyz)
return url;
}
// 1.4-style location response (e.g. /_db/dbname/_api/collection/xyz)
// location response (e.g. /_db/dbname/_api/collection/xyz)
return "/_db/" + encodeURIComponent(arangodb.db._name()) + url;
}

View File

@ -162,8 +162,7 @@ function post_api_database (req, res) {
var result = arangodb.db._createDatabase(json.name || "", options, users);
var returnCode = (req.compatibility <= 10400 ? actions.HTTP_OK : actions.HTTP_CREATED);
actions.resultOk(req, res, returnCode, { result : result });
actions.resultOk(req, res, actions.HTTP_CREATED, { result : result });
}
////////////////////////////////////////////////////////////////////////////////

View File

@ -75,9 +75,8 @@ function startReadingQuery (endpoint, collName, timeout) {
}
var count = 0;
while (true) {
count += 1;
if (count > 500) {
console.error("startReadingQuery: Read transaction did not begin. Giving up");
if (++count > 5) {
console.error("startReadingQuery: Read transaction did not begin. Giving up after 5 tries");
return false;
}
require("internal").wait(0.2);
@ -106,7 +105,8 @@ function startReadingQuery (endpoint, collName, timeout) {
break;
}
}
console.error("startReadingQuery: Did not find query.", r);
console.info("startReadingQuery: Did not find query.", r);
require("internal").wait(0.5, false);
}
}

View File

@ -1,5 +1,5 @@
/*jshint globalstrict:false, strict:false */
/*global assertTrue, assertEqual */
/*global assertTrue, assertEqual, fail */
////////////////////////////////////////////////////////////////////////////////
/// @brief test synchronous replication in the cluster
@ -28,13 +28,16 @@
/// @author Copyright 2016, ArangoDB GmbH, Cologne, Germany
////////////////////////////////////////////////////////////////////////////////
var jsunity = require("jsunity");
const jsunity = require("jsunity");
var arangodb = require("@arangodb");
var db = arangodb.db;
var _ = require("lodash");
var print = require("internal").print;
var wait = require("internal").wait;
const arangodb = require("@arangodb");
const db = arangodb.db;
const ERRORS = arangodb.errors;
const _ = require("lodash");
const print = require("internal").print;
const wait = require("internal").wait;
const suspendExternal = require("internal").suspendExternal;
const continueExternal = require("internal").continueExternal;
////////////////////////////////////////////////////////////////////////////////
@ -62,7 +65,6 @@ function SynchronousReplicationSuite () {
s => global.ArangoClusterInfo.getCollectionInfoCurrent("_system", cn, s)
);
let replicas = ccinfo.map(s => s.servers.length);
print("Replicas:", replicas);
if (_.all(replicas, x => x === 2)) {
print("Replication up and running!");
return true;
@ -72,6 +74,195 @@ function SynchronousReplicationSuite () {
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief fail the follower
////////////////////////////////////////////////////////////////////////////////
function failFollower() {
var follower = cinfo.shards[shards[0]][1];
var endpoint = global.ArangoClusterInfo.getServerEndpoint(follower);
// Now look for instanceInfo:
var pos = _.findIndex(global.instanceInfo.arangods,
x => x.endpoint === endpoint);
assertTrue(pos >= 0);
assertTrue(suspendExternal(global.instanceInfo.arangods[pos].pid));
}
////////////////////////////////////////////////////////////////////////////////
/// @brief heal the follower
////////////////////////////////////////////////////////////////////////////////
function healFollower() {
var follower = cinfo.shards[shards[0]][1];
var endpoint = global.ArangoClusterInfo.getServerEndpoint(follower);
// Now look for instanceInfo:
var pos = _.findIndex(global.instanceInfo.arangods,
x => x.endpoint === endpoint);
assertTrue(pos >= 0);
assertTrue(continueExternal(global.instanceInfo.arangods[pos].pid));
}
////////////////////////////////////////////////////////////////////////////////
/// @brief produce failure
////////////////////////////////////////////////////////////////////////////////
function makeFailure(failure) {
if (failure.follower) {
failFollower();
/* } else {
failLeader(); // TODO: function does not exist
*/
}
}
////////////////////////////////////////////////////////////////////////////////
/// @brief heal failure
////////////////////////////////////////////////////////////////////////////////
function healFailure(failure) {
if (failure.follower) {
healFollower();
/* } else {
healLeader(); // TODO: function does not exist
*/
}
}
////////////////////////////////////////////////////////////////////////////////
/// @brief basic operations, with various failure modes:
////////////////////////////////////////////////////////////////////////////////
function runBasicOperations(failure, healing) {
if (failure.place === 1) { makeFailure(failure); }
// Insert with check:
var id = c.insert({Hallo:12});
assertEqual(1, c.count());
if (healing.place === 1) { healFailure(healing); }
if (failure.place === 2) { makeFailure(failure); }
var doc = c.document(id._key);
assertEqual(12, doc.Hallo);
if (healing.place === 2) { healFailure(healing); }
if (failure.place === 3) { makeFailure(failure); }
var ids = c.insert([{Hallo:13}, {Hallo:14}]);
assertEqual(3, c.count());
assertEqual(2, ids.length);
if (healing.place === 3) { healFailure(healing); }
if (failure.place === 4) { makeFailure(failure); }
var docs = c.document([ids[0]._key, ids[1]._key]);
assertEqual(2, docs.length);
assertEqual(13, docs[0].Hallo);
assertEqual(14, docs[1].Hallo);
if (healing.place === 4) { healFailure(healing); }
if (failure.place === 5) { makeFailure(failure); }
// Replace with check:
c.replace(id._key, {"Hallo": 100});
if (healing.place === 5) { healFailure(healing); }
if (failure.place === 6) { makeFailure(failure); }
doc = c.document(id._key);
assertEqual(100, doc.Hallo);
if (healing.place === 6) { healFailure(healing); }
if (failure.place === 7) { makeFailure(failure); }
c.replace([ids[0]._key, ids[1]._key], [{Hallo:101}, {Hallo:102}]);
if (healing.place === 7) { healFailure(healing); }
if (failure.place === 8) { makeFailure(failure); }
docs = c.document([ids[0]._key, ids[1]._key]);
assertEqual(2, docs.length);
assertEqual(101, docs[0].Hallo);
assertEqual(102, docs[1].Hallo);
if (healing.place === 8) { healFailure(healing); }
if (failure.place === 9) { makeFailure(failure); }
// Update with check:
c.update(id._key, {"Hallox": 105});
if (healing.place === 9) { healFailure(healing); }
if (failure.place === 10) { makeFailure(failure); }
doc = c.document(id._key);
assertEqual(100, doc.Hallo);
assertEqual(105, doc.Hallox);
if (healing.place === 10) { healFailure(healing); }
if (failure.place === 11) { makeFailure(failure); }
c.update([ids[0]._key, ids[1]._key], [{Hallox:106}, {Hallox:107}]);
if (healing.place === 11) { healFailure(healing); }
if (failure.place === 12) { makeFailure(failure); }
docs = c.document([ids[0]._key, ids[1]._key]);
assertEqual(2, docs.length);
assertEqual(101, docs[0].Hallo);
assertEqual(102, docs[1].Hallo);
assertEqual(106, docs[0].Hallox);
assertEqual(107, docs[1].Hallox);
if (healing.place === 12) { healFailure(healing); }
if (failure.place === 13) { makeFailure(failure); }
// AQL:
var q = db._query(`FOR x IN @@cn
FILTER x.Hallo > 0
SORT x.Hallo
RETURN {"Hallo": x.Hallo}`, {"@cn": cn});
docs = q.toArray();
assertEqual(3, docs.length);
assertEqual([{Hallo:100}, {Hallo:101}, {Hallo:102}], docs);
if (healing.place === 13) { healFailure(healing); }
if (failure.place === 14) { makeFailure(failure); }
// Remove with check:
c.remove(id._key);
if (healing.place === 14) { healFailure(healing); }
if (failure.place === 15) { makeFailure(failure); }
try {
doc = c.document(id._key);
fail();
}
catch (e1) {
assertEqual(ERRORS.ERROR_ARANGO_DOCUMENT_NOT_FOUND.code, e1.errorNum);
}
assertEqual(2, c.count());
if (healing.place === 15) { healFailure(healing); }
if (failure.place === 16) { makeFailure(failure); }
c.remove([ids[0]._key, ids[1]._key]);
if (healing.place === 16) { healFailure(healing); }
if (failure.place === 17) { makeFailure(failure); }
docs = c.document([ids[0]._key, ids[1]._key]);
assertEqual(2, docs.length);
assertTrue(docs[0].error);
assertTrue(docs[1].error);
if (healing.place === 17) { healFailure(healing); }
}
////////////////////////////////////////////////////////////////////////////////
/// @brief the actual tests
////////////////////////////////////////////////////////////////////////////////
return {
////////////////////////////////////////////////////////////////////////////////
@ -80,7 +271,7 @@ function SynchronousReplicationSuite () {
setUp : function () {
db._drop(cn);
c = db._create(cn, {numberOfShards: 2, replicationFactor: 2});
c = db._create(cn, {numberOfShards: 1, replicationFactor: 2});
},
////////////////////////////////////////////////////////////////////////////////
@ -96,7 +287,6 @@ function SynchronousReplicationSuite () {
////////////////////////////////////////////////////////////////////////////////
testCheckInstanceInfo : function () {
require("internal").print("InstanceInfo is:", global.instanceInfo);
assertTrue(global.instanceInfo !== undefined);
},
@ -106,7 +296,197 @@ function SynchronousReplicationSuite () {
testSetup : function () {
assertTrue(waitForSynchronousReplication());
assertEqual(12, 12);
},
////////////////////////////////////////////////////////////////////////////////
/// @brief run a standard check without failures:
////////////////////////////////////////////////////////////////////////////////
testBasicOperations : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({}, {});
},
////////////////////////////////////////////////////////////////////////////////
/// @brief run a standard check with failures:
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFailureFollower : function () {
assertTrue(waitForSynchronousReplication());
failFollower();
runBasicOperations({}, {});
healFollower();
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 1
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail1 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:1, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 2
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail2 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:2, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 3
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail3 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:3, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 4
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail4 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:4, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 5
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail5 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:5, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 6
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail6 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:6, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 7
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail7 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:7, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 8
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail8 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:8, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 9
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail9 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:9, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 10
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail10 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:10, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 11
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail11 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:11, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 12
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail12 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:12, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 13
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail13 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:13, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 14
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail14 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:14, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 15
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail15 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:15, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 16
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail16 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:16, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
/// @brief fail in place 17
////////////////////////////////////////////////////////////////////////////////
testBasicOperationsFollowerFail17 : function () {
assertTrue(waitForSynchronousReplication());
runBasicOperations({place:17, follower:true}, {place:17, follower: true});
assertTrue(waitForSynchronousReplication());
},
////////////////////////////////////////////////////////////////////////////////
@ -114,6 +494,8 @@ function SynchronousReplicationSuite () {
////////////////////////////////////////////////////////////////////////////////
testDummy : function () {
assertEqual(12, 12);
wait(15);
}
};