1
0
Fork 0

[devel] fix state clientlookuptable (#9066)

This commit is contained in:
Kaveh Vahedipour 2019-05-30 04:24:46 +02:00 committed by Frank Celler
parent 7dbb26c0c0
commit 773f3c8422
12 changed files with 263 additions and 94 deletions

View File

@ -68,7 +68,10 @@ v3.5.0-rc.2 (2019-05-23)
using the RocksDB engine. The index types "hash", "skiplist" and "persistent" are using the RocksDB engine. The index types "hash", "skiplist" and "persistent" are
just aliases of each other with the RocksDB engine, so there is no need to offer all just aliases of each other with the RocksDB engine, so there is no need to offer all
of them. After initially only offering "hash" indexes, we decided to only offer of them. After initially only offering "hash" indexes, we decided to only offer
indexes of type "persistent", as it is technically the most appropriate description. indexes of type "persistent", as it is technically the most
appropriate description.
* fix client id lookup table in state
v3.5.0-rc.1 (2019-05-14) v3.5.0-rc.1 (2019-05-14)

View File

@ -86,6 +86,21 @@ bool FailedServer::start(bool& aborts) {
} else if (jobId.second) { } else if (jobId.second) {
aborts = true; aborts = true;
JobContext(PENDING, jobId.first, _snapshot, _agent).abort("failed server"); JobContext(PENDING, jobId.first, _snapshot, _agent).abort("failed server");
}
// Special case for moveshards that have this server as from server (and thus do not lock it)
Node::Children const& pends = _snapshot.hasAsChildren(pendingPrefix).first;
for (auto const& subJob : pends) {
if (subJob.second->hasAsString("type").first == "moveShard") {
if (subJob.second->hasAsString("fromServer").first == _server) {
JobContext(PENDING, subJob.first, _snapshot, _agent).abort("From server failed");
aborts = true;
}
}
}
if (aborts) {
return false; return false;
} }

View File

@ -249,6 +249,10 @@ size_t Job::countGoodOrBadServersInList(Node const& snap, VPackSlice const& serv
if (serverName.isString()) { if (serverName.isString()) {
// serverName not a string? Then don't count // serverName not a string? Then don't count
std::string serverStr = serverName.copyString(); std::string serverStr = serverName.copyString();
// Ignore a potential _ prefix, which can occur on leader resign:
if (serverStr.size() > 0 && serverStr[0] == '_') {
serverStr.erase(0, 1); // remove leading _
}
// Now look up this server: // Now look up this server:
auto it = healthData.find(serverStr); auto it = healthData.find(serverStr);
if (it != healthData.end()) { if (it != healthData.end()) {
@ -675,6 +679,14 @@ void Job::addReleaseShard(Builder& trx, std::string const& shard) {
} }
} }
/// @brief Append a precondition entry to `pre`: the key is
/// "/Target/Pending/<jobId>" and the value is the object {"oldEmpty": false}.
/// @param pre    builder the precondition is written into
/// @param jobId  id of the job, appended to the pending prefix
void Job::addPreconditionJobStillInPending(Builder& pre, std::string const& jobId) {
  std::string const pendingKey = "/Target/Pending/" + jobId;
  pre.add(VPackValue(pendingKey));
  // The object builder closes the object when it goes out of scope at the end
  // of this function; nothing else is written to `pre` after the attribute.
  VPackObjectBuilder condition(&pre);
  pre.add("oldEmpty", VPackValue(false));
}
std::string Job::checkServerHealth(Node const& snapshot, std::string const& server) { std::string Job::checkServerHealth(Node const& snapshot, std::string const& server) {
auto status = snapshot.hasAsString(healthPrefix + server + "/Status"); auto status = snapshot.hasAsString(healthPrefix + server + "/Status");

View File

@ -178,6 +178,7 @@ struct Job {
std::string const& health); std::string const& health);
static void addPreconditionShardNotBlocked(Builder& pre, std::string const& shard); static void addPreconditionShardNotBlocked(Builder& pre, std::string const& shard);
static void addPreconditionUnchanged(Builder& pre, std::string const& key, Slice value); static void addPreconditionUnchanged(Builder& pre, std::string const& key, Slice value);
/// @brief Add to `pre` a precondition keyed "/Target/Pending/<jobId>" whose
/// value is {"oldEmpty": false}
static void addPreconditionJobStillInPending(Builder& pre, std::string const& jobId);
static std::string checkServerHealth(Node const& snapshot, std::string const& server); static std::string checkServerHealth(Node const& snapshot, std::string const& server);
}; };

View File

@ -219,10 +219,15 @@ bool MoveShard::start(bool&) {
// Check that the toServer is in state "GOOD": // Check that the toServer is in state "GOOD":
std::string health = checkServerHealth(_snapshot, _to); std::string health = checkServerHealth(_snapshot, _to);
if (health != "GOOD") { if (health != "GOOD") {
LOG_TOPIC("00639", DEBUG, Logger::SUPERVISION) if (health == "BAD") {
<< "server " << _to << " is currently " << health LOG_TOPIC("de055", DEBUG, Logger::SUPERVISION)
<< ", not starting MoveShard job " << _jobId; << "server " << _to << " is currently " << health
return false; << ", not starting MoveShard job " << _jobId;
return false;
} else { // FAILED
finish("", "", false, "toServer is FAILED");
return false;
}
} }
// Check that _to is not in `Target/CleanedServers`: // Check that _to is not in `Target/CleanedServers`:
@ -450,6 +455,17 @@ JOB_STATUS MoveShard::pendingLeader() {
Builder pre; // precondition Builder pre; // precondition
bool finishedAfterTransaction = false; bool finishedAfterTransaction = false;
// Check if any of the servers in the Plan are FAILED, if so,
// we abort:
if (plan.isArray() &&
Job::countGoodOrBadServersInList(_snapshot, plan) < plan.length()) {
LOG_TOPIC("de056", DEBUG, Logger::SUPERVISION)
<< "MoveShard (leader): found FAILED server in Plan, aborting job, db: "
<< _database << " coll: " << _collection << " shard: " << _shard;
abort("failed server in Plan");
return FAILED;
}
if (plan[0].copyString() == _from) { if (plan[0].copyString() == _from) {
// Still the old leader, let's check that the toServer is insync: // Still the old leader, let's check that the toServer is insync:
size_t done = 0; // count the number of shards for which _to is in sync: size_t done = 0; // count the number of shards for which _to is in sync:
@ -664,6 +680,20 @@ JOB_STATUS MoveShard::pendingLeader() {
} }
JOB_STATUS MoveShard::pendingFollower() { JOB_STATUS MoveShard::pendingFollower() {
// Check if any of the servers in the Plan are FAILED, if so,
// we abort:
std::string planPath =
planColPrefix + _database + "/" + _collection + "/shards/" + _shard;
Slice plan = _snapshot.hasAsSlice(planPath).first;
if (plan.isArray() &&
Job::countGoodOrBadServersInList(_snapshot, plan) < plan.length()) {
LOG_TOPIC("f8c22", DEBUG, Logger::SUPERVISION)
<< "MoveShard (follower): found FAILED server in Plan, aborting job, db: "
<< _database << " coll: " << _collection << " shard: " << _shard;
abort("failed server in Plan");
return FAILED;
}
// Find the other shards in the same distributeShardsLike group: // Find the other shards in the same distributeShardsLike group:
std::vector<Job::shard_t> shardsLikeMe = std::vector<Job::shard_t> shardsLikeMe =
clones(_snapshot, _database, _collection, _shard); clones(_snapshot, _database, _collection, _shard);
@ -858,6 +888,7 @@ arangodb::Result MoveShard::abort(std::string const& reason) {
// Current still as is // Current still as is
trx.add(curPath, current); trx.add(curPath, current);
}); });
addPreconditionJobStillInPending(trx, _jobId);
} }
} }
write_ret_t res = singleWriteTransaction(_agent, trx, false); write_ret_t res = singleWriteTransaction(_agent, trx, false);

View File

@ -375,6 +375,17 @@ Store& Node::store() { return *(root()._store); }
Store const& Node::store() const { return *(root()._store); } Store const& Node::store() const { return *(root()._store); }
/// @brief Follow the _parent links up to the topmost node and return that
/// node's _store pointer.
/// @return the store pointer of the topmost node; nullptr if this node is not
///         part of a tree that belongs to a store.
Store* Node::getStore() {
  Node* top = this;
  // Climb until we reach the node that has no parent.
  while (top->_parent != nullptr) {
    top = top->_parent;
  }
  return top->_store;
}
// velocypack value type of this node // velocypack value type of this node
ValueType Node::valueType() const { return slice().type(); } ValueType Node::valueType() const { return slice().type(); }
@ -396,11 +407,14 @@ TimePoint const& Node::timeToLive() const {
// remove time to live entry for this node // remove time to live entry for this node
bool Node::removeTimeToLive() { bool Node::removeTimeToLive() {
if (_store != nullptr) {
_store->removeTTL(uri()); Store* s = getStore(); // We could be in a Node that belongs to a store,
if (_ttl != std::chrono::system_clock::time_point()) { // or in one that doesn't.
_ttl = std::chrono::system_clock::time_point(); if (s != nullptr) {
} s->removeTTL(uri());
}
if (_ttl != std::chrono::system_clock::time_point()) {
_ttl = std::chrono::system_clock::time_point();
} }
return true; return true;
} }

View File

@ -195,6 +195,13 @@ class Node {
/// @brief Get our container /// @brief Get our container
Store const& store() const; Store const& store() const;
private:
/// @brief Get store if it exists: follows the _parent chain up to the
/// topmost node and returns that node's _store, which can be nullptr if
/// this node does not belong to a store.
Store* getStore();
public:
/// @brief Create JSON representation of this node and below /// @brief Create JSON representation of this node and below
std::string toJson() const; std::string toJson() const;

View File

@ -53,6 +53,7 @@ using namespace arangodb::aql;
using namespace arangodb::consensus; using namespace arangodb::consensus;
using namespace arangodb::velocypack; using namespace arangodb::velocypack;
using namespace arangodb::rest; using namespace arangodb::rest;
using namespace arangodb::basics;
/// Constructor: /// Constructor:
State::State() State::State()
@ -284,43 +285,38 @@ index_t State::logNonBlocking(index_t idx, velocypack::Slice const& slice,
: persist(idx, term, millis, slice, clientId); : persist(idx, term, millis, slice, clientId);
if (!success) { // log to disk or die if (!success) { // log to disk or die
if (leading) { LOG_TOPIC("f5adb", FATAL, Logger::AGENCY)
LOG_TOPIC("f5adb", FATAL, Logger::AGENCY) << "RAFT member fails to persist log entries!";
<< "RAFT leader fails to persist log entries!"; FATAL_ERROR_EXIT();
}
logEmplaceBackNoLock(log_t(idx, term, buf, clientId));
return _log.back().index;
}
void State::logEmplaceBackNoLock(log_t&& l) {
if (!l.clientId.empty()) {
try {
_clientIdLookupTable.emplace( // keep track of client or die
std::pair<std::string, index_t>{l.clientId, l.index});
} catch (...) {
LOG_TOPIC("f5ade", FATAL, Logger::AGENCY)
<< "RAFT member fails to expand client lookup table!";
FATAL_ERROR_EXIT(); FATAL_ERROR_EXIT();
} else {
LOG_TOPIC("50f4c", ERR, Logger::AGENCY)
<< "RAFT follower fails to persist log entries!";
return 0;
} }
} }
try { try {
_log.push_back(log_t(idx, term, buf, clientId)); // log to RAM or die _log.emplace_back(std::forward<log_t>(l)); // log to RAM or die
} catch (std::bad_alloc const&) { } catch (std::bad_alloc const&) {
if (leading) { LOG_TOPIC("f5adc", FATAL, Logger::AGENCY)
LOG_TOPIC("81502", FATAL, Logger::AGENCY) << "RAFT member fails to allocate volatile log entries!";
<< "RAFT leader fails to allocate volatile log entries!"; FATAL_ERROR_EXIT();
FATAL_ERROR_EXIT();
} else {
LOG_TOPIC("18c09", ERR, Logger::AGENCY)
<< "RAFT follower fails to allocate volatile log entries!";
return 0;
}
} }
if (leading) {
try {
_clientIdLookupTable.emplace( // keep track of client or die
std::pair<std::string, index_t>(clientId, idx));
} catch (...) {
LOG_TOPIC("4ab75", FATAL, Logger::AGENCY)
<< "RAFT leader fails to expand client lookup table!";
FATAL_ERROR_EXIT();
}
}
return _log.back().index;
} }
/// Log transactions (follower) /// Log transactions (follower)
@ -498,7 +494,7 @@ size_t State::removeConflicts(query_t const& transactions, bool gotSnapshot) {
// volatile logs, as mentioned above, this will never make _log // volatile logs, as mentioned above, this will never make _log
// completely empty! // completely empty!
_log.erase(_log.begin() + pos, _log.end()); logEraseNoLock(_log.begin() + pos, _log.end());
LOG_TOPIC("1321d", TRACE, Logger::AGENCY) << "removeConflicts done: ndups=" << ndups LOG_TOPIC("1321d", TRACE, Logger::AGENCY) << "removeConflicts done: ndups=" << ndups
<< " first log entry: " << _log.front().index << " first log entry: " << _log.front().index
@ -516,6 +512,29 @@ size_t State::removeConflicts(query_t const& transactions, bool gotSnapshot) {
return ndups; return ndups;
} }
/// @brief Erase the log entries in [rbegin, rend) from _log. For every erased
/// entry that carries a non-empty clientId, the (clientId, index) pairs that
/// point at exactly that entry are removed from _clientIdLookupTable first.
/// @param rbegin  first entry to erase
/// @param rend    one past the last entry to erase
void State::logEraseNoLock(
    std::deque<log_t>::iterator rbegin, std::deque<log_t>::iterator rend) {
  for (auto entry = rbegin; entry != rend; entry++) {
    std::string const& id = entry->clientId;
    if (id.empty()) {
      continue;  // entries without client id were never registered
    }
    auto range = _clientIdLookupTable.equal_range(id);
    auto pos = range.first;
    while (pos != range.second) {
      if (pos->second != entry->index) {
        // Same client id, but it refers to a different log index: keep it.
        pos++;
        continue;
      }
      pos = _clientIdLookupTable.erase(pos);
    }
  }

  _log.erase(rbegin, rend);
}
/// Get log entries from indices "start" to "end" /// Get log entries from indices "start" to "end"
std::vector<log_t> State::get(index_t start, index_t end) const { std::vector<log_t> State::get(index_t start, index_t end) const {
std::vector<log_t> entries; std::vector<log_t> entries;
@ -748,7 +767,7 @@ bool State::loadCollections(TRI_vocbase_t* vocbase,
std::shared_ptr<Buffer<uint8_t>> buf = std::make_shared<Buffer<uint8_t>>(); std::shared_ptr<Buffer<uint8_t>> buf = std::make_shared<Buffer<uint8_t>>();
VPackSlice value = arangodb::velocypack::Slice::emptyObjectSlice(); VPackSlice value = arangodb::velocypack::Slice::emptyObjectSlice();
buf->append(value.startAs<char const>(), value.byteSize()); buf->append(value.startAs<char const>(), value.byteSize());
_log.push_back(log_t(index_t(0), term_t(0), buf, std::string())); _log.emplace_back(log_t(index_t(0), term_t(0), buf, std::string()));
persist(0, 0, 0, value, std::string()); persist(0, 0, 0, value, std::string());
} }
_ready = true; _ready = true;
@ -810,7 +829,7 @@ bool State::loadLastCompactedSnapshot(Store& store, index_t& index, term_t& term
VPackSlice ii = i.resolveExternals(); VPackSlice ii = i.resolveExternals();
try { try {
store = ii; store = ii;
index = basics::StringUtils::uint64(ii.get("_key").copyString()); index = StringUtils::uint64(ii.get("_key").copyString());
term = ii.get("term").getNumber<uint64_t>(); term = ii.get("term").getNumber<uint64_t>();
return true; return true;
} catch (std::exception const& e) { } catch (std::exception const& e) {
@ -862,8 +881,9 @@ bool State::loadCompacted() {
buffer_t tmp = std::make_shared<arangodb::velocypack::Buffer<uint8_t>>(); buffer_t tmp = std::make_shared<arangodb::velocypack::Buffer<uint8_t>>();
_agent->setPersistedState(ii); _agent->setPersistedState(ii);
try { try {
_cur = basics::StringUtils::uint64(ii.get("_key").copyString()); _cur = StringUtils::uint64(ii.get("_key").copyString());
_log.clear(); // will be filled in loadRemaining _log.clear(); // will be filled in loadRemaining
_clientIdLookupTable.clear();
// Schedule next compaction: // Schedule next compaction:
_lastCompactionAt = _cur; _lastCompactionAt = _cur;
_nextCompactionAfter = _cur + _agent->config().compactionStepSize(); _nextCompactionAfter = _cur + _agent->config().compactionStepSize();
@ -1021,7 +1041,7 @@ bool State::loadRemaining() {
: std::string(); : std::string();
// Dummy fill missing entries (Not good at all.) // Dummy fill missing entries (Not good at all.)
index_t index(basics::StringUtils::uint64(ii.get(StaticStrings::KeyString).copyString())); index_t index(StringUtils::uint64(ii.get(StaticStrings::KeyString).copyString()));
// Ignore log entries, which are older than lastIndex: // Ignore log entries, which are older than lastIndex:
if (index >= lastIndex) { if (index >= lastIndex) {
@ -1033,7 +1053,9 @@ bool State::loadRemaining() {
term_t term(ii.get("term").getNumber<uint64_t>()); term_t term(ii.get("term").getNumber<uint64_t>());
for (index_t i = lastIndex + 1; i < index; ++i) { for (index_t i = lastIndex + 1; i < index; ++i) {
LOG_TOPIC("f95c7", WARN, Logger::AGENCY) << "Missing index " << i << " in RAFT log."; LOG_TOPIC("f95c7", WARN, Logger::AGENCY) << "Missing index " << i << " in RAFT log.";
_log.push_back(log_t(i, term, buf, std::string())); _log.emplace_back(log_t(i, term, buf, std::string()));
// This has empty clientId, so we do not need to adjust
// _clientIdLookupTable.
lastIndex = i; lastIndex = i;
} }
// After this loop, index will be lastIndex + 1 // After this loop, index will be lastIndex + 1
@ -1041,17 +1063,9 @@ bool State::loadRemaining() {
if (index == lastIndex + 1 || (index == lastIndex && _log.empty())) { if (index == lastIndex + 1 || (index == lastIndex && _log.empty())) {
// Real entries // Real entries
try { logEmplaceBackNoLock(
_log.push_back(log_t(basics::StringUtils::uint64( log_t(StringUtils::uint64(ii.get(StaticStrings::KeyString).copyString()),
ii.get(StaticStrings::KeyString).copyString()), ii.get("term").getNumber<uint64_t>(), tmp, clientId));
ii.get("term").getNumber<uint64_t>(), tmp, clientId));
} catch (std::exception const& e) {
LOG_TOPIC("44208", ERR, Logger::AGENCY)
<< "Failed to convert " + ii.get(StaticStrings::KeyString).copyString() +
" to integer."
<< e.what();
}
lastIndex = index; lastIndex = index;
} }
} }
@ -1156,7 +1170,7 @@ bool State::compactVolatile(index_t cind, index_t keep) {
index_t cut = cind - keep; index_t cut = cind - keep;
MUTEX_LOCKER(mutexLocker, _logLock); MUTEX_LOCKER(mutexLocker, _logLock);
if (!_log.empty() && cut > _cur && cut - _cur < _log.size()) { if (!_log.empty() && cut > _cur && cut - _cur < _log.size()) {
_log.erase(_log.begin(), _log.begin() + (cut - _cur)); logEraseNoLock(_log.begin(), _log.begin() + (cut - _cur));
TRI_ASSERT(_log.begin()->index == cut); TRI_ASSERT(_log.begin()->index == cut);
_cur = _log.begin()->index; _cur = _log.begin()->index;
} }
@ -1324,6 +1338,7 @@ bool State::storeLogFromSnapshot(Store& snapshot, index_t index, term_t term) {
// volatile logs // volatile logs
_log.clear(); _log.clear();
_clientIdLookupTable.clear();
_cur = index; _cur = index;
// This empty log should soon be rectified! // This empty log should soon be rectified!
return true; return true;
@ -1495,7 +1510,7 @@ std::shared_ptr<VPackBuilder> State::latestAgencyState(TRI_vocbase_t& vocbase,
VPackSlice ii = result[0].resolveExternals(); VPackSlice ii = result[0].resolveExternals();
buffer_t tmp = std::make_shared<arangodb::velocypack::Buffer<uint8_t>>(); buffer_t tmp = std::make_shared<arangodb::velocypack::Buffer<uint8_t>>();
store = ii; store = ii;
index = arangodb::basics::StringUtils::uint64(ii.get("_key").copyString()); index = StringUtils::uint64(ii.get("_key").copyString());
term = ii.get("term").getNumber<uint64_t>(); term = ii.get("term").getNumber<uint64_t>();
LOG_TOPIC("d838b", INFO, Logger::AGENCY) LOG_TOPIC("d838b", INFO, Logger::AGENCY)
<< "Read snapshot at index " << index << " with term " << term; << "Read snapshot at index " << index << " with term " << term;
@ -1528,7 +1543,7 @@ std::shared_ptr<VPackBuilder> State::latestAgencyState(TRI_vocbase_t& vocbase,
std::string clientId = std::string clientId =
req.hasKey("clientId") ? req.get("clientId").copyString() : std::string(); req.hasKey("clientId") ? req.get("clientId").copyString() : std::string();
log_t entry(basics::StringUtils::uint64(ii.get(StaticStrings::KeyString).copyString()), log_t entry(StringUtils::uint64(ii.get(StaticStrings::KeyString).copyString()),
ii.get("term").getNumber<uint64_t>(), tmp, clientId); ii.get("term").getNumber<uint64_t>(), tmp, clientId);
if (entry.index <= index) { if (entry.index <= index) {

View File

@ -90,6 +90,19 @@ class State {
/// @brief non-locking version of at /// @brief non-locking version of at
log_t atNoLock(index_t) const; log_t atNoLock(index_t) const;
/**
 * @brief Erase element range [rbegin, rend) from _log and remove the
 *        matching (clientId, index) entries from _clientIdLookupTable
 * @param rbegin Start of range (first entry to erase)
 * @param rend   End of range (one past the last entry to erase)
 */
void logEraseNoLock(std::deque<log_t>::iterator rbegin, std::deque<log_t>::iterator rend);
/**
 * @brief Emplace log entry at back of _log; an entry with a non-empty
 *        clientId is additionally recorded in _clientIdLookupTable
 * @param l log entry
 * NOTE(review): the "NoLock" suffix suggests the caller must already hold
 * the log mutex -- confirm against the call sites.
 */
void logEmplaceBackNoLock(log_t&& l);
public: public:
/// @brief Check for a log entry, returns 0, if the log does not /// @brief Check for a log entry, returns 0, if the log does not
/// contain an entry with index `index`, 1, if it does contain one /// contain an entry with index `index`, 1, if it does contain one

View File

@ -1220,6 +1220,7 @@ void Supervision::workJobs() {
bool selectRandom = todos.size() > maximalJobsPerRound; bool selectRandom = todos.size() > maximalJobsPerRound;
LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*"; LOG_TOPIC("00567", TRACE, Logger::SUPERVISION) << "Begin ToDos of type Failed*";
bool doneFailedJob = false;
while (it != todos.end()) { while (it != todos.end()) {
if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) { if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) {
LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job"; LOG_TOPIC("675fe", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job";
@ -1234,28 +1235,32 @@ void Supervision::workJobs() {
.run(_haveAborts); .run(_haveAborts);
LOG_TOPIC("98115", TRACE, Logger::SUPERVISION) << "Finish JobContext::run()"; LOG_TOPIC("98115", TRACE, Logger::SUPERVISION) << "Finish JobContext::run()";
it = todos.erase(it); it = todos.erase(it);
doneFailedJob = true;
} else { } else {
++it; ++it;
} }
} }
// Do not start other jobs, if above resilience jobs aborted stuff // Do not start other jobs, if above resilience jobs aborted stuff
if (!_haveAborts) { if (!_haveAborts && !doneFailedJob) {
LOG_TOPIC("00654", TRACE, Logger::SUPERVISION) << "Begin ToDos"; LOG_TOPIC("00654", TRACE, Logger::SUPERVISION) << "Begin ToDos";
for (auto const& todoEnt : todos) { for (auto const& todoEnt : todos) {
if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) { if (selectRandom && RandomGenerator::interval(static_cast<uint64_t>(todos.size())) > maximalJobsPerRound) {
LOG_TOPIC("77889", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job"; LOG_TOPIC("77889", TRACE, Logger::SUPERVISION) << "Skipped ToDo Job";
continue; continue;
} }
auto const& jobNode = *todoEnt.second;
auto const& jobNode = *(todoEnt.second); if (jobNode.hasAsString("type").first.compare(0, FAILED.length(), FAILED) != 0) {
LOG_TOPIC("aa667", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()"; LOG_TOPIC("aa667", TRACE, Logger::SUPERVISION) << "Begin JobContext::run()";
JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent) JobContext(TODO, jobNode.hasAsString("jobId").first, _snapshot, _agent)
.run(dummy); .run(dummy);
LOG_TOPIC("65bcd", TRACE, Logger::SUPERVISION) << "Finish JobContext::run()"; LOG_TOPIC("65bcd", TRACE, Logger::SUPERVISION) << "Finish JobContext::run()";
}
} }
} }
LOG_TOPIC("a55ce", DEBUG, Logger::SUPERVISION) << "Updating snapshot after ToDo";
updateSnapshot();
LOG_TOPIC("08641", TRACE, Logger::SUPERVISION) << "Begin Pendings"; LOG_TOPIC("08641", TRACE, Logger::SUPERVISION) << "Begin Pendings";
auto const& pends = _snapshot.hasAsChildren(pendingPrefix).first; auto const& pends = _snapshot.hasAsChildren(pendingPrefix).first;

View File

@ -568,6 +568,11 @@ TEST_F(MoveShardTest, the_job_should_wait_until_the_target_server_is_good) {
}; };
Mock<AgentInterface> mockAgent; Mock<AgentInterface> mockAgent;
When(Method(mockAgent, write)).AlwaysDo([&](query_t const& q, consensus::AgentInterface::WriteMode w) -> write_ret_t {
CHECK_FAILURE("ToDo", q);
return fakeWriteResult;
});
When(Method(mockAgent, waitFor)).AlwaysReturn();
AgentInterface& agent = mockAgent.get(); AgentInterface& agent = mockAgent.get();
auto builder = createTestStructure(baseStructure.toBuilder().slice(), ""); auto builder = createTestStructure(baseStructure.toBuilder().slice(), "");

View File

@ -115,7 +115,6 @@ function agencyTestSuite () {
ret.push({compactions: JSON.parse(request(compaction).body), ret.push({compactions: JSON.parse(request(compaction).body),
state: JSON.parse(request(state).body), url: url}); state: JSON.parse(request(state).body), url: url});
}); });
return ret; return ret;
} }
@ -124,26 +123,70 @@ function agencyTestSuite () {
// We simply try all agency servers in turn until one gives us an HTTP // We simply try all agency servers in turn until one gives us an HTTP
// response: // response:
var res; var res;
var inquire = false;
var clientIds = [];
list.forEach(function (trx) {
if (Array.isArray(trx) && trx.length === 3 &&
typeof(trx[0]) === 'object' && typeof(trx[2]) === 'string') {
clientIds.push(trx[2]);
}
});
while (true) { while (true) {
res = request({url: agencyLeader + "/_api/agency/" + api,
method: "POST", followRedirect: false, if (!inquire) {
body: JSON.stringify(list), res = request({url: agencyLeader + "/_api/agency/" + api,
headers: {"Content-Type": "application/json"}, method: "POST", followRedirect: false,
timeout: timeout /* essentially for the huge trx package body: JSON.stringify(list),
running under ASAN in the CI */ }); headers: {"Content-Type": "application/json"},
if(res.statusCode === 307) { timeout: timeout /* essentially for the huge trx package
running under ASAN in the CI */ });
} else { // inquire. Remove successful commits. For later retries
res = request({url: agencyLeader + "/_api/agency/inquire",
method: "POST", followRedirect: false,
body: JSON.stringify(clientIds),
headers: {"Content-Type": "application/json"},
timeout: timeout
});
}
if (res.statusCode === 307) {
agencyLeader = res.headers.location; agencyLeader = res.headers.location;
var l = 0; var l = 0;
for (var i = 0; i < 3; ++i) { for (var i = 0; i < 3; ++i) {
l = agencyLeader.indexOf('/', l+1); l = agencyLeader.indexOf('/', l+1);
} }
agencyLeader = agencyLeader.substring(0,l); agencyLeader = agencyLeader.substring(0,l);
if (clientIds.length > 0 && api === 'write') {
inquire = true;
}
require('console').topic("agency=info", 'Redirected to ' + agencyLeader); require('console').topic("agency=info", 'Redirected to ' + agencyLeader);
} else if (res.statusCode !== 503) { continue;
} else if (res.statusCode === 503) {
require('console').topic("agency=info", 'Waiting for leader ... ');
if (clientIds.length > 0 && api === 'write') {
inquire = true;
}
wait(1.0);
continue;
}
if (!inquire) {
break; // done, let's report the result, whatever it is
}
// In case of inquiry, we probably have done some of the transactions:
var done = 0;
res.bodyParsed = JSON.parse(res.body);
res.bodyParsed.results.forEach(function (index) {
if (index > 0) {
done++;
}
});
if (done === clientIds.length) {
break; break;
} else { } else {
require('console').topic("agency=info", 'Waiting for leader ... '); list = list.slice(done);
wait(1.0); inquire = false;
} }
} }
try { try {
@ -174,10 +217,11 @@ function agencyTestSuite () {
function doCountTransactions(count, start) { function doCountTransactions(count, start) {
let i, res; let i, res;
let counter = 0;
let trxs = []; let trxs = [];
for (i = start; i < start + count; ++i) { for (i = start; i < start + count; ++i) {
let key = "/key"+i; let key = "/key"+i;
let trx = [{}]; let trx = [{},{},"clientid" + counter++];
trx[0][key] = "value" + i; trx[0][key] = "value" + i;
trxs.push(trx); trxs.push(trx);
if (trxs.length >= 200 || i === start + count - 1) { if (trxs.length >= 200 || i === start + count - 1) {
@ -209,8 +253,8 @@ function agencyTestSuite () {
var agents = getCompactions(servers), i, old; var agents = getCompactions(servers), i, old;
var ready = true; var ready = true;
for (i = 1; i < agents.length; ++i) { for (i = 1; i < agents.length; ++i) {
if (agents[0].state[agents[0].state.length-1].index !== if (agents[0].state.log[agents[0].state.log.length-1].index !==
agents[i].state[agents[i].state.length-1].index) { agents[i].state.log[agents[i].state.log.length-1].index) {
ready = false; ready = false;
break; break;
} }
@ -221,7 +265,7 @@ function agencyTestSuite () {
agents.forEach( function (agent) { agents.forEach( function (agent) {
var results = agent.compactions.result; // All compactions var results = agent.compactions.result; // All compactions
var llog = agent.state[agent.state.length-1]; // Last log entry var llog = agent.state.log[agent.state.log.length-1]; // Last log entry
llogi = llog.index; // Last log index llogi = llog.index; // Last log index
var lcomp = results[results.length-1]; // Last compaction entry var lcomp = results[results.length-1]; // Last compaction entry
var lcompi = parseInt(lcomp._key); // Last compaction index var lcompi = parseInt(lcomp._key); // Last compaction index
@ -232,7 +276,7 @@ function agencyTestSuite () {
var foobar = accessAgency("read", [["foobar"]]).bodyParsed[0].foobar; var foobar = accessAgency("read", [["foobar"]]).bodyParsed[0].foobar;
var n = 0; var n = 0;
var keepsize = compactionConfig.compactionKeepSize; var keepsize = compactionConfig.compactionKeepSize;
var flog = agent.state[0]; // First log entry var flog = agent.state.log[0]; // First log entry
var flogi = flog.index; // First log index var flogi = flog.index; // First log index
// Expect to find last compaction maximally // Expect to find last compaction maximally
@ -250,7 +294,7 @@ function agencyTestSuite () {
if(lcomp.readDB[0].hasOwnProperty("foobar")) { if(lcomp.readDB[0].hasOwnProperty("foobar")) {
// All log entries > last compaction index, // All log entries > last compaction index,
// which are {"foobar":{"op":"increment"}} // which are {"foobar":{"op":"increment"}}
agent.state.forEach( function(log) { agent.state.log.forEach( function(log) {
if (log.index > lcompi) { if (log.index > lcompi) {
if (log.query.foobar !== undefined) { if (log.query.foobar !== undefined) {
++n; ++n;
@ -673,7 +717,6 @@ function agencyTestSuite () {
wait(1.1); wait(1.1);
assertEqual(readAndCheck([["/a/y"]]), [{a:{}}]); assertEqual(readAndCheck([["/a/y"]]), [{a:{}}]);
writeAndCheck([[{"/a/y":{"op":"set","new":12, "ttl": 1}}]]); writeAndCheck([[{"/a/y":{"op":"set","new":12, "ttl": 1}}]]);
writeAndCheck([[{"/a/y":{"op":"set","new":12}}]]);
assertEqual(readAndCheck([["a/y"]]), [{"a":{"y":12}}]); assertEqual(readAndCheck([["a/y"]]), [{"a":{"y":12}}]);
wait(1.1); wait(1.1);
assertEqual(readAndCheck([["/a/y"]]), [{a:{}}]); assertEqual(readAndCheck([["/a/y"]]), [{a:{}}]);
@ -687,6 +730,12 @@ function agencyTestSuite () {
assertEqual(readAndCheck([["/foo"]]), [{"foo":{}}]); assertEqual(readAndCheck([["/foo"]]), [{"foo":{}}]);
assertEqual(readAndCheck([["/foo/bar"]]), [{"foo":{}}]); assertEqual(readAndCheck([["/foo/bar"]]), [{"foo":{}}]);
assertEqual(readAndCheck([["/foo/bar/baz"]]), [{"foo":{}}]); assertEqual(readAndCheck([["/foo/bar/baz"]]), [{"foo":{}}]);
writeAndCheck([[{"a/u":{"op":"set","new":25, "ttl": 2}}]]);
assertEqual(readAndCheck([["/a/u"]]), [{"a":{"u":25}}]);
writeAndCheck([[{"a/u":{"op":"set","new":26}}]]);
assertEqual(readAndCheck([["/a/u"]]), [{"a":{"u":26}}]);
wait(3.0); // key should still be there
assertEqual(readAndCheck([["/a/u"]]), [{"a":{"u":26}}]);
}, },
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -1038,7 +1087,6 @@ function agencyTestSuite () {
assertEqual(res.statusCode, 403); assertEqual(res.statusCode, 403);
}, },
/*
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief Compaction /// @brief Compaction
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -1067,7 +1115,7 @@ function agencyTestSuite () {
count3, "keys, from log entry", cur + count + count2, "on."); count3, "keys, from log entry", cur + count + count2, "on.");
doCountTransactions(count3, count + count2); doCountTransactions(count3, count + count2);
}, },
*/
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief Huge transaction package /// @brief Huge transaction package
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -1076,7 +1124,7 @@ function agencyTestSuite () {
writeAndCheck([[{"a":{"op":"delete"}}]]); // cleanup first writeAndCheck([[{"a":{"op":"delete"}}]]); // cleanup first
var huge = []; var huge = [];
for (var i = 0; i < 20000; ++i) { for (var i = 0; i < 20000; ++i) {
huge.push([{"a":{"op":"increment"}}]); huge.push([{"a":{"op":"increment"}}, {}, "huge" + i]);
} }
writeAndCheck(huge, 600); writeAndCheck(huge, 600);
assertEqual(readAndCheck([["a"]]), [{"a":20000}]); assertEqual(readAndCheck([["a"]]), [{"a":20000}]);
@ -1090,8 +1138,8 @@ function agencyTestSuite () {
writeAndCheck([[{"a":{"op":"delete"}}]]); // cleanup first writeAndCheck([[{"a":{"op":"delete"}}]]); // cleanup first
var trx = []; var trx = [];
for (var i = 0; i < 100; ++i) { for (var i = 0; i < 100; ++i) {
trx.push([{"a":{"op":"increment"}}]); trx.push([{"a":{"op":"increment"}}, {}, "inc" + i]);
trx.push([{"a":{"op":"decrement"}}]); trx.push([{"a":{"op":"decrement"}}, {}, "dec" + i]);
} }
writeAndCheck(trx); writeAndCheck(trx);
assertEqual(readAndCheck([["a"]]), [{"a":0}]); assertEqual(readAndCheck([["a"]]), [{"a":0}]);
@ -1139,12 +1187,13 @@ function agencyTestSuite () {
} }
}, },
/*
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
/// @brief Test compaction step/keep /// @brief Test compaction step/keep
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
testCompactionStepKeep : function() { // Test currently deactivated, it at the very least takes very long,
// it might be broken in its entirety.
/*testCompactionStepKeep : function() {
// prepare transaction package for tests // prepare transaction package for tests
var transaction = [], i; var transaction = [], i;
@ -1186,8 +1235,7 @@ function agencyTestSuite () {
assertTrue(evalComp()>0); assertTrue(evalComp()>0);
} }
*/
*/
}; };
} }