1
0
Fork 0

[3.3] Move Shard Bug 4567124 (#9749)

* Fixed abort in moveshard in the case new leader is not in Current.
* Updated Changelog.
This commit is contained in:
Lars Maier 2019-08-20 11:40:28 +02:00 committed by Max Neunhöffer
parent 1f27aac6b4
commit 4b3fc15a45
3 changed files with 103 additions and 17 deletions

View File

@ -1,6 +1,8 @@
v3.3.25 (2019-XX-XX)
--------------------
* Fixed bug in MoveShard::abort which causes a duplicate entry in the follower list. (Internal Bug #4378)
* Correct rocksdb statistics to report sums from column families instead of
single value from default column family

View File

@ -840,20 +840,23 @@ arangodb::Result MoveShard::abort() {
if (_isLeader) {
// All changes to Plan for all shards:
doForAllShards(_snapshot, _database, shardsLikeMe,
[this, &trx](Slice plan, Slice current, std::string& planPath, std::string& curPath) {
// Restore leader to be _from:
trx.add(VPackValue(planPath));
{
VPackArrayBuilder guard(&trx);
trx.add(VPackValue(_from));
VPackArrayIterator iter(plan);
++iter; // skip the first
while (iter.valid()) {
trx.add(iter.value());
++iter;
}
}
});
[this, &trx](Slice plan, Slice current, std::string& planPath, std::string& curPath) {
// Restore leader to be _from:
trx.add(VPackValue(planPath));
{
VPackArrayBuilder guard(&trx);
trx.add(VPackValue(_from));
for (auto const& srv : VPackArrayIterator(plan)) {
// from could be in plan as <from> or <_from>. Exclude to server always.
if (srv.isEqualString(_from) || srv.isEqualString("_" + _from) || srv.isEqualString(_to)) {
continue ;
}
trx.add(srv);
}
// Add to server last. Will be removed by removeFollower if to much
trx.add(VPackValue(_to));
}
});
} else {
// All changes to Plan for all shards:
doForAllShards(_snapshot, _database, shardsLikeMe,
@ -863,7 +866,7 @@ arangodb::Result MoveShard::abort() {
{
VPackArrayBuilder guard(&trx);
for (auto const& srv : VPackArrayIterator(plan)) {
if (srv.copyString() != _to) {
if (false == srv.isEqualString(_to)) {
trx.add(srv);
}
}

View File

@ -1606,7 +1606,7 @@ SECTION("a pending moveshard job should also put the original server back into p
LOG_DEVEL << q->slice().toJson() << " " << __LINE__;
auto writes = q->slice()[0][0];
CHECK(writes.get("/arango/Target/Pending/1").get("op").copyString() == "delete");
REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet
REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet
CHECK(writes.get("/arango/Supervision/DBServers/" + FREE_SERVER).get("op").copyString() == "delete");
CHECK(writes.get("/arango/Supervision/Shards/" + SHARD).get("op").copyString() == "delete");
CHECK(std::string(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD).typeName()) == "array");
@ -1836,7 +1836,7 @@ SECTION("aborting the job while a leader transition is in progress (for example
auto writes = q->slice()[0][0];
CHECK(writes.get("/arango/Target/Pending/1").get("op").copyString() == "delete");
REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet
REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet
CHECK(writes.get("/arango/Supervision/DBServers/" + FREE_SERVER).get("op").copyString() == "delete");
CHECK(writes.get("/arango/Supervision/Shards/" + SHARD).get("op").copyString() == "delete");
CHECK(std::string(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD).typeName()) == "array");
@ -1860,6 +1860,87 @@ SECTION("aborting the job while a leader transition is in progress (for example
Verify(Method(mockAgent,write));
}
SECTION("aborting the job while the new leader is already in place should not break plan") {
std::function<std::unique_ptr<VPackBuilder>(VPackSlice const&, std::string const&)> createTestStructure = [&](VPackSlice const& s, std::string const& path) {
std::unique_ptr<VPackBuilder> builder;
builder.reset(new VPackBuilder());
if (s.isObject()) {
builder->add(VPackValue(VPackValueType::Object));
for (auto const& it: VPackObjectIterator(s)) {
auto childBuilder = createTestStructure(it.value, path + "/" + it.key.copyString());
if (childBuilder) {
builder->add(it.key.copyString(), childBuilder->slice());
}
}
if (path == "/arango/Target/Pending") {
VPackBuilder pendingJob;
{
VPackObjectBuilder b(&pendingJob);
auto plainJob = createJob(COLLECTION, SHARD_LEADER, FREE_SERVER);
for (auto const& it: VPackObjectIterator(plainJob.slice())) {
pendingJob.add(it.key.copyString(), it.value);
}
pendingJob.add("timeCreated", VPackValue(timepointToString(std::chrono::system_clock::now())));
}
builder->add(jobId, pendingJob.slice());
} else if (path == "/arango/Supervision/DBServers") {
builder->add(FREE_SERVER, VPackValue("1"));
} else if (path == "/arango/Supervision/Shards") {
builder->add(SHARD, VPackValue("1"));
}
builder->close();
} else {
if (path == "/arango/Current/Collections/" + DATABASE + "/" + COLLECTION + "/" + SHARD + "/servers") {
builder->add(VPackValue(VPackValueType::Array));
builder->add(VPackValue("_" + SHARD_LEADER));
builder->add(VPackValue(SHARD_FOLLOWER1));
builder->close();
} else if (path == "/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD) {
builder->add(VPackValue(VPackValueType::Array));
builder->add(VPackValue(FREE_SERVER));
builder->add(VPackValue(SHARD_LEADER));
builder->add(VPackValue(SHARD_FOLLOWER1));
builder->close();
} else {
builder->add(s);
}
}
return builder;
};
Mock<AgentInterface> mockAgent;
When(Method(mockAgent, waitFor)).AlwaysReturn();
When(Method(mockAgent, write)).Do([&](query_t const& q, consensus::AgentInterface::WriteMode w) -> write_ret_t {
INFO("WriteTransaction: " << q->slice().toJson());
auto writes = q->slice()[0][0];
CHECK(writes.get("/arango/Target/Pending/1").get("op").copyString() == "delete");
REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet
CHECK(writes.get("/arango/Supervision/DBServers/" + FREE_SERVER).get("op").copyString() == "delete");
CHECK(writes.get("/arango/Supervision/Shards/" + SHARD).get("op").copyString() == "delete");
CHECK(std::string(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD).typeName()) == "array");
// well apparently this job is not responsible to cleanup its mess
CHECK(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD).length() >= 3);
CHECK(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD)[0].copyString() == SHARD_LEADER);
CHECK(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD)[1].copyString() == SHARD_FOLLOWER1);
CHECK(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD)[2].copyString() == FREE_SERVER);
CHECK(std::string(writes.get("/arango/Target/Failed/1").typeName()) == "object");
return fakeWriteResult;
});
AgentInterface& agent = mockAgent.get();
auto builder = createTestStructure(baseStructure.toBuilder().slice(), "");
REQUIRE(builder);
Node agency = createAgencyFromBuilder(*builder);
INFO("Agency: " << agency);
auto moveShard = MoveShard(agency, &agent, PENDING, jobId);
moveShard.abort();
Verify(Method(mockAgent,write));
}
SECTION("if we are ready to resign the old server then finally move to the new leader") {
std::function<std::unique_ptr<VPackBuilder>(VPackSlice const&, std::string const&)> createTestStructure = [&](VPackSlice const& s, std::string const& path) {
std::unique_ptr<VPackBuilder> builder;