From 4b3fc15a4526fa059d885df1fdcc9b84b0942ec2 Mon Sep 17 00:00:00 2001 From: Lars Maier Date: Tue, 20 Aug 2019 11:40:28 +0200 Subject: [PATCH] [3.3] Move Shard Bug 4567124 (#9749) * Fixed abort in moveshard in the case new leader is not in Current. * Updated Changelog. --- CHANGELOG | 2 + arangod/Agency/MoveShard.cpp | 33 +++++++------ tests/Agency/MoveShardTest.cpp | 85 +++++++++++++++++++++++++++++++++- 3 files changed, 103 insertions(+), 17 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 625a7c608f..108734ae08 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ v3.3.25 (2019-XX-XX) -------------------- +* Fixed bug in MoveShard::abort which causes a duplicate entry in the follower list. (Internal Bug #4378) + * Correct rocksdb statistics to report sums from column families instead of single value from default column family diff --git a/arangod/Agency/MoveShard.cpp b/arangod/Agency/MoveShard.cpp index 237d7c74da..39edebe8ce 100644 --- a/arangod/Agency/MoveShard.cpp +++ b/arangod/Agency/MoveShard.cpp @@ -840,20 +840,23 @@ arangodb::Result MoveShard::abort() { if (_isLeader) { // All changes to Plan for all shards: doForAllShards(_snapshot, _database, shardsLikeMe, - [this, &trx](Slice plan, Slice current, std::string& planPath, std::string& curPath) { - // Restore leader to be _from: - trx.add(VPackValue(planPath)); - { - VPackArrayBuilder guard(&trx); - trx.add(VPackValue(_from)); - VPackArrayIterator iter(plan); - ++iter; // skip the first - while (iter.valid()) { - trx.add(iter.value()); - ++iter; - } - } - }); + [this, &trx](Slice plan, Slice current, std::string& planPath, std::string& curPath) { + // Restore leader to be _from: + trx.add(VPackValue(planPath)); + { + VPackArrayBuilder guard(&trx); + trx.add(VPackValue(_from)); + for (auto const& srv : VPackArrayIterator(plan)) { + // from could be in plan as or <_from>. Exclude to server always. + if (srv.isEqualString(_from) || srv.isEqualString("_" + _from) || srv.isEqualString(_to)) { + continue ; + } + trx.add(srv); + } + // Add to server last. Will be removed by removeFollower if to much + trx.add(VPackValue(_to)); + } + }); } else { // All changes to Plan for all shards: doForAllShards(_snapshot, _database, shardsLikeMe, @@ -863,7 +866,7 @@ arangodb::Result MoveShard::abort() { { VPackArrayBuilder guard(&trx); for (auto const& srv : VPackArrayIterator(plan)) { - if (srv.copyString() != _to) { + if (false == srv.isEqualString(_to)) { trx.add(srv); } } diff --git a/tests/Agency/MoveShardTest.cpp b/tests/Agency/MoveShardTest.cpp index 7d8cdcd9d0..cf652a2d12 100644 --- a/tests/Agency/MoveShardTest.cpp +++ b/tests/Agency/MoveShardTest.cpp @@ -1606,7 +1606,7 @@ SECTION("a pending moveshard job should also put the original server back into p LOG_DEVEL << q->slice().toJson() << " " << __LINE__; auto writes = q->slice()[0][0]; CHECK(writes.get("/arango/Target/Pending/1").get("op").copyString() == "delete"); - REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet + REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet CHECK(writes.get("/arango/Supervision/DBServers/" + FREE_SERVER).get("op").copyString() == "delete"); CHECK(writes.get("/arango/Supervision/Shards/" + SHARD).get("op").copyString() == "delete"); CHECK(std::string(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD).typeName()) == "array"); @@ -1836,7 +1836,7 @@ SECTION("aborting the job while a leader transition is in progress (for example auto writes = q->slice()[0][0]; CHECK(writes.get("/arango/Target/Pending/1").get("op").copyString() == "delete"); - REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet + REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet CHECK(writes.get("/arango/Supervision/DBServers/" + FREE_SERVER).get("op").copyString() == "delete"); CHECK(writes.get("/arango/Supervision/Shards/" + SHARD).get("op").copyString() == "delete"); CHECK(std::string(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD).typeName()) == "array"); @@ -1860,6 +1860,87 @@ SECTION("aborting the job while a leader transition is in progress (for example Verify(Method(mockAgent,write)); } +SECTION("aborting the job while the new leader is already in place should not break plan") { + std::function(VPackSlice const&, std::string const&)> createTestStructure = [&](VPackSlice const& s, std::string const& path) { + std::unique_ptr builder; + builder.reset(new VPackBuilder()); + if (s.isObject()) { + builder->add(VPackValue(VPackValueType::Object)); + for (auto const& it: VPackObjectIterator(s)) { + auto childBuilder = createTestStructure(it.value, path + "/" + it.key.copyString()); + if (childBuilder) { + builder->add(it.key.copyString(), childBuilder->slice()); + } + } + + if (path == "/arango/Target/Pending") { + VPackBuilder pendingJob; + { + VPackObjectBuilder b(&pendingJob); + auto plainJob = createJob(COLLECTION, SHARD_LEADER, FREE_SERVER); + for (auto const& it: VPackObjectIterator(plainJob.slice())) { + pendingJob.add(it.key.copyString(), it.value); + } + pendingJob.add("timeCreated", VPackValue(timepointToString(std::chrono::system_clock::now()))); + } + builder->add(jobId, pendingJob.slice()); + } else if (path == "/arango/Supervision/DBServers") { + builder->add(FREE_SERVER, VPackValue("1")); + } else if (path == "/arango/Supervision/Shards") { + builder->add(SHARD, VPackValue("1")); + } + builder->close(); + } else { + if (path == "/arango/Current/Collections/" + DATABASE + "/" + COLLECTION + "/" + SHARD + "/servers") { + builder->add(VPackValue(VPackValueType::Array)); + builder->add(VPackValue("_" + SHARD_LEADER)); + builder->add(VPackValue(SHARD_FOLLOWER1)); + builder->close(); + } else if (path == "/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD) { + builder->add(VPackValue(VPackValueType::Array)); + builder->add(VPackValue(FREE_SERVER)); + builder->add(VPackValue(SHARD_LEADER)); + builder->add(VPackValue(SHARD_FOLLOWER1)); + builder->close(); + } else { + builder->add(s); + } + } + return builder; + }; + + Mock mockAgent; + When(Method(mockAgent, waitFor)).AlwaysReturn(); + When(Method(mockAgent, write)).Do([&](query_t const& q, consensus::AgentInterface::WriteMode w) -> write_ret_t { + INFO("WriteTransaction: " << q->slice().toJson()); + + auto writes = q->slice()[0][0]; + CHECK(writes.get("/arango/Target/Pending/1").get("op").copyString() == "delete"); + REQUIRE(q->slice()[0].length() == 2); // Precondition: to Server not leader yet + CHECK(writes.get("/arango/Supervision/DBServers/" + FREE_SERVER).get("op").copyString() == "delete"); + CHECK(writes.get("/arango/Supervision/Shards/" + SHARD).get("op").copyString() == "delete"); + CHECK(std::string(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD).typeName()) == "array"); + // well apparently this job is not responsible to cleanup its mess + CHECK(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD).length() >= 3); + CHECK(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD)[0].copyString() == SHARD_LEADER); + CHECK(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD)[1].copyString() == SHARD_FOLLOWER1); + CHECK(writes.get("/arango/Plan/Collections/" + DATABASE + "/" + COLLECTION + "/shards/" + SHARD)[2].copyString() == FREE_SERVER); + CHECK(std::string(writes.get("/arango/Target/Failed/1").typeName()) == "object"); + + return fakeWriteResult; + }); + AgentInterface& agent = mockAgent.get(); + + auto builder = createTestStructure(baseStructure.toBuilder().slice(), ""); + REQUIRE(builder); + Node agency = createAgencyFromBuilder(*builder); + + INFO("Agency: " << agency); + auto moveShard = MoveShard(agency, &agent, PENDING, jobId); + moveShard.abort(); + Verify(Method(mockAgent,write)); +} + SECTION("if we are ready to resign the old server then finally move to the new leader") { std::function(VPackSlice const&, std::string const&)> createTestStructure = [&](VPackSlice const& s, std::string const& path) { std::unique_ptr builder;