////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2018 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Kaveh Vahedipour
/// @author Matthew Von-Maszewski
////////////////////////////////////////////////////////////////////////////////

#include "SynchronizeShard.h"

#include "Agency/TimeString.h"
#include "Agency/AgencyStrings.h"
#include "ApplicationFeatures/ApplicationServer.h"
#include "Basics/VelocyPackHelper.h"
#include "Cluster/ActionDescription.h"
#include "Cluster/ClusterComm.h"
#include "Cluster/ClusterFeature.h"
#include "Cluster/FollowerInfo.h"
#include "Cluster/MaintenanceFeature.h"
#include "Cluster/ServerState.h"
#include "Replication/DatabaseTailingSyncer.h"
#include "Replication/DatabaseInitialSyncer.h"
#include "Replication/DatabaseReplicationApplier.h"
#include "Replication/GlobalInitialSyncer.h"
#include "Replication/GlobalReplicationApplier.h"
#include "Replication/ReplicationApplierConfiguration.h"
#include "Replication/ReplicationFeature.h"
#include "RestServer/DatabaseFeature.h"
#include "Transaction/StandaloneContext.h"
#include "Utils/SingleCollectionTransaction.h"
#include "VocBase/LogicalCollection.h"
#include "VocBase/Methods/Collections.h"
#include "VocBase/Methods/Databases.h"

#include <velocypack/Compare.h>
#include <velocypack/Iterator.h>
#include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h>

using namespace arangodb::application_features;
using namespace arangodb::maintenance;
using namespace arangodb::methods;
using namespace arangodb::transaction;
using namespace arangodb;
using namespace arangodb::consensus;

std::string const ENDPOINT("endpoint");
std::string const INCLUDE("include");
std::string const INCLUDE_SYSTEM("includeSystem");
std::string const INCREMENTAL("incremental");
std::string const KEEP_BARRIER("keepBarrier");
std::string const LEADER_ID("leaderId");
std::string const BARRIER_ID("barrierId");
std::string const LAST_LOG_TICK("lastLogTick");
std::string const API_REPLICATION("/_api/replication/");
std::string const REPL_ADD_FOLLOWER(API_REPLICATION + "addFollower");
std::string const REPL_BARRIER_API(API_REPLICATION + "barrier/");
std::string const REPL_HOLD_READ_LOCK(API_REPLICATION + "holdReadLockCollection");
std::string const REPL_REM_FOLLOWER(API_REPLICATION + "removeFollower");
std::string const RESTRICT_TYPE("restrictType");
std::string const RESTRICT_COLLECTIONS("restrictCollections");
std::string const SKIP_CREATE_DROP("skipCreateDrop");
std::string const TTL("ttl");

using namespace std::chrono;

SynchronizeShard::SynchronizeShard(
  MaintenanceFeature& feature, ActionDescription const& desc) :
  ActionBase(feature, desc) {

  std::stringstream error;

  if (!desc.has(COLLECTION)) {
    error << "collection must be specified. ";
  }
  TRI_ASSERT(desc.has(COLLECTION));

  if (!desc.has(DATABASE)) {
    error << "database must be specified. ";
  }
  TRI_ASSERT(desc.has(DATABASE));

  if (!desc.has(SHARD)) {
    error << "shard must be specified. ";
  }
  TRI_ASSERT(desc.has(SHARD));

  if (!desc.has(THE_LEADER)) {
    error << "leader must be specified. ";
  }
  TRI_ASSERT(desc.has(THE_LEADER));

  if (!desc.has(SHARD_VERSION)) {
    error << "local shard version must be specified. ";
  }
  TRI_ASSERT(desc.has(SHARD_VERSION));

  if (!error.str().empty()) {
    LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeShard: " << error.str();
    _result.reset(TRI_ERROR_INTERNAL, error.str());
    setState(FAILED);
  }
}

class SynchronizeShardCallback : public arangodb::ClusterCommCallback {
 public:
  explicit SynchronizeShardCallback(SynchronizeShard* callie) {}
  virtual bool operator()(arangodb::ClusterCommResult*) override final {
    return true;
  }
};

SynchronizeShard::~SynchronizeShard() {}

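/// @brief Ask the leader for a fresh read-lock job id. This issues a GET on
/// /_db/<database>/_api/replication/holdReadLockCollection and parses the
/// "id" attribute of the response body. An illustrative (not authoritative)
/// response payload would be: { "id": "12345" }.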
arangodb::Result getReadLockId(
  std::string const& endpoint, std::string const& database,
  std::string const& clientId, double timeout, uint64_t& id) {

  std::string error("startReadLockOnLeader: Failed to get read lock - ");

  auto cc = arangodb::ClusterComm::instance();
  if (cc == nullptr) { // nullptr only happens during controlled shutdown
    return arangodb::Result(
      TRI_ERROR_SHUTTING_DOWN, "getReadLockId: shutting down");
  }

  auto comres = cc->syncRequest(
    TRI_NewTickServer(), endpoint, rest::RequestType::GET,
    DB + database + REPL_HOLD_READ_LOCK, std::string(),
    std::unordered_map<std::string, std::string>(), timeout);

  auto result = comres->result;

  if (result != nullptr && result->getHttpReturnCode() == 200) {
    auto const idv = result->getBodyVelocyPack();
    auto const& idSlice = idv->slice();
    TRI_ASSERT(idSlice.isObject());
    TRI_ASSERT(idSlice.hasKey(ID));
    try {
      id = std::stoull(idSlice.get(ID).copyString());
    } catch (std::exception const&) {
      error += " expecting id to be uint64_t ";
      error += idSlice.toJson();
      return arangodb::Result(TRI_ERROR_INTERNAL, error);
    }
  } else {
    if (result) {
      error.append(result->getHttpReturnMessage());
    } else {
      error.append(comres->stringifyErrorMessage());
    }
    return arangodb::Result(TRI_ERROR_INTERNAL, error);
  }

  return arangodb::Result();
}

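/// @brief Count the documents in a local shard by running a single read-only
/// transaction against it. The count is written into the output parameter c;
/// callers use it both for the shortcut check and as a checksum.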
arangodb::Result collectionCount(
  std::shared_ptr<arangodb::LogicalCollection> const& col, uint64_t& c) {

  std::string collectionName(col->name());
  auto ctx = std::make_shared<transaction::StandaloneContext>(col->vocbase());
  SingleCollectionTransaction trx(
    ctx, collectionName, AccessMode::Type::READ);

  Result res = trx.begin();
  if (!res.ok()) {
    LOG_TOPIC(ERR, Logger::MAINTENANCE)
      << "Failed to start count transaction: " << res;
    return res;
  }

  OperationResult opResult = trx.count(collectionName,
                                       arangodb::transaction::CountType::Normal);
  res = trx.finish(opResult.result);

  if (res.fail()) {
    LOG_TOPIC(ERR, Logger::MAINTENANCE)
      << "Failed to finish count transaction: " << res;
    return res;
  }

  VPackSlice s = opResult.slice();
  TRI_ASSERT(s.isNumber());
  c = s.getNumber<uint64_t>();

  return opResult.result;
}

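/// @brief Ask the leader to add this server to the shard's follower list via
/// PUT /_db/<database>/_api/replication/addFollower. The request body built
/// below carries the follower id, the shard name, the local document count
/// as a checksum and, unless the shortcut (lockJobId == 0) is taken, the
/// read-lock id, e.g. (illustrative values only):
///   { "followerId": "PRMR-...", "shard": "s100001",
///     "checksum": "0", "readLockId": "12345" }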
arangodb::Result addShardFollower(
  std::string const& endpoint, std::string const& database,
  std::string const& shard, uint64_t lockJobId,
  std::string const& clientId, double timeout = 120.0) {

  LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
    << "addShardFollower: tell the leader to put us into the follower list...";

  auto cc = arangodb::ClusterComm::instance();
  if (cc == nullptr) { // nullptr only happens during controlled shutdown
    return arangodb::Result(
      TRI_ERROR_SHUTTING_DOWN, "addShardFollower: shutting down");
  }

  try {
    DatabaseGuard guard(database);
    auto vocbase = &guard.database();

    auto collection = vocbase->lookupCollection(shard);
    if (collection == nullptr) {
      std::string errorMsg(
        "SynchronizeShard::addShardFollower: Failed to lookup collection ");
      errorMsg += shard;
      LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMsg;
      return arangodb::Result(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, errorMsg);
    }

    uint64_t docCount;
    Result res = collectionCount(collection, docCount);
    if (res.fail()) {
      return res;
    }

    VPackBuilder body;
    { VPackObjectBuilder b(&body);
      body.add(FOLLOWER_ID, VPackValue(arangodb::ServerState::instance()->getId()));
      body.add(SHARD, VPackValue(shard));
      body.add("checksum", VPackValue(std::to_string(docCount)));
      if (lockJobId != 0) {
        body.add("readLockId", VPackValue(std::to_string(lockJobId)));
      } else { // short cut case
        if (docCount != 0) {
          // This can happen if we once were an in-sync follower and a
          // synchronization request has timed out, but still runs on our
          // side here. In this case, the caller can simply continue with
          // the slow path and run the full sync protocol. Therefore we
          // error out here. Note that we are in the lockJobId == 0 case,
          // which is the shortcut.
          std::string msg = "Short cut synchronization for shard " + shard
            + " did not work, since we got a document in the meantime.";
          LOG_TOPIC(INFO, Logger::MAINTENANCE) << msg;
          return arangodb::Result(TRI_ERROR_INTERNAL, msg);
        }
      }
    }

    auto comres = cc->syncRequest(
      TRI_NewTickServer(), endpoint, rest::RequestType::PUT,
      DB + database + REPL_ADD_FOLLOWER, body.toJson(),
      std::unordered_map<std::string, std::string>(), timeout);

    auto result = comres->result;
    std::string errorMessage(
      "addShardFollower: could not add us to the leader's follower list. ");
    if (result == nullptr || result->getHttpReturnCode() != 200) {
      if (lockJobId != 0) {
        errorMessage += comres->stringifyErrorMessage();
      } else {
        errorMessage += "with shortcut.";
      }
      LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMessage;
      return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
    }

    LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "addShardFollower: success";
    return arangodb::Result();
  } catch (std::exception const& e) {
    std::string errorMsg(
      "SynchronizeShard::addShardFollower: Failed to lookup database ");
    errorMsg += database;
    errorMsg += " exception: ";
    errorMsg += e.what();
    LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMsg;
    return arangodb::Result(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg);
  }
}

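/// @brief Ask the leader to remove this server from the shard's follower
/// list via PUT /_db/<database>/_api/replication/removeFollower, sending the
/// shard name and our follower id.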
arangodb::Result removeShardFollower(
  std::string const& endpoint, std::string const& database,
  std::string const& shard, std::string const& clientId, double timeout = 120.0) {

  LOG_TOPIC(WARN, Logger::MAINTENANCE) <<
    "removeShardFollower: tell the leader to take us off the follower list...";

  auto cc = arangodb::ClusterComm::instance();
  if (cc == nullptr) { // nullptr only happens during controlled shutdown
    return arangodb::Result(
      TRI_ERROR_SHUTTING_DOWN, "removeShardFollower: shutting down");
  }

  VPackBuilder body;
  { VPackObjectBuilder b(&body);
    body.add(SHARD, VPackValue(shard));
    body.add(FOLLOWER_ID,
             VPackValue(arangodb::ServerState::instance()->getId())); }

  auto comres = cc->syncRequest(
    TRI_NewTickServer(), endpoint, rest::RequestType::PUT,
    DB + database + REPL_REM_FOLLOWER, body.toJson(),
    std::unordered_map<std::string, std::string>(), timeout);

  auto result = comres->result;
  if (result == nullptr || result->getHttpReturnCode() != 200) {
    std::string errorMessage(
      "removeShardFollower: could not remove us from the leader's follower list: ");
    if (result != nullptr) {
      errorMessage += std::to_string(result->getHttpReturnCode());
      errorMessage += " ";
    }
    errorMessage += comres->stringifyErrorMessage();
    LOG_TOPIC(ERR, Logger::MAINTENANCE) << errorMessage;
    return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
  }

  LOG_TOPIC(WARN, Logger::MAINTENANCE) << "removeShardFollower: success";
  return arangodb::Result();
}

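/// @brief Cancel a read lock that we have previously requested on the leader
/// by sending a DELETE to /_api/replication/holdReadLockCollection in the
/// _system database with the lock job id in the body.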
arangodb::Result cancelReadLockOnLeader(
  std::string const& endpoint, std::string const& database,
  uint64_t lockJobId, std::string const& clientId,
  double timeout = 10.0) {

  auto cc = arangodb::ClusterComm::instance();
  if (cc == nullptr) { // nullptr only happens during controlled shutdown
    return arangodb::Result(
      TRI_ERROR_SHUTTING_DOWN, "cancelReadLockOnLeader: shutting down");
  }

  VPackBuilder body;
  { VPackObjectBuilder b(&body);
    body.add(ID, VPackValue(std::to_string(lockJobId))); }

  // Note that we always use the _system database here because the actual
  // database might be gone already on the leader and we need to cancel
  // the read lock under all circumstances.
  auto comres = cc->syncRequest(
    TRI_NewTickServer(), endpoint, rest::RequestType::DELETE_REQ,
    DB + StaticStrings::SystemDatabase + REPL_HOLD_READ_LOCK, body.toJson(),
    std::unordered_map<std::string, std::string>(), timeout);

  auto result = comres->result;

  if (result == nullptr || result->getHttpReturnCode() != 200) {
    auto errorMessage = comres->stringifyErrorMessage();
    LOG_TOPIC(ERR, Logger::MAINTENANCE)
      << "cancelReadLockOnLeader: error for " << body.toJson()
      << ": " << errorMessage;
    return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
  }

  LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "cancelReadLockOnLeader: success";
  return arangodb::Result();
}

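/// @brief Cancel a WAL barrier on the leader via DELETE on
/// /_db/<database>/_api/replication/barrier/<barrierId>. A barrierId <= 0
/// means no barrier was created, so there is nothing to do.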
arangodb::Result cancelBarrier(
  std::string const& endpoint, std::string const& database,
  int64_t barrierId, std::string const& clientId,
  double timeout = 120.0) {

  if (barrierId <= 0) {
    return Result();
  }

  auto cc = arangodb::ClusterComm::instance();
  if (cc == nullptr) { // nullptr only happens during controlled shutdown
    return arangodb::Result(
      TRI_ERROR_SHUTTING_DOWN, "cancelBarrier: shutting down");
  }

  auto comres = cc->syncRequest(
    TRI_NewTickServer(), endpoint, rest::RequestType::DELETE_REQ,
    DB + database + REPL_BARRIER_API + std::to_string(barrierId), std::string(),
    std::unordered_map<std::string, std::string>(), timeout);

  if (comres->status == CL_COMM_SENT) {
    auto result = comres->result;
    if (result != nullptr && result->getHttpReturnCode() != 200 &&
        result->getHttpReturnCode() != 204) {
      auto errorMessage = comres->stringifyErrorMessage();
      LOG_TOPIC(ERR, Logger::MAINTENANCE)
        << "cancelBarrier: error: " << errorMessage;
      return arangodb::Result(TRI_ERROR_INTERNAL, errorMessage);
    }
  } else {
    std::string error("cancelBarrier: failed to send message to leader: status ");
    error += std::to_string(comres->status);
    LOG_TOPIC(ERR, Logger::MAINTENANCE) << error;
    return arangodb::Result(TRI_ERROR_INTERNAL, error);
  }

  LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "cancelBarrier: success";
  return arangodb::Result();
}

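/// @brief Ask the leader to hold a read lock on the collection: fire off an
/// asynchronous POST to holdReadLockCollection, then poll with PUT until the
/// leader reports the lock as held. The poll response is expected to carry a
/// boolean "lockHeld" attribute, e.g. (illustrative): { "lockHeld": true }.
/// If the lock cannot be observed within roughly 10s (20 polls, 0.5s apart),
/// the request is cancelled via DELETE and we give up with
/// TRI_ERROR_CLUSTER_TIMEOUT.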
arangodb::Result SynchronizeShard::getReadLock(
  std::string const& endpoint, std::string const& database,
  std::string const& collection, std::string const& clientId,
  uint64_t rlid, double timeout) {

  auto cc = arangodb::ClusterComm::instance();
  if (cc == nullptr) { // nullptr only happens during controlled shutdown
    return arangodb::Result(
      TRI_ERROR_SHUTTING_DOWN, "getReadLock: shutting down");
  }

  VPackBuilder body;
  { VPackObjectBuilder o(&body);
    body.add(ID, VPackValue(std::to_string(rlid)));
    body.add(COLLECTION, VPackValue(collection));
    body.add(TTL, VPackValue(timeout)); }

  auto url = DB + database + REPL_HOLD_READ_LOCK;

  cc->asyncRequest(
    TRI_NewTickServer(), endpoint, rest::RequestType::POST, url,
    std::make_shared<std::string>(body.toJson()),
    std::unordered_map<std::string, std::string>(),
    std::make_shared<SynchronizeShardCallback>(this), timeout, true, timeout);

  // Intentionally do not look at the outcome, even in case of an error
  // we must make sure that the read lock on the leader is not active!
  // This is done automatically below.

  size_t count = 0;
  while (++count < 20) { // wait for some time until the read lock is established:
    // Now check that we hold the read lock:
    auto putres = cc->syncRequest(
      TRI_NewTickServer(), endpoint, rest::RequestType::PUT, url, body.toJson(),
      std::unordered_map<std::string, std::string>(), timeout);

    auto result = putres->result;
    if (result != nullptr && result->getHttpReturnCode() == 200) {
      auto const vp = putres->result->getBodyVelocyPack();
      auto const& slice = vp->slice();
      TRI_ASSERT(slice.isObject());
      if (slice.hasKey("lockHeld") && slice.get("lockHeld").isBoolean() &&
          slice.get("lockHeld").getBool()) {
        return arangodb::Result();
      }
      LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
        << "startReadLockOnLeader: lock not yet acquired...";
    } else {
      LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
        << "startReadLockOnLeader: do not see read lock yet: "
        << putres->stringifyErrorMessage();
    }

    std::this_thread::sleep_for(duration<double>(.5));
  }

  LOG_TOPIC(ERR, Logger::MAINTENANCE) << "startReadLockOnLeader: giving up";

  try {
    auto r = cc->syncRequest(
      TRI_NewTickServer(), endpoint, rest::RequestType::DELETE_REQ, url, body.toJson(),
      std::unordered_map<std::string, std::string>(), timeout);
    if (r->result == nullptr || r->result->getHttpReturnCode() != 200) {
      LOG_TOPIC(ERR, Logger::MAINTENANCE)
        << "startReadLockOnLeader: cancellation error for shard - " << collection
        << " " << r->getErrorCode() << ": " << r->stringifyErrorMessage();
    }
  } catch (std::exception const& e) {
    LOG_TOPIC(ERR, Logger::MAINTENANCE)
      << "startReadLockOnLeader: exception in cancel: " << e.what();
  }

  return arangodb::Result(TRI_ERROR_CLUSTER_TIMEOUT, "startReadLockOnLeader: giving up");
}

bool isStopping() {
  return application_features::ApplicationServer::isStopping();
}

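/// @brief Acquire a read lock on the leader for the given collection: first
/// obtain a lock job id via getReadLockId, then establish the lock with
/// getReadLock. On success rlid contains the id needed to cancel the lock.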
arangodb::Result SynchronizeShard::startReadLockOnLeader(
  std::string const& endpoint, std::string const& database,
  std::string const& collection, std::string const& clientId,
  uint64_t& rlid, double timeout) {

  // Read lock id
  rlid = 0;
  arangodb::Result result =
    getReadLockId(endpoint, database, clientId, timeout, rlid);
  if (!result.ok()) {
    LOG_TOPIC(ERR, Logger::MAINTENANCE) << result.errorMessage();
    return result;
  } else {
    LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "Got read lock id: " << rlid;
  }

  result = getReadLock(endpoint, database, collection, clientId, rlid, timeout);

  return result;
}

enum ApplierType {
  APPLIER_DATABASE,
  APPLIER_GLOBAL
};

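/// @brief Run the initial replication of a shard from the configured leader
/// endpoint. Depending on applierType this uses a DatabaseInitialSyncer or a
/// GlobalInitialSyncer. On success the builder sy receives an object with
/// the barrier id (if kept), the last log tick and the processed
/// collections, which the caller needs for the tailing phase.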
arangodb::Result replicationSynchronize(
  std::shared_ptr<arangodb::LogicalCollection> const& col, VPackSlice const& config,
  ApplierType applierType, std::shared_ptr<VPackBuilder> sy) {

  auto& vocbase = col->vocbase();
  auto database = vocbase.name();
  auto shard = col->name();

  bool keepBarrier = config.get(KEEP_BARRIER).getBool();
  std::string leaderId;
  if (config.hasKey(LEADER_ID)) {
    leaderId = config.get(LEADER_ID).copyString();
  }

  ReplicationApplierConfiguration configuration =
    ReplicationApplierConfiguration::fromVelocyPack(config, database);
  configuration.validate();

  std::shared_ptr<InitialSyncer> syncer;

  if (applierType == APPLIER_DATABASE) {
    // database-specific synchronization
    syncer.reset(new DatabaseInitialSyncer(vocbase, configuration));

    if (!leaderId.empty()) {
      syncer->setLeaderId(leaderId);
    }
  } else if (applierType == APPLIER_GLOBAL) {
    configuration._skipCreateDrop = false;
    syncer.reset(new GlobalInitialSyncer(configuration));
  } else {
    TRI_ASSERT(false);
  }

  try {
    Result r = syncer->run(configuration._incremental);

    if (r.fail()) {
      LOG_TOPIC(ERR, Logger::REPLICATION)
        << "initial sync failed for database '" << database << "': "
        << r.errorMessage();
      THROW_ARANGO_EXCEPTION_MESSAGE(
        r.errorNumber(), "cannot sync from remote endpoint: " +
        r.errorMessage() + ". last progress message was '" + syncer->progress()
        + "'");
    }

    { VPackObjectBuilder o(sy.get());
      if (keepBarrier) {
        sy->add(BARRIER_ID, VPackValue(syncer->stealBarrier()));
      }
      sy->add(LAST_LOG_TICK, VPackValue(syncer->getLastLogTick()));
      sy->add(VPackValue(COLLECTIONS));
      { VPackArrayBuilder a(sy.get());
        for (auto const& i : syncer->getProcessedCollections()) {
          VPackObjectBuilder e(sy.get());
          sy->add(ID, VPackValue(i.first));
          sy->add(NAME, VPackValue(i.second));
        }}}
  } catch (arangodb::basics::Exception const& ex) {
    std::string s("cannot sync from remote endpoint: ");
    s += ex.what() + std::string(". last progress message was '") + syncer->progress() + "'";
    return Result(ex.code(), s);
  } catch (std::exception const& ex) {
    std::string s("cannot sync from remote endpoint: ");
    s += ex.what() + std::string(". last progress message was '") + syncer->progress() + "'";
    return Result(TRI_ERROR_INTERNAL, s);
  } catch (...) {
    std::string s(
      "cannot sync from remote endpoint: unknown exception. last progress message was '");
    s += syncer->progress() + "'";
    return Result(TRI_ERROR_INTERNAL, s);
  }

  return arangodb::Result();
}

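/// @brief Catch up on the WAL from a given tick and finalize the collection
/// sync via DatabaseTailingSyncer::syncCollectionFinalize. The conf slice is
/// built by the caller (see first() below) and looks roughly like this
/// (illustrative values only):
///   { "endpoint": "tcp://10.0.0.1:8529", "database": "_system",
///     "collection": "s100001", "leaderId": "PRMR-...",
///     "from": 1234567, "requestTimeout": 60.0, "connectTimeout": 60.0 }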
arangodb::Result replicationSynchronizeFinalize(VPackSlice const& conf) {
  auto const database = conf.get(DATABASE).copyString();
  auto const collection = conf.get(COLLECTION).copyString();
  auto const leaderId = conf.get(LEADER_ID).copyString();
  auto const fromTick = conf.get("from").getNumber<uint64_t>();

  ReplicationApplierConfiguration configuration =
    ReplicationApplierConfiguration::fromVelocyPack(conf, database);
  // will throw if invalid
  configuration.validate();

  DatabaseGuard guard(database);
  DatabaseTailingSyncer syncer(guard.database(), configuration, fromTick, true, 0);

  if (!leaderId.empty()) {
    syncer.setLeaderId(leaderId);
  }

  Result r;
  try {
    r = syncer.syncCollectionFinalize(collection);
  } catch (arangodb::basics::Exception const& ex) {
    r = Result(ex.code(), ex.what());
  } catch (std::exception const& ex) {
    r = Result(TRI_ERROR_INTERNAL, ex.what());
  } catch (...) {
    r = Result(TRI_ERROR_INTERNAL, "unknown exception");
  }

  if (r.fail()) {
    LOG_TOPIC(ERR, Logger::REPLICATION)
      << "syncCollectionFinalize failed: " << r.errorMessage();
  }

  return r;
}

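/// @brief Main entry point of the action. The overall flow is:
///   1. wait until the leader has created the shard in Current, or bail out
///      if the plan has changed under us;
///   2. if the local shard is empty, try the shortcut of registering as a
///      follower directly;
///   3. otherwise run the initial sync (replicationSynchronize), grab a read
///      lock on the leader to stop writes, catch up on the WAL
///      (replicationSynchronizeFinalize) and register as a follower;
///   4. release the read lock and report the outcome in _result.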
bool SynchronizeShard::first() {
  std::string database = _description.get(DATABASE);
  std::string planId = _description.get(COLLECTION);
  std::string shard = _description.get(SHARD);
  std::string leader = _description.get(THE_LEADER);

  LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
    << "SynchronizeShard: synchronizing shard '" << database << "/" << shard
    << "' for central '" << database << "/" << planId << "'";

  auto* clusterInfo = ClusterInfo::instance();
  auto const ourselves = arangodb::ServerState::instance()->getId();
  auto startTime = system_clock::now();
  auto const startTimeStr = timepointToString(startTime);
  auto const clientId(database + planId + shard + leader);

  // First wait until the leader has created the shard (visible in
  // Current in the Agency) or we or the shard have vanished from
  // the plan:
  while (true) {
    if (isStopping()) {
      _result.reset(TRI_ERROR_SHUTTING_DOWN);
      return false;
    }

    std::vector<std::string> planned;
    auto result = clusterInfo->getShardServers(shard, planned);

    if (!result.ok() ||
        std::find(planned.begin(), planned.end(), ourselves) == planned.end() ||
        planned.front() != leader) {
      // Things have changed again, simply terminate:
      auto const endTime = system_clock::now();
      std::stringstream error;
      error << "cancelled, " << database << "/" << shard << ", " << database
            << "/" << planId << ", started " << startTimeStr << ", ended "
            << timepointToString(endTime);
      LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
      _result.reset(TRI_ERROR_FAILED, error.str());
      return false;
    }

    auto ci = clusterInfo->getCollectionNT(database, planId);
    if (ci == nullptr) {
      auto const endTime = system_clock::now();
      std::stringstream msg;
      msg << "failed to lookup collection in getCollectionNT, " << database << "/"
          << shard << ", " << database
          << "/" << planId << ", started " << startTimeStr << ", ended "
          << timepointToString(endTime);
      LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << msg.str();
      _result.reset(TRI_ERROR_FAILED, msg.str());
      return false;
    }

    std::string const cid = std::to_string(ci->id());
    std::shared_ptr<CollectionInfoCurrent> cic =
      ClusterInfo::instance()->getCollectionCurrent(database, cid);
    std::vector<std::string> current = cic->servers(shard);

    if (current.empty()) {
      _result.reset(TRI_ERROR_FAILED,
                    "synchronizeOneShard: cancelled, no servers in 'Current'");
      return false;
    }
    if (current.front() == leader) {
      if (std::find(current.begin(), current.end(), ourselves) == current.end()) {
        break; // start synchronization work
      }
      // We are already there, this is rather strange, but never mind:
      auto const endTime = system_clock::now();
      std::stringstream error;
      error << "already done, " << database << "/" << shard
            << ", " << database << "/" << planId << ", started "
            << startTimeStr << ", ended " << timepointToString(endTime);
      LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
      _result.reset(TRI_ERROR_FAILED, error.str());
      return false;
    }

    LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
      << "synchronizeOneShard: waiting for leader, " << database
      << "/" << shard << ", " << database << "/" << planId;

    std::this_thread::sleep_for(duration<double>(0.2));
  }

  // Once we get here, we know that the leader is ready for sync, so we give it a try:
  try {
    DatabaseGuard guard(database);
    auto vocbase = &guard.database();

    auto collection = vocbase->lookupCollection(shard);
    if (collection == nullptr) {
      std::stringstream error;
      error << "failed to lookup local shard " << shard;
      LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
      _result.reset(TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND, error.str());
      return false;
    }

    auto ep = clusterInfo->getServerEndpoint(leader);
    uint64_t docCount;
    if (!collectionCount(collection, docCount).ok()) {
      std::stringstream error;
      error << "failed to get a local count for shard " << shard;
      LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeShard: " << error.str();
      _result.reset(TRI_ERROR_INTERNAL, error.str());
      return false;
    }

    if (docCount == 0) {
      // We have a short cut:
      LOG_TOPIC(DEBUG, Logger::MAINTENANCE) <<
        "synchronizeOneShard: trying short cut to synchronize local shard '" <<
        database << "/" << shard << "' for central '" << database << "/" <<
        planId << "'";
      try {
        auto asResult = addShardFollower(ep, database, shard, 0, clientId, 60.0);

        if (asResult.ok()) {
          auto const endTime = system_clock::now();
          LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
            << "synchronizeOneShard: shortcut worked, done, " << database << "/"
            << shard << ", " << database << "/" << planId << ", started: "
            << startTimeStr << ", ended: " << timepointToString(endTime);
          collection->followers()->setTheLeader(leader);
          notify();
          return false;
        }
      } catch (...) {}
    }

    LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
      << "synchronizeOneShard: trying to synchronize local shard '" << database
      << "/" << shard << "' for central '" << database << "/" << planId << "'";

    try {
      // First once without a read transaction:
      if (isStopping()) {
        _result.reset(TRI_ERROR_INTERNAL, "server is shutting down");
        return false;
      }

      // Mark us as follower for this leader such that we begin
      // accepting replication operations, note that this is also
      // used for the initial synchronization:
      collection->followers()->setTheLeader(leader);

      if (leader.empty()) {
        collection->followers()->clear();
      }

      // do not reset followers when we resign at this time...we are
      // still the only source of truth to trust, in particular, in the
      // planned leader resignation, we will shortly after the call to
      // this function here report the controlled resignation to the
      // agency. This report must still contain the correct follower list
      // or else the supervision is super angry with us.

      startTime = system_clock::now();

      VPackBuilder config;
      { VPackObjectBuilder o(&config);
        config.add(ENDPOINT, VPackValue(ep));
        config.add(INCREMENTAL, VPackValue(docCount > 0)); // use dump if possible
        config.add(KEEP_BARRIER, VPackValue(true));
        config.add(LEADER_ID, VPackValue(leader));
        config.add(SKIP_CREATE_DROP, VPackValue(true));
        config.add(RESTRICT_TYPE, VPackValue(INCLUDE));
        config.add(VPackValue(RESTRICT_COLLECTIONS));
        { VPackArrayBuilder a(&config);
          config.add(VPackValue(shard)); }
        config.add(INCLUDE_SYSTEM, VPackValue(true));
        config.add("verbose", VPackValue(false)); }

      auto details = std::make_shared<VPackBuilder>();

      Result syncRes = replicationSynchronize(
        collection, config.slice(), APPLIER_DATABASE, details);

      auto sy = details->slice();
      auto const endTime = system_clock::now();
      bool longSync = false;

      // Long shard sync initialisation
      if (endTime - startTime > seconds(5)) {
        LOG_TOPIC(WARN, Logger::MAINTENANCE)
          << "synchronizeOneShard: long call to syncCollection for shard "
          << shard << " " << syncRes.errorMessage() << " start time: "
          << timepointToString(startTime) << ", end time: "
          << timepointToString(system_clock::now());
        longSync = true;
      }

      if (!syncRes.ok()) {
        std::stringstream error;
        error << "could not initially synchronize shard " << shard << ": "
              << syncRes.errorMessage();
        LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
        _result.reset(TRI_ERROR_INTERNAL, error.str());
        return false;
      } else {
        VPackSlice collections = sy.get(COLLECTIONS);

        if (collections.length() == 0 ||
            collections[0].get("name").copyString() != shard) {
          if (longSync) {
            LOG_TOPIC(ERR, Logger::MAINTENANCE)
              << "synchronizeOneShard: long sync, before cancelBarrier "
              << timepointToString(system_clock::now());
          }
          cancelBarrier(ep, database, sy.get(BARRIER_ID).getNumber<int64_t>(), clientId);
          if (longSync) {
            LOG_TOPIC(ERR, Logger::MAINTENANCE)
              << "synchronizeOneShard: long sync, after cancelBarrier "
              << timepointToString(system_clock::now());
          }

          std::stringstream error;
          error << "shard " << shard << " seems to be gone from leader!";
          LOG_TOPIC(ERR, Logger::MAINTENANCE) << "SynchronizeOneShard: " << error.str();
          _result.reset(TRI_ERROR_INTERNAL, error.str());
          return false;
        } else {
          // Now start a read transaction to stop writes:
          uint64_t lockJobId = 0;
          LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
            << "synchronizeOneShard: startReadLockOnLeader: " << ep << ":"
            << database << ":" << collection->name();
          Result result = startReadLockOnLeader(
            ep, database, collection->name(), clientId, lockJobId);
          if (result.ok()) {
            LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "lockJobId: " << lockJobId;
          } else {
            LOG_TOPIC(ERR, Logger::MAINTENANCE)
              << "synchronizeOneShard: error in startReadLockOnLeader: "
              << result.errorMessage();
          }

          cancelBarrier(ep, database, sy.get(BARRIER_ID).getNumber<int64_t>(), clientId);

          if (lockJobId != 0) {
            VPackBuilder builder;
            { VPackObjectBuilder o(&builder);
              builder.add(ENDPOINT, VPackValue(ep));
              builder.add(DATABASE, VPackValue(database));
              builder.add(COLLECTION, VPackValue(shard));
              builder.add(LEADER_ID, VPackValue(leader));
              builder.add("from", sy.get(LAST_LOG_TICK));
              builder.add("requestTimeout", VPackValue(60.0));
              builder.add("connectTimeout", VPackValue(60.0));
            }

            Result fres = replicationSynchronizeFinalize(builder.slice());

            if (fres.ok()) {
              result = addShardFollower(ep, database, shard, lockJobId, clientId, 60.0);

              if (!result.ok()) {
                LOG_TOPIC(ERR, Logger::MAINTENANCE)
                  << "synchronizeOneShard: failed to add follower: "
                  << result.errorMessage();
              }
            } else {
              std::string errorMessage(
                "synchronizeOneShard: error in syncCollectionFinalize: ");
              errorMessage += fres.errorMessage();
              result = Result(TRI_ERROR_INTERNAL, errorMessage);
            }

            // This result is only logged, its outcome is not used otherwise:
            Result lockResult = cancelReadLockOnLeader(ep, database, lockJobId, clientId, 60.0);
            if (!lockResult.ok()) {
              LOG_TOPIC(ERR, Logger::MAINTENANCE)
                << "synchronizeOneShard: read lock has timed out for shard " << shard;
            }
          } else {
            LOG_TOPIC(ERR, Logger::MAINTENANCE)
              << "synchronizeOneShard: lockJobId was 0 for shard " << shard;
          }

          if (result.ok()) {
            LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
              << "synchronizeOneShard: synchronization worked for shard " << shard;
            _result.reset(TRI_ERROR_NO_ERROR);
          } else {
            LOG_TOPIC(ERR, Logger::MAINTENANCE)
              << "synchronizeOneShard: synchronization failed for shard " << shard;
            std::string errorMessage(
              "synchronizeOneShard: synchronization failed for shard "
              + shard + ": " + result.errorMessage());
            _result = Result(TRI_ERROR_INTERNAL, errorMessage);
          }
        }
      }
    } catch (std::exception const& e) {
      auto const endTime = system_clock::now();
      std::stringstream error;
      error << "synchronization of local shard '" << database << "/" << shard
            << "' for central '" << database << "/" << planId << "' failed: "
            << e.what() << ", ended " << timepointToString(endTime);
      LOG_TOPIC(ERR, Logger::MAINTENANCE) << error.str();
      _result.reset(TRI_ERROR_INTERNAL, e.what());
      return false;
    }
  } catch (std::exception const& e) {
    LOG_TOPIC(WARN, Logger::MAINTENANCE)
      << "action " << _description << " failed with exception " << e.what();
    _result.reset(TRI_ERROR_INTERNAL, e.what());
    return false;
  }

  // Tell others that we are done:
  auto const endTime = system_clock::now();
  LOG_TOPIC(INFO, Logger::MAINTENANCE)
    << "synchronizeOneShard: done, " << database << "/" << shard << ", "
    << database << "/" << planId << ", started: "
    << timepointToString(startTime) << ", ended: " << timepointToString(endTime);

  notify();
  return false;
}

void SynchronizeShard::setState(ActionState state) {
  if ((COMPLETE == state || FAILED == state) && _state != state) {
    TRI_ASSERT(_description.has("shard"));
    _feature.incShardVersion(_description.get("shard"));
  }

  ActionBase::setState(state);
}