1
0
Fork 0
arangodb/arangod/Cluster/DBServerAgencySync.cpp

320 lines
12 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2018 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////
#include "DBServerAgencySync.h"
#include "Basics/MutexLocker.h"
#include "Cluster/ClusterFeature.h"
#include "Cluster/ClusterInfo.h"
#include "Cluster/FollowerInfo.h"
#include "Cluster/HeartbeatThread.h"
#include "Cluster/Maintenance.h"
#include "Cluster/MaintenanceFeature.h"
#include "Cluster/MaintenanceStrings.h"
#include "Cluster/ServerState.h"
#include "Logger/Logger.h"
#include "RestServer/DatabaseFeature.h"
#include "RestServer/SystemDatabaseFeature.h"
#include "Utils/DatabaseGuard.h"
#include "VocBase/LogicalCollection.h"
#include "VocBase/Methods/Databases.h"
#include "VocBase/vocbase.h"
using namespace arangodb;
using namespace arangodb::application_features;
using namespace arangodb::methods;
using namespace arangodb::rest;
DBServerAgencySync::DBServerAgencySync(HeartbeatThread* heartbeat)
: _heartbeat(heartbeat) {}
void DBServerAgencySync::work() {
LOG_TOPIC(TRACE, Logger::CLUSTER) << "starting plan update handler";
_heartbeat->setReady();
DBServerAgencySyncResult result = execute();
_heartbeat->dispatchedJobResult(result);
}
Result DBServerAgencySync::getLocalCollections(VPackBuilder& collections) {
using namespace arangodb::basics;
Result result;
DatabaseFeature* dbfeature = nullptr;
try {
dbfeature = ApplicationServer::getFeature<DatabaseFeature>("Database");
} catch (...) {
}
if (dbfeature == nullptr) {
LOG_TOPIC(ERR, Logger::HEARTBEAT) << "Failed to get feature database";
return Result(TRI_ERROR_INTERNAL, "Failed to get feature database");
}
VPackObjectBuilder c(&collections);
for (auto const& database : Databases::list()) {
try {
DatabaseGuard guard(database);
auto vocbase = &guard.database();
collections.add(VPackValue(database));
VPackObjectBuilder db(&collections);
auto cols = vocbase->collections(false);
for (auto const& collection : cols) {
if (!collection->system()) {
collections.add(VPackValue(collection->name()));
VPackObjectBuilder col(&collections);
collection->properties(collections, true, false);
auto const& folls = collection->followers();
std::string const theLeader = folls->getLeader();
bool theLeaderTouched = folls->getLeaderTouched();
// Note that whenever theLeader was set explicitly since the collection
// object was created, we believe it. Otherwise, we do not accept
// that we are the leader. This is to circumvent the problem that
// after a restart we would implicitly be assumed to be the leader.
collections.add("theLeader", VPackValue(theLeaderTouched ? theLeader : "NOT_YET_TOUCHED"));
collections.add("theLeaderTouched", VPackValue(theLeaderTouched));
if (theLeader.empty() && theLeaderTouched) {
// we are the leader ourselves
// In this case we report our in-sync followers here in the format
// of the agency: [ leader, follower1, follower2, ... ]
collections.add(VPackValue("servers"));
{
VPackArrayBuilder guard(&collections);
collections.add(VPackValue(arangodb::ServerState::instance()->getId()));
std::shared_ptr<std::vector<ServerID> const> srvs = folls->get();
for (auto const& s : *srvs) {
collections.add(VPackValue(s));
}
}
}
}
}
} catch (std::exception const& e) {
return Result(TRI_ERROR_INTERNAL,
std::string("Failed to guard database ") + database + ": " + e.what());
}
}
return Result();
}
DBServerAgencySyncResult DBServerAgencySync::execute() {
// default to system database
TRI_ASSERT(AgencyCommManager::isEnabled());
AgencyComm comm;
using namespace std::chrono;
using clock = std::chrono::steady_clock;
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "DBServerAgencySync::execute starting";
auto* sysDbFeature =
application_features::ApplicationServer::lookupFeature<SystemDatabaseFeature>();
MaintenanceFeature* mfeature =
ApplicationServer::getFeature<MaintenanceFeature>("Maintenance");
arangodb::SystemDatabaseFeature::ptr vocbase =
sysDbFeature ? sysDbFeature->use() : nullptr;
DBServerAgencySyncResult result;
if (vocbase == nullptr) {
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "DBServerAgencySync::execute no vocbase";
result.errorMessage = "DBServerAgencySync::execute no vocbase";
return result;
}
Result tmp;
VPackBuilder rb;
auto clusterInfo = ClusterInfo::instance();
auto plan = clusterInfo->getPlan();
auto serverId = arangodb::ServerState::instance()->getId();
if (plan == nullptr) {
// TODO increase log level, except during shutdown?
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "DBServerAgencySync::execute no plan";
result.errorMessage = "DBServerAgencySync::execute no plan";
return result;
}
VPackBuilder local;
Result glc = getLocalCollections(local);
if (!glc.ok()) {
// FIXMEMAINTENANCE: if this fails here, then result is empty, is this
// intended? I also notice that there is another Result object "tmp"
// that is going to eat bad results in few lines later. Again, is
// that the correct action? If so, how about supporting comments in
// the code for both.
result.errorMessage = "Could not do getLocalCollections for phase 1.";
return result;
}
auto start = clock::now();
try {
// in previous life handlePlanChange
VPackObjectBuilder o(&rb);
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "DBServerAgencySync::phaseOne";
tmp = arangodb::maintenance::phaseOne(plan->slice(), local.slice(),
serverId, *mfeature, rb);
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "DBServerAgencySync::phaseOne done";
// Give some asynchronous jobs created in phaseOne a chance to complete
// before we collect data for phaseTwo:
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "DBServerAgencySync::hesitating between phases 1 and 2 for 0.1s...";
std::this_thread::sleep_for(std::chrono::milliseconds(100));
auto current = clusterInfo->getCurrent();
if (current == nullptr) {
// TODO increase log level, except during shutdown?
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "DBServerAgencySync::execute no current";
result.errorMessage = "DBServerAgencySync::execute no current";
return result;
}
LOG_TOPIC(TRACE, Logger::MAINTENANCE)
<< "DBServerAgencySync::phaseTwo - current state: " << current->toJson();
local.clear();
glc = getLocalCollections(local);
// We intentionally refetch local collections here, such that phase 2
// can already see potential changes introduced by phase 1. The two
// phases are sufficiently independent that this is OK.
LOG_TOPIC(TRACE, Logger::MAINTENANCE)
<< "DBServerAgencySync::phaseTwo - local state: " << local.toJson();
if (!glc.ok()) {
result.errorMessage = "Could not do getLocalCollections for phase 2.";
return result;
}
LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "DBServerAgencySync::phaseTwo";
tmp = arangodb::maintenance::phaseTwo(plan->slice(), current->slice(),
local.slice(), serverId, *mfeature, rb);
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "DBServerAgencySync::phaseTwo done";
} catch (std::exception const& e) {
LOG_TOPIC(ERR, Logger::MAINTENANCE) << "Failed to handle plan change: " << e.what();
}
if (rb.isClosed()) {
auto report = rb.slice();
if (report.isObject()) {
std::vector<std::string> path = {maintenance::PHASE_TWO, "agency"};
if (report.hasKey(path) && report.get(path).isObject()) {
auto agency = report.get(path);
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "DBServerAgencySync reporting to Current: " << agency.toJson();
// Report to current
if (!agency.isEmptyObject()) {
std::vector<AgencyOperation> operations;
std::vector<AgencyPrecondition> preconditions;
for (auto const& ao : VPackObjectIterator(agency)) {
auto const key = ao.key.copyString();
auto const op = ao.value.get("op").copyString();
if (ao.value.hasKey("precondition")) {
auto const precondition = ao.value.get("precondition");
preconditions.push_back(
AgencyPrecondition(
precondition.keyAt(0).copyString(), AgencyPrecondition::Type::VALUE, precondition.valueAt(0)));
}
if (op == "set") {
auto const value = ao.value.get("payload");
operations.push_back(AgencyOperation(key, AgencyValueOperationType::SET, value));
} else if (op == "delete") {
operations.push_back(AgencyOperation(key, AgencySimpleOperationType::DELETE_OP));
}
}
operations.push_back(AgencyOperation("Current/Version",
AgencySimpleOperationType::INCREMENT_OP));
AgencyWriteTransaction currentTransaction(operations, preconditions);
AgencyCommResult r = comm.sendTransactionWithFailover(currentTransaction);
if (!r.successful()) {
LOG_TOPIC(INFO, Logger::MAINTENANCE)
<< "Error reporting to agency: _statusCode: " << r.errorCode()
<< " message: " << r.errorMessage()
<< ". This can be ignored, since it will be retried automatically.";
} else {
LOG_TOPIC(DEBUG, Logger::MAINTENANCE)
<< "Invalidating current in ClusterInfo";
clusterInfo->invalidateCurrent();
}
}
}
// FIXMEMAINTENANCE: If comm.sendTransactionWithFailover()
// fails, the result is ok() based upon phaseTwo()'s execution?
if (tmp.ok()) {
result = DBServerAgencySyncResult(
true,
report.hasKey("Plan") ? report.get("Plan").get("Version").getNumber<uint64_t>() : 0,
report.hasKey("Current")
? report.get("Current").get("Version").getNumber<uint64_t>()
: 0);
} else {
// Report an error:
result = DBServerAgencySyncResult(false, "Error in phase 2: " + tmp.errorMessage(),
0, 0);
}
}
} else {
result.errorMessage = "Report from phase 1 and 2 was not closed.";
}
auto took = duration<double>(clock::now() - start).count();
if (took > 30.0) {
LOG_TOPIC(WARN, Logger::MAINTENANCE) << "DBServerAgencySync::execute "
"took "
<< took << " s to execute handlePlanChange";
}
return result;
}