1
0
Fork 0

[3.5] agency lock left behind (#10022)

* short timeout issue and discarded agency lock removal

* short timeout issue and discarded agency lock removal

* no hot backup in 3.5.0
This commit is contained in:
Kaveh Vahedipour 2019-09-16 23:13:54 +02:00 committed by KVS85
parent 0b1d5be33e
commit 999e4b8873
3 changed files with 19 additions and 8 deletions

View File

@@ -4311,8 +4311,9 @@ arangodb::Result ClusterInfo::agencyHotBackupLock(std::string const& backupId,
if (!rv->slice().isObject() || !rv->slice().hasKey("results") ||
!rv->slice().get("results").isArray() || rv->slice().get("results").length() != 2) {
return arangodb::Result(TRI_ERROR_HOT_BACKUP_INTERNAL,
"invalid agency result while acuiring backup lock");
return arangodb::Result(
TRI_ERROR_HOT_BACKUP_INTERNAL,
"invalid agency result while acquiring backup lock");
}
auto ar = rv->slice().get("results");
@@ -4366,6 +4367,8 @@ arangodb::Result ClusterInfo::agencyHotBackupLock(std::string const& backupId,
std::this_thread::sleep_for(std::chrono::duration<double>(wait));
}
agencyHotBackupUnlock(backupId, timeout, supervisionOff);
return arangodb::Result(
TRI_ERROR_HOT_BACKUP_INTERNAL,
"timeout waiting for maintenance mode to be activated in agency");
@@ -4436,11 +4439,10 @@ arangodb::Result ClusterInfo::agencyHotBackupUnlock(std::string const& backupId,
if (result.successful()) {
if (!result.slice().isArray() || result.slice().length() != 1 ||
!result.slice()[0].hasKey(modepv) || !result.slice()[0].get(modepv).isString()) {
return arangodb::Result(TRI_ERROR_HOT_BACKUP_INTERNAL, std::
string("invalid JSON from agency, when desctivating supervision mode:") +
result
.slice()
.toJson());
return arangodb::Result(
TRI_ERROR_HOT_BACKUP_INTERNAL,
std::string("invalid JSON from agency, when deactivating supervision mode:") +
result.slice().toJson());
}
if (result.slice()[0].get(modepv).isEqualString("Normal")) {

View File

@@ -4047,6 +4047,13 @@ arangodb::Result hotBackupCoordinator(VPackSlice const payload, VPackBuilder& re
double timeout = (payload.isObject() && payload.hasKey("timeout"))
? payload.get("timeout").getNumber<double>()
: 120.;
// unreasonably short even under allowInconsistent
if (timeout < 2.5) {
auto const tmp = timeout;
timeout = 2.5;
LOG_TOPIC("67ae2", WARN, Logger::BACKUP)
<< "Backup timeout " << tmp << " is too short - raising to " << timeout;
}
using namespace std::chrono;
auto end = steady_clock::now() + milliseconds(static_cast<uint64_t>(1000 * timeout));
@@ -4057,6 +4064,7 @@ arangodb::Result hotBackupCoordinator(VPackSlice const payload, VPackBuilder& re
// We specifically want to make sure that no other backup is going on.
bool supervisionOff = false;
auto result = ci->agencyHotBackupLock(backupId, timeout, supervisionOff);
if (!result.ok()) {
// Failed to go to backup mode
result.reset(TRI_ERROR_HOT_BACKUP_INTERNAL,
@@ -4069,6 +4077,7 @@ arangodb::Result hotBackupCoordinator(VPackSlice const payload, VPackBuilder& re
LOG_TOPIC("352d6", INFO, Logger::BACKUP)
<< "hot backup didn't get to locking phase within " << timeout << "s.";
auto hlRes = ci->agencyHotBackupUnlock(backupId, timeout, supervisionOff);
return arangodb::Result(TRI_ERROR_CLUSTER_TIMEOUT,
"hot backup timeout before locking phase");
}