1
0
Fork 0

[3.5] agency lock left behind (#10022)

* short timeout issue and discarded agency lock removal

* short timeout issue and discarded agency lock removal

* no hot backup in 3.5.0
This commit is contained in:
Kaveh Vahedipour 2019-09-16 23:13:54 +02:00 committed by KVS85
parent 0b1d5be33e
commit 999e4b8873
3 changed files with 19 additions and 8 deletions

View File

@ -22,7 +22,7 @@ v3.5.1 (XXXX-XX-XX)
* Disallow creation of TTL indexes on sub-attributes.
Creation of such indexes was not caught before, but the resulting
Creation of such indexes was not caught before, but the resulting
indexes were defunct. From now on the creation of TTL indexes on sub-
attributes is disallowed.

View File

@ -4311,8 +4311,9 @@ arangodb::Result ClusterInfo::agencyHotBackupLock(std::string const& backupId,
if (!rv->slice().isObject() || !rv->slice().hasKey("results") ||
!rv->slice().get("results").isArray() || rv->slice().get("results").length() != 2) {
return arangodb::Result(TRI_ERROR_HOT_BACKUP_INTERNAL,
"invalid agency result while acuiring backup lock");
return arangodb::Result(
TRI_ERROR_HOT_BACKUP_INTERNAL,
"invalid agency result while acquiring backup lock");
}
auto ar = rv->slice().get("results");
@ -4366,6 +4367,8 @@ arangodb::Result ClusterInfo::agencyHotBackupLock(std::string const& backupId,
std::this_thread::sleep_for(std::chrono::duration<double>(wait));
}
agencyHotBackupUnlock(backupId, timeout, supervisionOff);
return arangodb::Result(
TRI_ERROR_HOT_BACKUP_INTERNAL,
"timeout waiting for maintenance mode to be activated in agency");
@ -4436,11 +4439,10 @@ arangodb::Result ClusterInfo::agencyHotBackupUnlock(std::string const& backupId,
if (result.successful()) {
if (!result.slice().isArray() || result.slice().length() != 1 ||
!result.slice()[0].hasKey(modepv) || !result.slice()[0].get(modepv).isString()) {
return arangodb::Result(TRI_ERROR_HOT_BACKUP_INTERNAL, std::
string("invalid JSON from agency, when desctivating supervision mode:") +
result
.slice()
.toJson());
return arangodb::Result(
TRI_ERROR_HOT_BACKUP_INTERNAL,
std::string("invalid JSON from agency, when deactivating supervision mode:") +
result.slice().toJson());
}
if (result.slice()[0].get(modepv).isEqualString("Normal")) {

View File

@ -4047,6 +4047,13 @@ arangodb::Result hotBackupCoordinator(VPackSlice const payload, VPackBuilder& re
double timeout = (payload.isObject() && payload.hasKey("timeout"))
? payload.get("timeout").getNumber<double>()
: 120.;
// a timeout below 2.5s is unreasonably short, even under allowInconsistent; raise it to 2.5s
if (timeout < 2.5) {
auto const tmp = timeout;
timeout = 2.5;
LOG_TOPIC("67ae2", WARN, Logger::BACKUP)
<< "Backup timeout " << tmp << " is too short - raising to " << timeout;
}
using namespace std::chrono;
auto end = steady_clock::now() + milliseconds(static_cast<uint64_t>(1000 * timeout));
@ -4057,6 +4064,7 @@ arangodb::Result hotBackupCoordinator(VPackSlice const payload, VPackBuilder& re
// We specifically want to make sure that no other backup is going on.
bool supervisionOff = false;
auto result = ci->agencyHotBackupLock(backupId, timeout, supervisionOff);
if (!result.ok()) {
// Failed to go to backup mode
result.reset(TRI_ERROR_HOT_BACKUP_INTERNAL,
@ -4069,6 +4077,7 @@ arangodb::Result hotBackupCoordinator(VPackSlice const payload, VPackBuilder& re
LOG_TOPIC("352d6", INFO, Logger::BACKUP)
<< "hot backup didn't get to locking phase within " << timeout << "s.";
auto hlRes = ci->agencyHotBackupUnlock(backupId, timeout, supervisionOff);
return arangodb::Result(TRI_ERROR_CLUSTER_TIMEOUT,
"hot backup timeout before locking phase");
}