mirror of https://gitee.com/bigwinds/arangodb
Fix the behaviour of synchronous replication when a follower has to be dropped.
If we are already shutting down, we do not drop a follower. If we cannot drop a follower (e.g. no contact to the agency), the operation fails with ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER.
This commit is contained in:
parent
f1db8666ac
commit
29921d32a8
|
@ -24,6 +24,7 @@
|
|||
|
||||
#include "FollowerInfo.h"
|
||||
|
||||
#include "ApplicationFeatures/ApplicationServer.h"
|
||||
#include "Cluster/ServerState.h"
|
||||
#include "VocBase/LogicalCollection.h"
|
||||
|
||||
|
@ -183,7 +184,12 @@ void FollowerInfo::add(ServerID const& sid) {
|
|||
/// since been dropped (see `dropFollowerInfo` below).
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void FollowerInfo::remove(ServerID const& sid) {
|
||||
bool FollowerInfo::remove(ServerID const& sid) {
|
||||
if (application_features::ApplicationServer::isStopping()) {
|
||||
// If we are already shutting down, we cannot be trusted any more with
|
||||
// such an important decision as dropping a follower.
|
||||
return false;
|
||||
}
|
||||
MUTEX_LOCKER(locker, _mutex);
|
||||
|
||||
// First check if there is anything to do:
|
||||
|
@ -195,7 +201,7 @@ void FollowerInfo::remove(ServerID const& sid) {
|
|||
}
|
||||
}
|
||||
if (!found) {
|
||||
return; // nothing to do
|
||||
return true; // nothing to do
|
||||
}
|
||||
|
||||
auto v = std::make_shared<std::vector<ServerID>>();
|
||||
|
@ -207,10 +213,11 @@ void FollowerInfo::remove(ServerID const& sid) {
|
|||
}
|
||||
}
|
||||
}
|
||||
auto _oldFollowers = _followers;
|
||||
_followers = v; // will cast to std::vector<ServerID> const
|
||||
#ifdef DEBUG_SYNC_REPLICATION
|
||||
if (!AgencyCommManager::MANAGER) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
// Now tell the agency, path is
|
||||
|
@ -267,13 +274,15 @@ void FollowerInfo::remove(ServerID const& sid) {
|
|||
usleep(500000);
|
||||
} while (TRI_microtime() < startTime + 30);
|
||||
if (!success) {
|
||||
_followers = _oldFollowers;
|
||||
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||
<< "FollowerInfo::remove, timeout in agency operation for key " << path;
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief clear follower list, no changes in agency necesary
|
||||
/// @brief clear follower list, no changes in agency necessary
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void FollowerInfo::clear() {
|
||||
|
|
|
@ -67,7 +67,7 @@ class FollowerInfo {
|
|||
/// way.
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void remove(ServerID const& s);
|
||||
bool remove(ServerID const& s);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief clear follower list, no changes in agency necessary
|
||||
|
|
|
@ -1534,10 +1534,16 @@ OperationResult transaction::Methods::insertLocal(
|
|||
}
|
||||
if (!replicationWorked) {
|
||||
auto const& followerInfo = collection->followers();
|
||||
followerInfo->remove((*followers)[i]);
|
||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
||||
<< "insertLocal: dropping follower " << (*followers)[i]
|
||||
<< " for shard " << collectionName;
|
||||
if (followerInfo->remove((*followers)[i])) {
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION)
|
||||
<< "insertLocal: dropping follower " << (*followers)[i]
|
||||
<< " for shard " << collectionName;
|
||||
} else {
|
||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
||||
<< "insertLocal: could not drop follower "
|
||||
<< (*followers)[i] << " for shard " << collectionName;
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1850,7 +1856,16 @@ OperationResult transaction::Methods::modifyLocal(
|
|||
}
|
||||
if (!replicationWorked) {
|
||||
auto const& followerInfo = collection->followers();
|
||||
followerInfo->remove((*followers)[i]);
|
||||
if (followerInfo->remove((*followers)[i])) {
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION)
|
||||
<< "modifyLocal: dropping follower " << (*followers)[i]
|
||||
<< " for shard " << collectionName;
|
||||
} else {
|
||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
||||
<< "modifyLocal: could not drop follower "
|
||||
<< (*followers)[i] << " for shard " << collectionName;
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER);
|
||||
}
|
||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
||||
<< "modifyLocal: dropping follower " << (*followers)[i]
|
||||
<< " for shard " << collectionName;
|
||||
|
@ -2094,10 +2109,16 @@ OperationResult transaction::Methods::removeLocal(
|
|||
}
|
||||
if (!replicationWorked) {
|
||||
auto const& followerInfo = collection->followers();
|
||||
followerInfo->remove((*followers)[i]);
|
||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
||||
<< "removeLocal: dropping follower " << (*followers)[i]
|
||||
<< " for shard " << collectionName;
|
||||
if (followerInfo->remove((*followers)[i])) {
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION)
|
||||
<< "removeLocal: dropping follower " << (*followers)[i]
|
||||
<< " for shard " << collectionName;
|
||||
} else {
|
||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
||||
<< "removeLocal: could not drop follower "
|
||||
<< (*followers)[i] << " for shard " << collectionName;
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2270,10 +2291,16 @@ OperationResult transaction::Methods::truncateLocal(
|
|||
requests[i].result.answer_code == rest::ResponseCode::OK);
|
||||
if (!replicationWorked) {
|
||||
auto const& followerInfo = collection->followers();
|
||||
followerInfo->remove((*followers)[i]);
|
||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
||||
<< "truncateLocal: dropping follower " << (*followers)[i]
|
||||
<< " for shard " << collectionName;
|
||||
if (followerInfo->remove((*followers)[i])) {
|
||||
LOG_TOPIC(WARN, Logger::REPLICATION)
|
||||
<< "truncateLocal: dropping follower " << (*followers)[i]
|
||||
<< " for shard " << collectionName;
|
||||
} else {
|
||||
LOG_TOPIC(ERR, Logger::REPLICATION)
|
||||
<< "truncateLocal: could not drop follower "
|
||||
<< (*followers)[i] << " for shard " << collectionName;
|
||||
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -166,6 +166,7 @@
|
|||
"ERROR_CLUSTER_MUST_NOT_DROP_COLL_OTHER_DISTRIBUTESHARDSLIKE" : { "code" : 1485, "message" : "must not drop collection while another has a distributeShardsLike attribute pointing to it" },
|
||||
"ERROR_CLUSTER_UNKNOWN_DISTRIBUTESHARDSLIKE" : { "code" : 1486, "message" : "must not have a distributeShardsLike attribute pointing to an unknown collection" },
|
||||
"ERROR_CLUSTER_INSUFFICIENT_DBSERVERS" : { "code" : 1487, "message" : "the number of current dbservers is lower than the requested replicationFactor" },
|
||||
"ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER" : { "code" : 1488, "message" : "a follower could not be dropped in agency" },
|
||||
"ERROR_QUERY_KILLED" : { "code" : 1500, "message" : "query killed" },
|
||||
"ERROR_QUERY_PARSE" : { "code" : 1501, "message" : "%s" },
|
||||
"ERROR_QUERY_EMPTY" : { "code" : 1502, "message" : "query is empty" },
|
||||
|
|
|
@ -202,7 +202,7 @@ ERROR_CLUSTER_CHAIN_OF_DISTRIBUTESHARDSLIKE,1484,"chain of distributeShardsLike
|
|||
ERROR_CLUSTER_MUST_NOT_DROP_COLL_OTHER_DISTRIBUTESHARDSLIKE,1485,"must not drop collection while another has a distributeShardsLike attribute pointing to it","Will be raised if one tries to drop a collection to which another collection points with its distributeShardsLike attribute."
|
||||
ERROR_CLUSTER_UNKNOWN_DISTRIBUTESHARDSLIKE,1486,"must not have a distributeShardsLike attribute pointing to an unknown collection","Will be raised if one tries to create a collection which points to an unknown collection in its distributeShardsLike attribute."
|
||||
ERROR_CLUSTER_INSUFFICIENT_DBSERVERS,1487,"the number of current dbservers is lower than the requested replicationFactor","Will be raised if one tries to create a collection with a replicationFactor greater than the available number of DBServers."
|
||||
|
||||
ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER,1488,"a follower could not be dropped in agency","Will be raised if a follower that ought to be dropped could not be dropped in the agency (under Current)."
|
||||
|
||||
################################################################################
|
||||
## ArangoDB query errors
|
||||
|
|
|
@ -162,6 +162,7 @@ void TRI_InitializeErrorMessages () {
|
|||
REG_ERROR(ERROR_CLUSTER_MUST_NOT_DROP_COLL_OTHER_DISTRIBUTESHARDSLIKE, "must not drop collection while another has a distributeShardsLike attribute pointing to it");
|
||||
REG_ERROR(ERROR_CLUSTER_UNKNOWN_DISTRIBUTESHARDSLIKE, "must not have a distributeShardsLike attribute pointing to an unknown collection");
|
||||
REG_ERROR(ERROR_CLUSTER_INSUFFICIENT_DBSERVERS, "the number of current dbservers is lower than the requested replicationFactor");
|
||||
REG_ERROR(ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER, "a follower could not be dropped in agency");
|
||||
REG_ERROR(ERROR_QUERY_KILLED, "query killed");
|
||||
REG_ERROR(ERROR_QUERY_PARSE, "%s");
|
||||
REG_ERROR(ERROR_QUERY_EMPTY, "query is empty");
|
||||
|
|
|
@ -400,6 +400,9 @@
|
|||
/// - 1487: @LIT{the number of current dbservers is lower than the requested replicationFactor}
|
||||
/// Will be raised if one tries to create a collection with a
|
||||
/// replicationFactor greater than the available number of DBServers.
|
||||
/// - 1488: @LIT{a follower could not be dropped in agency}
|
||||
/// Will be raised if a follower that ought to be dropped could not be
|
||||
/// dropped in the agency (under Current).
|
||||
/// - 1500: @LIT{query killed}
|
||||
/// Will be raised when a running query is killed by an explicit admin
|
||||
/// command.
|
||||
|
@ -2372,6 +2375,17 @@ void TRI_InitializeErrorMessages ();
|
|||
|
||||
#define TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS (1487)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief 1488: ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER
|
||||
///
|
||||
/// a follower could not be dropped in agency
|
||||
///
|
||||
/// Will be raised if a follower that ought to be dropped could not be dropped
|
||||
/// in the agency (under Current).
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define TRI_ERROR_CLUSTER_COULD_NOT_DROP_FOLLOWER (1488)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief 1500: ERROR_QUERY_KILLED
|
||||
///
|
||||
|
|
Loading…
Reference in New Issue