1
0
Fork 0

Merge branch 'generic-col-types' of github.com:arangodb/arangodb into obi-velocystream

* 'generic-col-types' of github.com:arangodb/arangodb:
  Improve version handling in ClusterInfo.
  Add further logging where a bug is likely.
  make error message unambiguous

Conflicts:
	lib/Rest/HttpResponse.cpp
This commit is contained in:
Jan Christoph Uhde 2016-09-05 16:42:41 +02:00
commit d0fdfa727a
5 changed files with 73 additions and 55 deletions

View File

@ -379,9 +379,13 @@ void ClusterInfo::loadPlan() {
DatabaseFeature* databaseFeature = DatabaseFeature* databaseFeature =
application_features::ApplicationServer::getFeature<DatabaseFeature>( application_features::ApplicationServer::getFeature<DatabaseFeature>(
"Database"); "Database");
uint64_t storedVersion = _planProt.version; ++_planProt.wantedVersion; // Indicate that after *NOW* somebody has to
MUTEX_LOCKER(mutexLocker, _planProt.mutex); // reread from the agency!
if (_planProt.version > storedVersion) { MUTEX_LOCKER(mutexLocker, _planProt.mutex); // only one may work at a time
uint64_t storedVersion = _planProt.wantedVersion; // this is the version
// we will set in the end
if (_planProt.doneVersion == storedVersion) {
// Somebody else did, what we intended to do, so just return // Somebody else did, what we intended to do, so just return
return; return;
} }
@ -502,7 +506,7 @@ void ClusterInfo::loadPlan() {
_shards.swap(newShards); _shards.swap(newShards);
_shardKeys.swap(newShardKeys); _shardKeys.swap(newShardKeys);
} }
_planProt.version++; // such that others notice our change _planProt.doneVersion = storedVersion;
_planProt.isValid = true; // will never be reset to false _planProt.isValid = true; // will never be reset to false
} else { } else {
LOG(ERR) << "\"Plan\" is not an object in agency"; LOG(ERR) << "\"Plan\" is not an object in agency";
@ -525,9 +529,12 @@ void ClusterInfo::loadPlan() {
static std::string const prefixCurrent = "Current"; static std::string const prefixCurrent = "Current";
void ClusterInfo::loadCurrent() { void ClusterInfo::loadCurrent() {
uint64_t storedVersion = _currentProt.version; ++_currentProt.wantedVersion; // Indicate that after *NOW* somebody has to
MUTEX_LOCKER(mutexLocker, _currentProt.mutex); // reread from the agency!
if (_currentProt.version > storedVersion) { MUTEX_LOCKER(mutexLocker, _currentProt.mutex); // only one may work at a time
uint64_t storedVersion = _currentProt.wantedVersion; // this is the version
// we will set at the end
if (_currentProt.doneVersion == storedVersion) {
// Somebody else did, what we intended to do, so just return // Somebody else did, what we intended to do, so just return
return; return;
} }
@ -617,7 +624,7 @@ void ClusterInfo::loadCurrent() {
_currentCollections.swap(newCollections); _currentCollections.swap(newCollections);
_shardIds.swap(newShardIds); _shardIds.swap(newShardIds);
} }
_currentProt.version++; // such that others notice our change _currentProt.doneVersion = storedVersion;
_currentProt.isValid = true; // will never be reset to false _currentProt.isValid = true; // will never be reset to false
} else { } else {
LOG(ERR) << "Current is not an object!"; LOG(ERR) << "Current is not an object!";
@ -1851,9 +1858,12 @@ static std::string const prefixServers = "Current/ServersRegistered";
void ClusterInfo::loadServers() { void ClusterInfo::loadServers() {
uint64_t storedVersion = _serversProt.version; ++_serversProt.wantedVersion; // Indicate that after *NOW* somebody has to
// reread from the agency!
MUTEX_LOCKER(mutexLocker, _serversProt.mutex); MUTEX_LOCKER(mutexLocker, _serversProt.mutex);
if (_serversProt.version > storedVersion) { uint64_t storedVersion = _serversProt.wantedVersion; // this is the version
// we will set in the end
if (_serversProt.doneVersion == storedVersion) {
// Somebody else did, what we intended to do, so just return // Somebody else did, what we intended to do, so just return
return; return;
} }
@ -1883,7 +1893,7 @@ void ClusterInfo::loadServers() {
{ {
WRITE_LOCKER(writeLocker, _serversProt.lock); WRITE_LOCKER(writeLocker, _serversProt.lock);
_servers.swap(newServers); _servers.swap(newServers);
_serversProt.version++; // such that others notice our change _serversProt.doneVersion = storedVersion;
_serversProt.isValid = true; // will never be reset to false _serversProt.isValid = true; // will never be reset to false
} }
return; return;
@ -1976,9 +1986,12 @@ std::string ClusterInfo::getServerName(std::string const& endpoint) {
static std::string const prefixCurrentCoordinators = "Current/Coordinators"; static std::string const prefixCurrentCoordinators = "Current/Coordinators";
void ClusterInfo::loadCurrentCoordinators() { void ClusterInfo::loadCurrentCoordinators() {
uint64_t storedVersion = _coordinatorsProt.version; ++_coordinatorsProt.wantedVersion; // Indicate that after *NOW* somebody
// has to reread from the agency!
MUTEX_LOCKER(mutexLocker, _coordinatorsProt.mutex); MUTEX_LOCKER(mutexLocker, _coordinatorsProt.mutex);
if (_coordinatorsProt.version > storedVersion) { uint64_t storedVersion = _coordinatorsProt.wantedVersion; // this is the
// version we will set in the end
if (_coordinatorsProt.doneVersion == storedVersion) {
// Somebody else did, what we intended to do, so just return // Somebody else did, what we intended to do, so just return
return; return;
} }
@ -2004,7 +2017,7 @@ void ClusterInfo::loadCurrentCoordinators() {
{ {
WRITE_LOCKER(writeLocker, _coordinatorsProt.lock); WRITE_LOCKER(writeLocker, _coordinatorsProt.lock);
_coordinators.swap(newCoordinators); _coordinators.swap(newCoordinators);
_coordinatorsProt.version++; // such that others notice our change _coordinatorsProt.doneVersion = storedVersion;
_coordinatorsProt.isValid = true; // will never be reset to false _coordinatorsProt.isValid = true; // will never be reset to false
} }
return; return;
@ -2028,9 +2041,12 @@ static std::string const prefixTargetCleaned = "Target/CleanedOutServers";
static std::string const prefixTargetFailed = "Target/FailedServers"; static std::string const prefixTargetFailed = "Target/FailedServers";
void ClusterInfo::loadCurrentDBServers() { void ClusterInfo::loadCurrentDBServers() {
uint64_t storedVersion = _DBServersProt.version; ++_DBServersProt.wantedVersion; // Indicate that after *NOW* somebody has to
// reread from the agency!
MUTEX_LOCKER(mutexLocker, _DBServersProt.mutex); MUTEX_LOCKER(mutexLocker, _DBServersProt.mutex);
if (_DBServersProt.version > storedVersion) { uint64_t storedVersion = _DBServersProt.wantedVersion; // this is the version
// we will set in the end
if (_DBServersProt.doneVersion == storedVersion) {
// Somebody else did, what we intended to do, so just return // Somebody else did, what we intended to do, so just return
return; return;
} }
@ -2089,7 +2105,7 @@ void ClusterInfo::loadCurrentDBServers() {
{ {
WRITE_LOCKER(writeLocker, _DBServersProt.lock); WRITE_LOCKER(writeLocker, _DBServersProt.lock);
_DBServers.swap(newDBServers); _DBServers.swap(newDBServers);
_DBServersProt.version++; // such that others notice our change _DBServersProt.doneVersion = storedVersion;
_DBServersProt.isValid = true; // will never be reset to false _DBServersProt.isValid = true; // will never be reset to false
} }
return; return;

View File

@ -529,28 +529,28 @@ class ClusterInfo {
// Cached data from the agency, we reload whenever necessary: // Cached data from the agency, we reload whenever necessary:
// We group the data, each group has an atomic "valid-flag" // We group the data, each group has an atomic "valid-flag" which is
// which is used for lazy loading in the beginning. It starts // used for lazy loading in the beginning. It starts as false, is set
// as false, is set to true at each reload and is never reset // to true at each reload and is only reset to false if the cache
// to false in the lifetime of the server. The variable is // needs to be invalidated. The variable is atomic to be able to check
// atomic to be able to check it without acquiring // it without acquiring the read lock (see below). Flush is just an
// the read lock (see below). Flush is just an explicit reload // explicit reload for all data and is only used in tests.
// for all data and is only used in tests.
// Furthermore, each group has a mutex that protects against // Furthermore, each group has a mutex that protects against
// simultaneously contacting the agency for an update. // simultaneously contacting the agency for an update.
// In addition, each group has an atomic version number, this is used // In addition, each group has two atomic version numbers, these are
// to prevent a stampede if multiple threads notice concurrently // used to prevent a stampede if multiple threads notice concurrently
// that an update from the agency is necessary. Finally, there is // that an update from the agency is necessary. Finally, there is a
// a read/write lock which protects the actual data structure. // read/write lock which protects the actual data structure.
// We encapsulate this protection in the struct ProtectionData: // We encapsulate this protection in the struct ProtectionData:
struct ProtectionData { struct ProtectionData {
std::atomic<bool> isValid; std::atomic<bool> isValid;
Mutex mutex; Mutex mutex;
std::atomic<uint64_t> version; std::atomic<uint64_t> wantedVersion;
std::atomic<uint64_t> doneVersion;
arangodb::basics::ReadWriteLock lock; arangodb::basics::ReadWriteLock lock;
ProtectionData() : isValid(false), version(0) {} ProtectionData() : isValid(false), wantedVersion(0), doneVersion(0) {}
}; };
// The servers, first all, we only need Current here: // The servers, first all, we only need Current here:

View File

@ -699,8 +699,7 @@ int InitialSyncer::handleCollectionDump(
} }
if (response->getHttpReturnCode() == 404) { if (response->getHttpReturnCode() == 404) {
// unknown job, we can abort // unknown job, we can abort
errorMsg = "no response received from master at " + errorMsg = "job not found on master at " + _masterInfo._endpoint;
_masterInfo._endpoint;
return TRI_ERROR_REPLICATION_NO_RESPONSE; return TRI_ERROR_REPLICATION_NO_RESPONSE;
} }
} }
@ -889,8 +888,7 @@ int InitialSyncer::handleCollectionSync(
} }
if (response->getHttpReturnCode() == 404) { if (response->getHttpReturnCode() == 404) {
// unknown job, we can abort // unknown job, we can abort
errorMsg = "no response received from master at " + errorMsg = "job not found on master at " + _masterInfo._endpoint;
_masterInfo._endpoint;
return TRI_ERROR_REPLICATION_NO_RESPONSE; return TRI_ERROR_REPLICATION_NO_RESPONSE;
} }
} }

View File

@ -141,7 +141,12 @@ class GeneralResponse {
addPayload(std::forward<Payload>(payload), &options, resolveExternals); addPayload(std::forward<Payload>(payload), &options, resolveExternals);
} }
void addPayloadPreconditions() { TRI_ASSERT(_vpackPayloads.size() == 0); } void addPayloadPreconditions() {
if (_vpackPayloads.size() != 0) {
LOG(ERR) << "Payload set twice";
TRI_ASSERT(_vpackPayloads.size() == 0);
}
}
virtual void addPayloadPreHook(bool inputIsBuffer, bool& resolveExternals, virtual void addPayloadPreHook(bool inputIsBuffer, bool& resolveExternals,
bool& skipBody) {} bool& skipBody) {}
void addPayload(VPackSlice const&, void addPayload(VPackSlice const&,

View File

@ -306,7 +306,6 @@ void HttpResponse::addPayloadPostHook(
bool resolveExternals = true, bool bodySkipped = false) { bool resolveExternals = true, bool bodySkipped = false) {
VPackSlice const* slicePtr; VPackSlice const* slicePtr;
VPackSlice tmpSlice; VPackSlice tmpSlice;
if (!bodySkipped) { if (!bodySkipped) {
// we have Probably resolved externals // we have Probably resolved externals
TRI_ASSERT(!_vpackPayloads.empty()); TRI_ASSERT(!_vpackPayloads.empty());