1
0
Fork 0

Bug fix/collection babies race timeout (#9185)

* Fixed include guard.

* Forward port of 3.4 bug-fix

* Removed lockers altogether; we are already secured by a mutex

* Fixed recursive lock gathering
This commit is contained in:
Michael Hackstein 2019-06-13 19:11:24 +02:00 committed by Frank Celler
parent cc125b377c
commit 2c78e2471b
4 changed files with 39 additions and 26 deletions

View File

@ -1,6 +1,9 @@
devel
-----
* sped up the collection creation process in the cluster when not all agency callbacks
are delivered successfully.
* increased performance of document inserts, by reducing the number of checks in unique / primary indexes
* fixed a callback function in the web UI where the variable `this` was out of scope.

View File

@ -125,7 +125,7 @@ bool AgencyCallback::execute(std::shared_ptr<VPackBuilder> newData) {
return result;
}
void AgencyCallback::executeByCallbackOrTimeout(double maxTimeout) {
bool AgencyCallback::executeByCallbackOrTimeout(double maxTimeout) {
// One needs to acquire the mutex of the condition variable
// before entering this function!
if (!_cv.wait(static_cast<uint64_t>(maxTimeout * 1000000.0)) &&
@ -134,5 +134,7 @@ void AgencyCallback::executeByCallbackOrTimeout(double maxTimeout) {
<< "Waiting done and nothing happended. Refetching to be sure";
// mop: watches have not triggered during our sleep...recheck to be sure
refetchAndUpdate(false, true); // Force a check
return true;
}
return false;
}

View File

@ -112,9 +112,12 @@ class AgencyCallback {
//////////////////////////////////////////////////////////////////////////////
/// @brief wait until a callback is received or a timeout has happened
///
/// @return true => if we got woken up after maxTimeout
/// false => if someone else signaled the condition variable
//////////////////////////////////////////////////////////////////////////////
void executeByCallbackOrTimeout(double);
bool executeByCallbackOrTimeout(double);
//////////////////////////////////////////////////////////////////////////////
/// @brief private members

View File

@ -1977,13 +1977,9 @@ Result ClusterInfo::createCollectionsCoordinator(std::string const& databaseName
if (nrDone->load(std::memory_order_acquire) == infos.size()) {
{
// We need to lock all condition variables
std::vector<::arangodb::basics::ConditionLocker> lockers;
for (auto& cb : agencyCallbacks) {
CONDITION_LOCKER(locker, cb->_cv);
}
// We do not need to lock all condition variables;
// we are safe because of cacheMutex
cbGuard.fire();
// After the guard is done we can release the lockers
}
// Now we need to remove TTL + the IsBuilding flag in Agency
opers.clear();
@ -2009,13 +2005,9 @@ Result ClusterInfo::createCollectionsCoordinator(std::string const& databaseName
}
if (tmpRes > TRI_ERROR_NO_ERROR) {
{
// We need to lock all condition variables
std::vector<::arangodb::basics::ConditionLocker> lockers;
for (auto& cb : agencyCallbacks) {
CONDITION_LOCKER(locker, cb->_cv);
}
// We do not need to lock all condition variables;
// we are safe because of cacheMutex
cbGuard.fire();
// After the guard is done we can release the lockers
}
// report error
@ -2047,9 +2039,22 @@ Result ClusterInfo::createCollectionsCoordinator(std::string const& databaseName
TRI_ASSERT(agencyCallbacks.size() == infos.size());
for (size_t i = 0; i < infos.size(); ++i) {
if (infos[i].state == ClusterCollectionCreationInfo::INIT) {
bool wokenUp = false;
{
// This one has not responded, wait for it.
CONDITION_LOCKER(locker, agencyCallbacks[i]->_cv);
agencyCallbacks[i]->executeByCallbackOrTimeout(interval);
wokenUp = agencyCallbacks[i]->executeByCallbackOrTimeout(interval);
}
if (wokenUp) {
++i;
// We got woken up by the wait timeout, not by a callback.
// Let us check if we skipped other callbacks as well
for (; i < infos.size(); ++i) {
if (infos[i].state == ClusterCollectionCreationInfo::INIT) {
agencyCallbacks[i]->refetchAndUpdate(true, false);
}
}
}
break;
}
}