mirror of https://gitee.com/bigwinds/arangodb
Fix index creation in cluster. (#7440)
* Fix index creation in cluster. Simplify and correct error handling logic in ensureIndexCoordinator. * After index creation, wait until index appears. We wait until the Supervision has removed the isBuilding flag and the coordinator has reloaded the Plan. * More index handling fixes. * Directly remove isBuilding in ensureIndexCoordinator (again). * Fix catch tests by holding mutex shorter. * Better mutex handling in ClusterInfo.
This commit is contained in:
parent
683d45bd68
commit
ae29e5d2ba
|
@ -1860,12 +1860,11 @@ int ClusterInfo::createCollectionCoordinator(std::string const& databaseName,
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
CONDITION_LOCKER(locker, agencyCallback->_cv);
|
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
|
||||||
int tmpRes = dbServerResult->load(std::memory_order_acquire);
|
int tmpRes = dbServerResult->load(std::memory_order_acquire);
|
||||||
if (tmpRes >= 0) {
|
if (tmpRes >= 0) {
|
||||||
|
CONDITION_LOCKER(locker, agencyCallback->_cv);
|
||||||
cbGuard.fire(); // unregister cb before accessing errMsg
|
cbGuard.fire(); // unregister cb before accessing errMsg
|
||||||
errorMsg = *errMsg;
|
errorMsg = *errMsg;
|
||||||
loadCurrent();
|
loadCurrent();
|
||||||
|
@ -1909,7 +1908,10 @@ int ClusterInfo::createCollectionCoordinator(std::string const& databaseName,
|
||||||
return setErrormsg(TRI_ERROR_SHUTTING_DOWN, errorMsg);
|
return setErrormsg(TRI_ERROR_SHUTTING_DOWN, errorMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
agencyCallback->executeByCallbackOrTimeout(interval);
|
{
|
||||||
|
CONDITION_LOCKER(locker, agencyCallback->_cv);
|
||||||
|
agencyCallback->executeByCallbackOrTimeout(interval);
|
||||||
|
}
|
||||||
if (!application_features::ApplicationServer::isRetryOK()) {
|
if (!application_features::ApplicationServer::isRetryOK()) {
|
||||||
return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
|
return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
|
||||||
}
|
}
|
||||||
|
@ -2458,10 +2460,6 @@ int ClusterInfo::ensureIndexCoordinator(
|
||||||
VPackSlice const& slice, bool create, VPackBuilder& resultBuilder,
|
VPackSlice const& slice, bool create, VPackBuilder& resultBuilder,
|
||||||
std::string& errorMsg, double timeout) {
|
std::string& errorMsg, double timeout) {
|
||||||
|
|
||||||
// The timepoint at which we timeout
|
|
||||||
using namespace std::chrono;
|
|
||||||
auto const endTime = steady_clock::now() + duration<double>(timeout);
|
|
||||||
|
|
||||||
// check index id
|
// check index id
|
||||||
uint64_t iid = 0;
|
uint64_t iid = 0;
|
||||||
|
|
||||||
|
@ -2482,9 +2480,12 @@ int ClusterInfo::ensureIndexCoordinator(
|
||||||
// Keep trying for 2 minutes, if it's preconditions, which are stopping us
|
// Keep trying for 2 minutes, if it's preconditions, which are stopping us
|
||||||
do {
|
do {
|
||||||
resultBuilder.clear();
|
resultBuilder.clear();
|
||||||
errorCode = ensureIndexCoordinatorWithoutRollback(
|
errorCode = ensureIndexCoordinatorInner(
|
||||||
databaseName, collectionID, idString, slice, create, resultBuilder,
|
databaseName, collectionID, idString, slice, create, resultBuilder,
|
||||||
errorMsg, timeout);
|
errorMsg, timeout);
|
||||||
|
// Note that this function sets the errorMsg unless it is precondition
|
||||||
|
// failed, in which case we retry, if this times out, we need to set
|
||||||
|
// it ourselves, otherwise all is done!
|
||||||
|
|
||||||
if (errorCode == TRI_ERROR_HTTP_PRECONDITION_FAILED) {
|
if (errorCode == TRI_ERROR_HTTP_PRECONDITION_FAILED) {
|
||||||
auto diff = std::chrono::steady_clock::now() - start;
|
auto diff = std::chrono::steady_clock::now() - start;
|
||||||
|
@ -2493,144 +2494,56 @@ int ClusterInfo::ensureIndexCoordinator(
|
||||||
std::this_thread::sleep_for(std::chrono::steady_clock::duration(wt));
|
std::this_thread::sleep_for(std::chrono::steady_clock::duration(wt));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
errorCode
|
||||||
|
= setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_INDEX_IN_PLAN,
|
||||||
|
errorMsg);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
} while(true);
|
} while(true);
|
||||||
} catch (basics::Exception const& ex) {
|
} catch (basics::Exception const& ex) {
|
||||||
errorCode = ex.code();
|
errorCode = ex.code();
|
||||||
|
setErrormsg(errorCode, errorMsg);
|
||||||
|
errorMsg += std::string(", exception: ") + ex.what();
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
errorCode = TRI_ERROR_INTERNAL;
|
errorCode = TRI_ERROR_INTERNAL;
|
||||||
|
setErrormsg(errorCode, errorMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (application_features::ApplicationServer::isStopping()) {
|
// We get here in any case eventually, regardless of whether we have
|
||||||
return errorCode;
|
// - succeeded with lookup or index creation
|
||||||
|
// - failed because of a timeout and rollback
|
||||||
|
// - some other error
|
||||||
|
// There is nothing more to do here.
|
||||||
|
|
||||||
|
if (!application_features::ApplicationServer::isStopping()) {
|
||||||
|
loadPlan();
|
||||||
}
|
}
|
||||||
loadPlan();
|
return errorCode;
|
||||||
|
|
||||||
std::string const indexPath =
|
|
||||||
"Plan/Collections/" + databaseName + "/" + collectionID + "/indexes";
|
|
||||||
|
|
||||||
auto const resultSlice = resultBuilder.slice();
|
|
||||||
VPackBuilder oldPlanIndex;
|
|
||||||
{ VPackObjectBuilder b(&oldPlanIndex);
|
|
||||||
for (auto const& entry : VPackObjectIterator(resultSlice)) {
|
|
||||||
auto const key = entry.key.copyString();
|
|
||||||
if (key != "isNewlyCreated") {
|
|
||||||
oldPlanIndex.add(entry.key.copyString(), entry.value);
|
|
||||||
}}}
|
|
||||||
|
|
||||||
std::uint64_t sleepFor = 50;
|
|
||||||
if (errorCode == TRI_ERROR_NO_ERROR) { // Success so far
|
|
||||||
|
|
||||||
// Smart collection, we're done.
|
|
||||||
auto const resultSlice = resultBuilder.slice();
|
|
||||||
if (resultSlice.hasKey("isSmart") &&
|
|
||||||
resultSlice.get("isSmart").isBoolean() &&
|
|
||||||
resultSlice.get("isSmart").getBoolean()) {
|
|
||||||
return TRI_ERROR_NO_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Arangosearch we're done
|
|
||||||
if (slice.get(
|
|
||||||
arangodb::StaticStrings::IndexType).isEqualString("arangosearch")) {
|
|
||||||
return TRI_ERROR_NO_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string idStr;
|
|
||||||
if (resultSlice.hasKey("id")) {
|
|
||||||
idStr = resultSlice.get("id").copyString();
|
|
||||||
} else {
|
|
||||||
LOG_TOPIC(ERR, Logger::CLUSTER)
|
|
||||||
<< "Missing 'id' field in index creation result";
|
|
||||||
return TRI_ERROR_INTERNAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// isBuilding key to be removed, when index appears in all shards in Current
|
|
||||||
if (resultSlice.hasKey("isBuilding")) {
|
|
||||||
|
|
||||||
VPackBuilder newPlanIndex;
|
|
||||||
{ VPackObjectBuilder o(&newPlanIndex);
|
|
||||||
for (auto const& entry : VPackObjectIterator(resultSlice)) {
|
|
||||||
auto const key = entry.key.copyString();
|
|
||||||
if (key != "isBuilding" && key != "isNewlyCreated") {
|
|
||||||
newPlanIndex.add(entry.key.copyString(), entry.value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove isBuilding from Plan
|
|
||||||
AgencyWriteTransaction trx({
|
|
||||||
AgencyOperation(
|
|
||||||
indexPath, AgencyValueOperationType::REPLACE,
|
|
||||||
newPlanIndex.slice(), oldPlanIndex.slice()),
|
|
||||||
AgencyOperation(
|
|
||||||
"Plan/Version", AgencySimpleOperationType::INCREMENT_OP)});
|
|
||||||
sleepFor = 50;
|
|
||||||
while (true) {
|
|
||||||
if (_agency.sendTransactionWithFailover(trx, 0.0).successful()) {
|
|
||||||
loadPlan();
|
|
||||||
return TRI_ERROR_NO_ERROR;
|
|
||||||
}
|
|
||||||
if (steady_clock::now() > endTime) {
|
|
||||||
errorMsg = "Timed out while trying to update Plan record for the index.";
|
|
||||||
// It's fine to leave here in hopes that
|
|
||||||
// the supervision will handle this.
|
|
||||||
return TRI_ERROR_CLUSTER_TIMEOUT;
|
|
||||||
}
|
|
||||||
if (sleepFor <= 2500) {
|
|
||||||
sleepFor*=2;
|
|
||||||
}
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds());
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
// That's odd isBuilding key has gone already. Supervision was quicker?
|
|
||||||
loadPlan();
|
|
||||||
return TRI_ERROR_NO_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// At this time the index creation has failed and we want to roll back
|
|
||||||
// the plan entry
|
|
||||||
AgencyWriteTransaction trx(
|
|
||||||
std::vector<AgencyOperation>
|
|
||||||
{ AgencyOperation(
|
|
||||||
indexPath, AgencyValueOperationType::ERASE, oldPlanIndex.slice()),
|
|
||||||
AgencyOperation(
|
|
||||||
"Plan/Version", AgencySimpleOperationType::INCREMENT_OP)});
|
|
||||||
|
|
||||||
setErrormsg(errorCode, errorMsg);
|
|
||||||
|
|
||||||
sleepFor = 50;
|
|
||||||
while (true) {
|
|
||||||
AgencyCommResult update =
|
|
||||||
_agency.sendTransactionWithFailover(trx, 0.0);
|
|
||||||
if (update.successful()) {
|
|
||||||
loadPlan();
|
|
||||||
return errorCode;
|
|
||||||
}
|
|
||||||
if (steady_clock::now() > endTime) {
|
|
||||||
LOG_TOPIC(ERR, Logger::CLUSTER)
|
|
||||||
<< "Couldn't roll back index creation of " << idString << ". Database: "
|
|
||||||
<< databaseName << ", Collection " << collectionID;
|
|
||||||
errorMsg = "Timed out while trying to report index creation failure";
|
|
||||||
return TRI_ERROR_CLUSTER_TIMEOUT;
|
|
||||||
}
|
|
||||||
if (sleepFor <= 2500) {
|
|
||||||
sleepFor*=2;
|
|
||||||
}
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(sleepFor));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int ClusterInfo::ensureIndexCoordinatorWithoutRollback(
|
// The following function does the actual work of index creation: Create
|
||||||
|
// in Plan, watch Current until all dbservers for all shards have done their
|
||||||
|
// bit. If this goes wrong with a timeout, the creation operation is rolled
|
||||||
|
// back. If the `create` flag is false, this is actually a lookup operation.
|
||||||
|
// In any case, no rollback has to happen in the calling function
|
||||||
|
// ClusterInfo::ensureIndexCoordinator. Note that this method here
|
||||||
|
// sets the `isBuilding` attribute to `true`, which leads to the fact
|
||||||
|
// that the index is not yet used by queries. There is code in the
|
||||||
|
// Agency Supervision which deletes this flag once everything has been
|
||||||
|
// built successfully. This is a more robust and self-repairing solution
|
||||||
|
// than if we would take out the `isBuilding` here, since it survives a
|
||||||
|
// coordinator crash and failover operations.
|
||||||
|
// Finally note that the retry loop for the case of a failed precondition
|
||||||
|
// is outside this function here in `ensureIndexCoordinator`.
|
||||||
|
|
||||||
|
int ClusterInfo::ensureIndexCoordinatorInner(
|
||||||
std::string const& databaseName, std::string const& collectionID,
|
std::string const& databaseName, std::string const& collectionID,
|
||||||
std::string const& idString, VPackSlice const& slice, bool create,
|
std::string const& idString, VPackSlice const& slice, bool create,
|
||||||
VPackBuilder& resultBuilder, std::string& errorMsg, double timeout) {
|
VPackBuilder& resultBuilder, std::string& errorMsg, double timeout) {
|
||||||
AgencyComm ac;
|
AgencyComm ac;
|
||||||
|
|
||||||
|
using namespace std::chrono;
|
||||||
|
|
||||||
double const realTimeout = getTimeout(timeout);
|
double const realTimeout = getTimeout(timeout);
|
||||||
double const endTime = TRI_microtime() + realTimeout;
|
double const endTime = TRI_microtime() + realTimeout;
|
||||||
double const interval = getPollInterval();
|
double const interval = getPollInterval();
|
||||||
|
@ -2643,7 +2556,7 @@ int ClusterInfo::ensureIndexCoordinatorWithoutRollback(
|
||||||
|
|
||||||
AgencyCommResult previous = ac.getValues(planCollKey);
|
AgencyCommResult previous = ac.getValues(planCollKey);
|
||||||
if (!previous.successful()) {
|
if (!previous.successful()) {
|
||||||
return TRI_ERROR_CLUSTER_READING_PLAN_AGENCY;
|
return setErrormsg(TRI_ERROR_CLUSTER_READING_PLAN_AGENCY, errorMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
velocypack::Slice collection = previous.slice()[0].get(
|
velocypack::Slice collection = previous.slice()[0].get(
|
||||||
|
@ -2712,9 +2625,11 @@ int ClusterInfo::ensureIndexCoordinatorWithoutRollback(
|
||||||
|
|
||||||
// check for errors
|
// check for errors
|
||||||
if (hasError(v)) {
|
if (hasError(v)) {
|
||||||
std::string errorMsg =
|
// Note that this closure runs with the mutex in the condition
|
||||||
extractErrorMessage(shard.key.copyString(), v);
|
// variable of the agency callback, which protects writing
|
||||||
errorMsg = "Error during index creation: " + errorMsg;
|
// to *errMsg:
|
||||||
|
*errMsg = extractErrorMessage(shard.key.copyString(), v);
|
||||||
|
*errMsg = "Error during index creation: " + *errMsg;
|
||||||
// Returns the specific error number if set, or the general
|
// Returns the specific error number if set, or the general
|
||||||
// error otherwise
|
// error otherwise
|
||||||
int errNum = arangodb::basics::VelocyPackHelper::readNumericValue<int>(
|
int errNum = arangodb::basics::VelocyPackHelper::readNumericValue<int>(
|
||||||
|
@ -2731,7 +2646,6 @@ int ClusterInfo::ensureIndexCoordinatorWithoutRollback(
|
||||||
|
|
||||||
if (found == (size_t)numberOfShards) {
|
if (found == (size_t)numberOfShards) {
|
||||||
dbServerResult->store(setErrormsg(TRI_ERROR_NO_ERROR, *errMsg), std::memory_order_release);
|
dbServerResult->store(setErrormsg(TRI_ERROR_NO_ERROR, *errMsg), std::memory_order_release);
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -2783,6 +2697,7 @@ int ClusterInfo::ensureIndexCoordinatorWithoutRollback(
|
||||||
if (!result.successful()) {
|
if (!result.successful()) {
|
||||||
if (result.httpCode() ==
|
if (result.httpCode() ==
|
||||||
(int)arangodb::rest::ResponseCode::PRECONDITION_FAILED) {
|
(int)arangodb::rest::ResponseCode::PRECONDITION_FAILED) {
|
||||||
|
// Retry loop is outside!
|
||||||
return TRI_ERROR_HTTP_PRECONDITION_FAILED;
|
return TRI_ERROR_HTTP_PRECONDITION_FAILED;
|
||||||
} else {
|
} else {
|
||||||
errorMsg += " Failed to execute ";
|
errorMsg += " Failed to execute ";
|
||||||
|
@ -2794,6 +2709,10 @@ int ClusterInfo::ensureIndexCoordinatorWithoutRollback(
|
||||||
return TRI_ERROR_CLUSTER_COULD_NOT_CREATE_INDEX_IN_PLAN;
|
return TRI_ERROR_CLUSTER_COULD_NOT_CREATE_INDEX_IN_PLAN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From here on we want to roll back the index creation if we run into
|
||||||
|
// the timeout. If this coordinator crashes, the worst that can happen
|
||||||
|
// is that the index stays in some state. In most cases, it will converge
|
||||||
|
// against the planned state.
|
||||||
loadPlan();
|
loadPlan();
|
||||||
|
|
||||||
if (numberOfShards == 0) { // smart "dummy" collection has no shards
|
if (numberOfShards == 0) { // smart "dummy" collection has no shards
|
||||||
|
@ -2804,29 +2723,119 @@ int ClusterInfo::ensureIndexCoordinatorWithoutRollback(
|
||||||
resultBuilder.add("isSmart", VPackValue(true));
|
resultBuilder.add("isSmart", VPackValue(true));
|
||||||
}
|
}
|
||||||
loadCurrent();
|
loadCurrent();
|
||||||
return TRI_ERROR_NO_ERROR;
|
return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
CONDITION_LOCKER(locker, agencyCallback->_cv);
|
|
||||||
|
|
||||||
while (!application_features::ApplicationServer::isStopping()) {
|
while (!application_features::ApplicationServer::isStopping()) {
|
||||||
int tmpRes = dbServerResult->load(std::memory_order_acquire);
|
int tmpRes = dbServerResult->load(std::memory_order_acquire);
|
||||||
if (tmpRes >= 0) {
|
if (tmpRes == 0) {
|
||||||
cbGuard.fire(); // unregister cb before accessing errMsg
|
// Finally, in case all is good, remove the `isBuilding` flag
|
||||||
loadCurrent();
|
// check that the index has appeared. Note that we have to have
|
||||||
{
|
// a precondition since the collection could have been deleted
|
||||||
// Copy over all elements in slice.
|
// in the meantime:
|
||||||
VPackObjectBuilder b(&resultBuilder);
|
VPackBuilder finishedPlanIndex;
|
||||||
resultBuilder.add(VPackObjectIterator(newIndexBuilder.slice()));
|
{ VPackObjectBuilder o(&finishedPlanIndex);
|
||||||
resultBuilder.add("isNewlyCreated", VPackValue(true));
|
for (auto const& entry : VPackObjectIterator(newIndexBuilder.slice())) {
|
||||||
|
auto const key = entry.key.copyString();
|
||||||
|
if (key != "isBuilding" && key != "isNewlyCreated") {
|
||||||
|
finishedPlanIndex.add(entry.key.copyString(), entry.value);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
errorMsg = *errMsg;
|
AgencyWriteTransaction trx(
|
||||||
return tmpRes;
|
{AgencyOperation(
|
||||||
|
planIndexesKey, AgencyValueOperationType::REPLACE,
|
||||||
|
finishedPlanIndex.slice(), newIndexBuilder.slice()),
|
||||||
|
AgencyOperation(
|
||||||
|
"Plan/Version", AgencySimpleOperationType::INCREMENT_OP)},
|
||||||
|
AgencyPrecondition(planIndexesKey, AgencyPrecondition::Type::EMPTY,
|
||||||
|
false));
|
||||||
|
TRI_idx_iid_t indexId = arangodb::basics::StringUtils::uint64(
|
||||||
|
newIndexBuilder.slice().get("id").copyString());
|
||||||
|
if (!_agency.sendTransactionWithFailover(trx, 0.0).successful()) {
|
||||||
|
// We just log the problem and move on, the Supervision will repair
|
||||||
|
// things in due course:
|
||||||
|
LOG_TOPIC(INFO, Logger::CLUSTER)
|
||||||
|
<< "Could not remove isBuilding flag in new index " << indexId
|
||||||
|
<< ", this will be repaired automatically.";
|
||||||
|
}
|
||||||
|
loadPlan();
|
||||||
|
// Finally check if it has appeared, if not, we take another turn,
|
||||||
|
// which does not do any harm:
|
||||||
|
auto coll = getCollection(databaseName, collectionID);
|
||||||
|
auto indexes = coll->getIndexes();
|
||||||
|
if (std::any_of(indexes.begin(), indexes.end(),
|
||||||
|
[indexId](std::shared_ptr<arangodb::Index>& index) -> bool {
|
||||||
|
return indexId == index->id();
|
||||||
|
})) {
|
||||||
|
cbGuard.fire(); // unregister cb before accessing errMsg
|
||||||
|
loadCurrent();
|
||||||
|
{
|
||||||
|
// Copy over all elements in slice.
|
||||||
|
VPackObjectBuilder b(&resultBuilder);
|
||||||
|
resultBuilder.add(VPackObjectIterator(finishedPlanIndex.slice()));
|
||||||
|
resultBuilder.add("isNewlyCreated", VPackValue(true));
|
||||||
|
}
|
||||||
|
// The mutex in the condition variable protects the access to
|
||||||
|
// *errMsg:
|
||||||
|
CONDITION_LOCKER(locker, agencyCallback->_cv);
|
||||||
|
errorMsg = *errMsg;
|
||||||
|
return tmpRes;
|
||||||
|
}
|
||||||
|
LOG_TOPIC(DEBUG, Logger::CLUSTER)
|
||||||
|
<< "Index " << indexId
|
||||||
|
<< " is complete, waiting for Supervision to remove isBuilding flag.";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (TRI_microtime() > endTime) {
|
if (tmpRes > 0 || TRI_microtime() > endTime) {
|
||||||
return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
|
// At this time the index creation has failed and we want to
|
||||||
|
// roll back the plan entry, provided the collection still exists:
|
||||||
|
AgencyWriteTransaction trx(
|
||||||
|
std::vector<AgencyOperation>(
|
||||||
|
{ AgencyOperation(
|
||||||
|
planIndexesKey, AgencyValueOperationType::ERASE,
|
||||||
|
newIndexBuilder.slice()),
|
||||||
|
AgencyOperation(
|
||||||
|
"Plan/Version", AgencySimpleOperationType::INCREMENT_OP)}),
|
||||||
|
AgencyPrecondition(planCollKey,
|
||||||
|
AgencyPrecondition::Type::EMPTY, false) );
|
||||||
|
|
||||||
|
int sleepFor = 50;
|
||||||
|
auto rollbackEndTime = steady_clock::now() + std::chrono::seconds(10);
|
||||||
|
while (true) {
|
||||||
|
AgencyCommResult update =
|
||||||
|
_agency.sendTransactionWithFailover(trx, 0.0);
|
||||||
|
if (update.successful()) {
|
||||||
|
loadPlan();
|
||||||
|
if (tmpRes < 0) { // timeout
|
||||||
|
errorMsg = "Index could not be created within timeout, giving up and rolling back index creation.";
|
||||||
|
return TRI_ERROR_CLUSTER_TIMEOUT;
|
||||||
|
}
|
||||||
|
return tmpRes;
|
||||||
|
}
|
||||||
|
if (update._statusCode == TRI_ERROR_HTTP_PRECONDITION_FAILED) {
|
||||||
|
// Collection was removed, let's break here and report outside
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (steady_clock::now() > rollbackEndTime) {
|
||||||
|
LOG_TOPIC(ERR, Logger::CLUSTER)
|
||||||
|
<< "Couldn't roll back index creation of " << idString
|
||||||
|
<< ". Database: " << databaseName << ", Collection "
|
||||||
|
<< collectionID;
|
||||||
|
if (tmpRes < 0) { // timeout
|
||||||
|
errorMsg = "Timed out while trying to roll back index creation failure";
|
||||||
|
return TRI_ERROR_CLUSTER_TIMEOUT;
|
||||||
|
}
|
||||||
|
return tmpRes;
|
||||||
|
}
|
||||||
|
if (sleepFor <= 2500) {
|
||||||
|
sleepFor*=2;
|
||||||
|
}
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(sleepFor));
|
||||||
|
}
|
||||||
|
// We only get here if the collection was dropped just in the moment
|
||||||
|
// when we wanted to roll back the index creation.
|
||||||
}
|
}
|
||||||
|
|
||||||
auto c = getCollection(databaseName, collectionID);
|
auto c = getCollection(databaseName, collectionID);
|
||||||
|
@ -2835,7 +2844,10 @@ int ClusterInfo::ensureIndexCoordinatorWithoutRollback(
|
||||||
return TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND;
|
return TRI_ERROR_ARANGO_DATA_SOURCE_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
|
||||||
agencyCallback->executeByCallbackOrTimeout(interval);
|
{
|
||||||
|
CONDITION_LOCKER(locker, agencyCallback->_cv);
|
||||||
|
agencyCallback->executeByCallbackOrTimeout(interval);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3728,4 +3740,4 @@ arangodb::Result ClusterInfo::getShardServers(
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
// --SECTION-- END-OF-FILE
|
// --SECTION-- END-OF-FILE
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
|
@ -659,7 +659,7 @@ class ClusterInfo {
|
||||||
/// @brief ensure an index in coordinator.
|
/// @brief ensure an index in coordinator.
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
int ensureIndexCoordinatorWithoutRollback(
|
int ensureIndexCoordinatorInner(
|
||||||
std::string const& databaseName, std::string const& collectionID,
|
std::string const& databaseName, std::string const& collectionID,
|
||||||
std::string const& idSlice, arangodb::velocypack::Slice const& slice,
|
std::string const& idSlice, arangodb::velocypack::Slice const& slice,
|
||||||
bool create, arangodb::velocypack::Builder& resultBuilder,
|
bool create, arangodb::velocypack::Builder& resultBuilder,
|
||||||
|
|
Loading…
Reference in New Issue