diff --git a/tests/js/server/resilience/repair/repair-distribute-shards-like-spec-grey.js b/tests/js/server/resilience/repair/repair-distribute-shards-like-spec-grey.js index a85ab0820a..4f341ca956 100644 --- a/tests/js/server/resilience/repair/repair-distribute-shards-like-spec-grey.js +++ b/tests/js/server/resilience/repair/repair-distribute-shards-like-spec-grey.js @@ -42,11 +42,10 @@ let dbServerCount = instanceInfo.arangods.filter(arangod => { }).length; const waitForPlanEqualCurrent = function (collection) { - const iterations = 120; const waitTime = 1.0; - const maxTime = iterations * waitTime; + const maxTime = 120; - for (let i = 0; i < iterations; i++) { + for (let start = Date.now(); (Date.now() - start)/1000 < maxTime; ) { global.ArangoClusterInfo.flush(); const shardDist = internal.getCollectionShardDistribution(collection._id); const Plan = shardDist[collection.name()].Plan; @@ -59,17 +58,15 @@ const waitForPlanEqualCurrent = function (collection) { wait(waitTime); } console.error(`Collection "${collection}" failed to get plan in sync after ${maxTime} sec`); return false; }; const waitForReplicationFactor = function (collection) { - const iterations = 120; const waitTime = 1.0; - const maxTime = iterations * waitTime; + const maxTime = 120; - - for (let i = 0; i < iterations; i++) { + for (let start = Date.now(); (Date.now() - start)/1000 < maxTime; ) { global.ArangoClusterInfo.flush(); const ci = global.ArangoClusterInfo.getCollectionInfo(internal.db._name(), collection._id); @@ -98,7 +95,7 @@ const waitForAgencyJob = function (jobId) { ].map(p => `${prefix}/${p}`); const waitInterval = 1.0; - const maxWaitTime = 120; + const maxWaitTime = 300; let jobStopped = false; let success = false; @@ -154,7 +151,7 @@ const waitForAllAgencyJobs = function () { ].map(p => `${prefix}/${p}`); const waitInterval = 1.0; - const maxWaitTime = 60; + const maxWaitTime 
= 300; let unfinishedJobs = Infinity; let timeout = false; @@ -217,8 +214,8 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} = { distributeShardsLike: protoCollection._id }, withData); - expect(waitForPlanEqualCurrent(protoCollection)).to.be.true; - expect(waitForPlanEqualCurrent(collection)).to.be.true; + expect(waitForPlanEqualCurrent(protoCollection), 'Timeout while waiting for current to catch up to plan').to.be.true; + expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true; // IMPORTANT NOTE: Never do this in a real environment. Changing // distributeShardsLike will break your cluster! @@ -356,11 +353,11 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} = return id; }; - expect(waitForPlanEqualCurrent(collection)).to.be.true; + expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true; let jobId = postMoveShardJob(leaderDbServer, freeDbServer, true); let result = waitForAgencyJob(jobId); - expect(result).to.equal(true); - expect(waitForReplicationFactor(collection)).to.be.true; + expect(result, 'Agency moveShard job either failed, or we stopped waiting due to timeout').to.equal(true); + expect(waitForReplicationFactor(collection), 'Timeout while waiting for replicationFactor to be satisfied').to.be.true; let expected = { leader: dbServerNameById[freeDbServer], followers: protoShardInfo.followers, @@ -369,11 +366,11 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} = expect(expected).to.deep.equal(actual, `Expected ${JSON.stringify(expected)}, but got ${JSON.stringify(actual)} ` + `after moving leader ${dbServerNameById[leaderDbServer]} to ${dbServerNameById[freeDbServer]}`); - expect(waitForPlanEqualCurrent(collection)).to.be.true; + expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true; jobId = 
postMoveShardJob(followerDbServer, leaderDbServer, false); result = waitForAgencyJob(jobId); - expect(waitForReplicationFactor(collection)).to.be.true; + expect(waitForReplicationFactor(collection), 'Timeout while waiting for replicationFactor to be satisfied').to.be.true; expected = { leader: dbServerNameById[freeDbServer], followers: protoShardInfo.followers.slice(1).concat([dbServerNameById[leaderDbServer]]), @@ -385,7 +382,7 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} = - expect(result).to.equal(true); + expect(result, 'Agency moveShard job either failed, or we stopped waiting due to timeout').to.equal(true); - expect(waitForPlanEqualCurrent(collection)).to.be.true; + expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true; // IMPORTANT NOTE: Never do this in a real environment. Changing // distributeShardsLike will break your cluster! @@ -395,7 +392,7 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} = ); global.ArangoAgency.increaseVersion("Plan/Version"); - expect(waitForPlanEqualCurrent(collection)).to.be.true; + expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true; return {collection, colData, protoCollection, protoData, expectedCollections}; }; @@ -408,7 +405,7 @@ const waitForJob = function (postJobRes) { expect(jobId).to.be.a('string'); const waitInterval = 1.0; - const maxWaitTime = 120; + const maxWaitTime = 300; const start = Date.now(); @@ -523,8 +520,8 @@ const distributeShardsLikeSuite = (options) => { = createCollectionOptionallyWithData(colName, { distributeShardsLike: protoCollection._id }, withData); - expect(waitForPlanEqualCurrent(protoCollection)).to.be.true; - expect(waitForPlanEqualCurrent(collection)).to.be.true; + expect(waitForPlanEqualCurrent(protoCollection), 'Timeout while waiting for current to catch up to plan').to.be.true; + 
expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true; // Directly posting should generally not be used, as it is likely to timeout. // Setting the header "x-arango-async: store" instead is preferred. @@ -672,9 +669,9 @@ const distributeShardsLikeSuite = (options) => { internal.debugClearFailAt(); - expect(waitForAllAgencyJobs()); - expect(waitForReplicationFactor(collection)).to.be.true; - expect(waitForPlanEqualCurrent(collection)).to.be.true; + expect(waitForAllAgencyJobs(), 'Timeout while waiting for agency jobs to finish'); + expect(waitForReplicationFactor(collection), 'Timeout while waiting for replicationFactor to be satisfied').to.be.true; + expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true; { // Before executing repairs, check via GET if the planned operations // seem right.