Increased timeouts and added slightly more helpful error messages for assertions (#9135)

2019-05-30 04:30:39 +02:00 · 2019-05-30 04:30:39 +02:00 · 90abe3ba93
parent ec0cc82bf6
commit 90abe3ba93
1 changed file with 23 additions and 26 deletions
--- a/tests/js/server/resilience/repair/repair-distribute-shards-like-spec-grey.js
+++ b/tests/js/server/resilience/repair/repair-distribute-shards-like-spec-grey.js
@ -42,11 +42,10 @@ let dbServerCount = instanceInfo.arangods.filter(arangod => {
 }).length;

 const waitForPlanEqualCurrent = function (collection) {
-  const iterations = 120;
  const waitTime = 1.0;
-  const maxTime = iterations * waitTime;
+  const maxTime = 120;

-  for (let i = 0; i < iterations; i++) {
+  for (let start = Date.now(); (Date.now() - start)/1000 < maxTime; ) {
    global.ArangoClusterInfo.flush();
    const shardDist = internal.getCollectionShardDistribution(collection._id);
    const Plan = shardDist[collection.name()].Plan;
@ -59,17 +58,15 @@ const waitForPlanEqualCurrent = function (collection) {
    wait(waitTime);
  }

-  console.error(`Collection "${collection}" failed to get plan in sync after ${maxTime} sec`);
+  console.error(`Collection "${collection}" failed to get plan in sync after ${maxTime/1000} sec`);
  return false;
 };

 const waitForReplicationFactor = function (collection) {
-  const iterations = 120;
  const waitTime = 1.0;
-  const maxTime = iterations * waitTime;
+  const maxTime = 120;

-
-  for (let i = 0; i < iterations; i++) {
+  for (let start = Date.now(); (Date.now() - start)/1000 < maxTime; ) {
    global.ArangoClusterInfo.flush();
    const ci = global.ArangoClusterInfo.getCollectionInfo(internal.db._name(), collection._id);

@ -98,7 +95,7 @@ const waitForAgencyJob = function (jobId) {
  ].map(p => `${prefix}/${p}`);

  const waitInterval = 1.0;
-  const maxWaitTime = 120;
+  const maxWaitTime = 300;

  let jobStopped = false;
  let success = false;
@ -154,7 +151,7 @@ const waitForAllAgencyJobs = function () {
  ].map(p => `${prefix}/${p}`);

  const waitInterval = 1.0;
-  const maxWaitTime = 60;
+  const maxWaitTime = 300;

  let unfinishedJobs = Infinity;
  let timeout = false;
@ -217,8 +214,8 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} =
    { distributeShardsLike: protoCollection._id },
    withData);

-  expect(waitForPlanEqualCurrent(protoCollection)).to.be.true;
-  expect(waitForPlanEqualCurrent(collection)).to.be.true;
+  expect(waitForPlanEqualCurrent(protoCollection), 'Timeout while waiting for current to catch up to plan').to.be.true;
+  expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true;

  // IMPORTANT NOTE: Never do this in a real environment. Changing
  // distributeShardsLike will break your cluster!
@ -356,11 +353,11 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} =
    return id;
  };

-  expect(waitForPlanEqualCurrent(collection)).to.be.true;
+  expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true;
  let jobId = postMoveShardJob(leaderDbServer, freeDbServer, true);
  let result = waitForAgencyJob(jobId);
-  expect(result).to.equal(true);
-  expect(waitForReplicationFactor(collection)).to.be.true;
+  expect(result, 'Agency moveShard job either failed, or we stopped waiting due to timeout').to.equal(true);
+  expect(waitForReplicationFactor(collection), 'Timeout while waiting for replicationFactor to be satisfied').to.be.true;
  let expected = {
    leader: dbServerNameById[freeDbServer],
    followers: protoShardInfo.followers,
@ -369,11 +366,11 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} =
  expect(expected).to.deep.equal(actual,
    `Expected ${JSON.stringify(expected)}, but got ${JSON.stringify(actual)} `
  + `after moving leader ${dbServerNameById[leaderDbServer]} to ${dbServerNameById[freeDbServer]}`);
-  expect(waitForPlanEqualCurrent(collection)).to.be.true;
+  expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true;

  jobId = postMoveShardJob(followerDbServer, leaderDbServer, false);
-  result = waitForAgencyJob(jobId);
-  expect(waitForReplicationFactor(collection)).to.be.true;
+  result = waitForAgencyJob(jobId, 'Agency moveShard job either failed, or we stopped waiting due to timeout');
+  expect(waitForReplicationFactor(collection), 'Timeout while waiting for replicationFactor to be satisfied').to.be.true;
  expected = {
    leader: dbServerNameById[freeDbServer],
    followers: protoShardInfo.followers.slice(1).concat([dbServerNameById[leaderDbServer]]),
@ -385,7 +382,7 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} =

  expect(result).to.equal(true);

-  expect(waitForPlanEqualCurrent(collection)).to.be.true;
+  expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true;

  // IMPORTANT NOTE: Never do this in a real environment. Changing
  // distributeShardsLike will break your cluster!
@ -395,7 +392,7 @@ const createBrokenClusterState = function ({failOnOperation = null, withData} =
  );
  global.ArangoAgency.increaseVersion("Plan/Version");

-  expect(waitForPlanEqualCurrent(collection)).to.be.true;
+  expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true;
  return {collection, colData, protoCollection, protoData, expectedCollections};
 };

@ -408,7 +405,7 @@ const waitForJob = function (postJobRes) {
  expect(jobId).to.be.a('string');

  const waitInterval = 1.0;
-  const maxWaitTime = 120;
+  const maxWaitTime = 300;

  const start = Date.now();

@ -523,8 +520,8 @@ const distributeShardsLikeSuite = (options) => {
        = createCollectionOptionallyWithData(colName,
        { distributeShardsLike: protoCollection._id }, withData);

-      expect(waitForPlanEqualCurrent(protoCollection)).to.be.true;
-      expect(waitForPlanEqualCurrent(collection)).to.be.true;
+      expect(waitForPlanEqualCurrent(protoCollection), 'Timeout while waiting for current to catch up to plan').to.be.true;
+      expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true;

      // Directly posting should generally not be used, as it is likely to timeout.
      // Setting the header "x-arango-async: store" instead is preferred.
@ -672,9 +669,9 @@ const distributeShardsLikeSuite = (options) => {

        internal.debugClearFailAt();

-        expect(waitForAllAgencyJobs());
-        expect(waitForReplicationFactor(collection)).to.be.true;
-        expect(waitForPlanEqualCurrent(collection)).to.be.true;
+        expect(waitForAllAgencyJobs(), 'Timeout while waiting for agency jobs to finish');
+        expect(waitForReplicationFactor(collection), 'Timeout while waiting for replicationFactor to be satisfied').to.be.true;
+        expect(waitForPlanEqualCurrent(collection), 'Timeout while waiting for current to catch up to plan').to.be.true;

        { // Before executing repairs, check via GET if the planned operations
          // seem right.