From 31bff28489fda028f4fa5b14faa6fc67403f6b9f Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Tue, 24 May 2016 09:43:59 +0200 Subject: [PATCH 1/9] Factor out synchronizeOneShard. --- js/server/modules/@arangodb/cluster.js | 162 +++++++++++++------------ 1 file changed, 82 insertions(+), 80 deletions(-) diff --git a/js/server/modules/@arangodb/cluster.js b/js/server/modules/@arangodb/cluster.js index b94760a837..58f2f5d39f 100644 --- a/js/server/modules/@arangodb/cluster.js +++ b/js/server/modules/@arangodb/cluster.js @@ -861,6 +861,86 @@ function cleanupCurrentCollections (plannedCollections, currentCollections, /// replicated shards) //////////////////////////////////////////////////////////////////////////////// +function synchronizeOneShard(database, shard, planId, leader) { + // synchronize this shard from the leader + // this function will throw if anything goes wrong + + const rep = require("@arangodb/replication"); + + console.info("trying to synchronize local shard '%s/%s' for central '%s/%s'", + database, + shard, + database, + planId); + try { + var ep = ArangoClusterInfo.getServerEndpoint(leader); + // First once without a read transaction: + var sy = rep.syncCollection(shard, + { endpoint: ep, incremental: true, + keepBarrier: true }); + if (sy.error) { + console.error("Could not initially synchronize shard ", shard, sy); + throw "Initial sync failed"; + } else { + if (sy.collections.length === 0 || + sy.collections[0].name !== shard) { + cancelBarrier(ep, database, sy.barrierId); + throw "Shard seems to be gone from leader!"; + } else { + var ok = false; + // Now start a read transaction to stop writes: + var lockJobId = false; + try { + lockJobId = startReadLockOnLeader(ep, database, + shard, 300); + console.debug("lockJobId:", lockJobId); + } + catch (err1) { + console.error("Exception in startReadLockOnLeader:", err1); + } + finally { + cancelBarrier(ep, database, sy.barrierId); + } + if (lockJobId !== false) { + try { + var sy2 = rep.syncCollectionFinalize( + database, shard, sy.collections[0].id, + sy.lastLogTick, { endpoint: ep }); + if (sy2.error) { + console.error("Could not synchronize shard", shard, + sy2); + ok = false; + } else { + ok = addShardFollower(ep, database, shard); + } + } + catch (err3) { + console.error("Exception in syncCollectionFinalize:", err3); + } + finally { + if (!cancelReadLockOnLeader(ep, database, + lockJobId)) { + console.error("Read lock has timed out for shard", shard); + ok = false; + } + } + } else { + console.error("lockJobId was false"); + } + if (ok) { + console.info("Synchronization worked for shard", shard); + } else { + throw "Did not work."; // just to log below in catch + } + } + } + } + catch (err2) { + console.error("synchronization of local shard '%s/%s' for central '%s/%s' failed: %s", + database, shard, database, planId, JSON.stringify(err2)); + } +} + function synchronizeLocalFollowerCollections (plannedCollections, currentCollections) { var ourselves = global.ArangoServerState.id(); @@ -870,8 +950,6 @@ function synchronizeLocalFollowerCollections (plannedCollections, var localDatabases = getLocalDatabases(); var database; - var rep = require("@arangodb/replication"); - // iterate over all matching databases for (database in plannedCollections) { if (plannedCollections.hasOwnProperty(database)) { @@ -911,84 +989,8 @@ function synchronizeLocalFollowerCollections (plannedCollections, "come back later to this shard..."); } else { if (inCurrent.servers.indexOf(ourselves) === -1) { - // we not in there - must synchronize 
this shard from - // the leader - console.info("trying to synchronize local shard '%s/%s' for central '%s/%s'", - database, - shard, - database, - collInfo.planId); - try { - var ep = ArangoClusterInfo.getServerEndpoint( - inCurrent.servers[0]); - // First once without a read transaction: - var sy = rep.syncCollection(shard, - { endpoint: ep, incremental: true, - keepBarrier: true }); - if (sy.error) { - console.error("Could not initially synchronize shard ", shard, sy); - } else { - if (sy.collections.length == 0 || - sy.collections[0].name != shard) { - cancelBarrier(ep, database, sy.barrierId); - throw "Shard seems to be gone from leader!"; - } else { - var ok = false; - // Now start a read transaction to stop writes: - var lockJobId = false; - try { - lockJobId = startReadLockOnLeader(ep, database, - shard, 300); - console.debug("lockJobId:", lockJobId); - } - catch (err1) { - console.error("Exception in startReadLockOnLeader:", err1); - } - finally { - cancelBarrier(ep, database, sy.barrierId); - } - if (lockJobId !== false) { - try { - var sy2 = rep.syncCollectionFinalize( - database, shard, sy.collections[0].id, - sy.lastLogTick, { endpoint: ep }); - if (sy2.error) { - console.error("Could not synchronize shard", shard, - sy2); - ok = false; - } else { - ok = addShardFollower(ep, database, shard); - } - } - catch (err3) { - console.error("Exception in syncCollectionFinalize:", err3); - } - finally { - if (!cancelReadLockOnLeader(ep, database, - lockJobId)) { - console.error("Read lock has timed out for shard", shard); - ok = false; - } - } - } else { - console.error("lockJobId was false"); - } - if (ok) { - console.info("Synchronization worked for shard", shard); - } else { - throw "Did not work."; // just to log below in catch - } - } - } - } - catch (err2) { - console.error("synchronization of local shard '%s/%s' for central '%s/%s' failed: %s", - database, - shard, - database, - collInfo.planId, - JSON.stringify(err2)); - } + synchronizeOneShard(database, shard, collInfo.planId, + inCurrent.servers[0]); } } } From e8e2d9406089338baa6eda8456befb817aae58f1 Mon Sep 17 00:00:00 2001 From: hkernbach Date: Tue, 24 May 2016 09:46:16 +0200 Subject: [PATCH 2/9] removed console log [ci skip] --- js/apps/system/_admin/aardvark/APP/aardvark.js | 1 - 1 file changed, 1 deletion(-) diff --git a/js/apps/system/_admin/aardvark/APP/aardvark.js b/js/apps/system/_admin/aardvark/APP/aardvark.js index 0a17a7f45a..31eb6b8bc4 100644 --- a/js/apps/system/_admin/aardvark/APP/aardvark.js +++ b/js/apps/system/_admin/aardvark/APP/aardvark.js @@ -79,7 +79,6 @@ router.get('/config.js', function(req, res) { && isTrustedProxy(req.remoteAddress)) { basePath = req.headers['x-script-name']; } - console.log(Object.keys(global)); res.set('content-type', 'text/javascript'); res.send("var frontendConfig = " + JSON.stringify({ "basePath": basePath, From d988736ea9dc80bf2c5cca075c971399dbe6cb54 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Tue, 24 May 2016 09:53:05 +0200 Subject: [PATCH 3/9] Lower a logging level to DEBUG in performRequests. Errors will be reported elsewhere. 
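Note on [PATCH 1/9]: the helper extracted above performs a two-phase catch-up against the shard leader. Condensed into a rough sketch (the helper names are the cluster.js-internal functions visible in the diff, assumed to be in scope; the extensive error handling of the real function is omitted), the sequence is:

function synchronizeOneShardSketch(database, shard, planId, leader) {
  var rep = require("@arangodb/replication");
  var ep = ArangoClusterInfo.getServerEndpoint(leader);
  // 1. incremental sync without blocking writes on the leader, keeping a
  //    barrier so the leader retains the WAL tail needed for the final step:
  var sy = rep.syncCollection(shard,
      { endpoint: ep, incremental: true, keepBarrier: true });
  // 2. briefly stop writes on the leader with a read lock, then drop the barrier:
  var lockJobId = startReadLockOnLeader(ep, database, shard, 300);
  cancelBarrier(ep, database, sy.barrierId);
  // 3. apply the remaining changes and register this server as in-sync follower:
  rep.syncCollectionFinalize(database, shard, sy.collections[0].id,
                             sy.lastLogTick, { endpoint: ep });
  addShardFollower(ep, database, shard);
  // 4. release the read lock on the leader again:
  cancelReadLockOnLeader(ep, database, lockJobId);
}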
--- arangod/Cluster/ClusterComm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arangod/Cluster/ClusterComm.cpp b/arangod/Cluster/ClusterComm.cpp index 3bd339c0a9..164c07a338 100644 --- a/arangod/Cluster/ClusterComm.cpp +++ b/arangod/Cluster/ClusterComm.cpp @@ -1164,7 +1164,7 @@ size_t ClusterComm::performRequests(std::vector& requests, // We only get here if the global timeout was triggered, not all // requests are marked by done! - LOG_TOPIC(ERR, logTopic) << "ClusterComm::performRequests: " + LOG_TOPIC(DEBUG, logTopic) << "ClusterComm::performRequests: " << "got timeout, this will be reported..."; // Forget about From 217ce3cc5c1dc3ee728f0e87c130564a08dc10bc Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Tue, 24 May 2016 10:26:32 +0200 Subject: [PATCH 4/9] Activate leader failure in resilience test. --- .../resilience-synchronous-repl-cluster.js | 50 ++++++++++++++++--- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/js/server/tests/resilience/resilience-synchronous-repl-cluster.js b/js/server/tests/resilience/resilience-synchronous-repl-cluster.js index 5921c78e84..089d3dad2d 100644 --- a/js/server/tests/resilience/resilience-synchronous-repl-cluster.js +++ b/js/server/tests/resilience/resilience-synchronous-repl-cluster.js @@ -114,6 +114,34 @@ function SynchronousReplicationSuite () { assertTrue(continueExternal(global.instanceInfo.arangods[pos].pid)); } +//////////////////////////////////////////////////////////////////////////////// +/// @brief fail the leader +//////////////////////////////////////////////////////////////////////////////// + + function failLeader() { + var leader = cinfo.shards[shards[0]][0]; + var endpoint = global.ArangoClusterInfo.getServerEndpoint(leader); + // Now look for instanceInfo: + var pos = _.findIndex(global.instanceInfo.arangods, + x => x.endpoint === endpoint); + assertTrue(pos >= 0); + assertTrue(suspendExternal(global.instanceInfo.arangods[pos].pid)); + } + +//////////////////////////////////////////////////////////////////////////////// +/// @brief heal the follower +//////////////////////////////////////////////////////////////////////////////// + + function healLeader() { + var leader = cinfo.shards[shards[0]][0]; + var endpoint = global.ArangoClusterInfo.getServerEndpoint(leader); + // Now look for instanceInfo: + var pos = _.findIndex(global.instanceInfo.arangods, + x => x.endpoint === endpoint); + assertTrue(pos >= 0); + assertTrue(continueExternal(global.instanceInfo.arangods[pos].pid)); + } + //////////////////////////////////////////////////////////////////////////////// /// @brief produce failure //////////////////////////////////////////////////////////////////////////////// @@ -121,9 +149,8 @@ function SynchronousReplicationSuite () { function makeFailure(failure) { if (failure.follower) { failFollower(); -/* } else { - failLeader(); // TODO: function does not exist -*/ + } else { + failLeader(); } } @@ -134,9 +161,8 @@ function SynchronousReplicationSuite () { function healFailure(failure) { if (failure.follower) { healFollower(); -/* } else { - healLeader(); // TODO: function does not exist -*/ + } else { + healLeader(); } } @@ -509,6 +535,18 @@ function SynchronousReplicationSuite () { assertTrue(waitForSynchronousReplication("_system")); }, +//////////////////////////////////////////////////////////////////////////////// +/// @brief run a standard check with failures: +//////////////////////////////////////////////////////////////////////////////// + + testBasicOperationsFailureLeader : 
function () { + assertTrue(waitForSynchronousReplication("_system")); + failLeader(); + runBasicOperations({}, {}); + healLeader(); + assertTrue(waitForSynchronousReplication("_system")); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief just to allow a trailing comma at the end of the last test //////////////////////////////////////////////////////////////////////////////// From c3699f3c103284c268f5ba0f8b461ad685a4b67c Mon Sep 17 00:00:00 2001 From: hkernbach Date: Tue, 24 May 2016 10:50:34 +0200 Subject: [PATCH 5/9] frontend persistent index --- .../APP/frontend/js/templates/indicesView.ejs | 26 +++++++++++++++++++ .../APP/frontend/js/views/indicesView.js | 8 ++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/indicesView.ejs b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/indicesView.ejs index ca51bcc63e..09ac2140cc 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/indicesView.ejs +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/indicesView.ejs @@ -112,6 +112,32 @@ + + Unique: + + + + +
[The remaining added lines of this hunk, roughly 26 lines of .ejs table markup for the new persistent-index creation form, were mangled during extraction; only the "Unique:" and "Sparse:" row labels of the added checkbox inputs survive.]
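The hunk above extends the index-creation form of the web UI with "Unique:" and "Sparse:" options for the new persistent index type. For orientation, a minimal arangosh sketch of the call such a form submission corresponds to (the collection and field names are invented for the example, and the mapping of the two checkboxes to the unique and sparse flags is inferred from the template labels):

db._create("demo");
db.demo.ensureIndex({
  type: "persistent",
  fields: [ "value" ],
  unique: false,   // "Unique:" checkbox added in indicesView.ejs
  sparse: false    // "Sparse:" checkbox added in indicesView.ejs
});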