
Bug fix/distribute shards like (#4415)

This commit is contained in:
Michael Hackstein 2018-01-29 13:07:06 +01:00
parent 399d2e67f7
commit 79a80558e1
7 changed files with 422 additions and 252 deletions

View File

@ -1,6 +1,14 @@
v3.3.4 (XXXX-XX-XX)
-------------------
* fix internal issue 1770: collection creation using distributeShardsLike yielded
errors and did not distribute shards correctly in the following cases:
1. If numberOfShards * replicationFactor % nrDBServers != 0
(the total number of shard copies is not evenly divisible by the number of
DB servers, e.g. 12 shards with replicationFactor 3 on 5 DB servers: 36 % 5 != 0).
2. If there was a failover / move shard operation on the leading collection
and the follower collection was created afterwards.
* fix timeout issues in replication client expiration
* added missing edge filter to neighbors-only traversals
@ -32,7 +40,6 @@ v3.3.4 (XXXX-XX-XX)
* fixed issue #4395: if your Foxx app included an `APP` folder, it was
accidentally removed by self-healing; this is no longer the case.
v3.3.3 (2018-01-16)
-------------------

View File

@ -143,15 +143,18 @@ to the [naming conventions](../NamingConventions/README.md).
servers holding copies take over, usually without an error being
reported.
- *distributeShardsLike* distributes the shards of this collection by
cloning the shard distribution of another.
When using the *Enterprise* version of ArangoDB, the replicationFactor
may be set to "satellite", making the collection locally joinable
on every database server. This reduces the number of network hops
dramatically when using joins in AQL, at the cost of reduced write
performance on these collections.
- *distributeShardsLike* distributes the shards of this collection by
cloning the shard distribution of another. If this value is set,
*replicationFactor* and *numberOfShards* are copied from the
other collection; the corresponding attributes of this collection are
ignored and can be omitted.
`db._create(collection-name, properties, type)`
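For illustration, a minimal arangosh sketch of this behaviour (the collection names are hypothetical):

// prototype collection with an explicit shard layout
db._create("prototype", { numberOfShards: 4, replicationFactor: 2 });
// the follower clones the prototype's layout; numberOfShards and
// replicationFactor can be omitted, they are copied from the prototype
db._create("follower", { distributeShardsLike: "prototype" });
// db.follower.properties() now reports numberOfShards: 4 and
// replicationFactor: 2, mirroring the prototype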

View File

@ -1142,14 +1142,6 @@ int ClusterInfo::createCollectionCoordinator(std::string const& databaseName,
std::string const name =
arangodb::basics::VelocyPackHelper::getStringValue(json, "name", "");
std::shared_ptr<ShardMap> otherCidShardMap = nullptr;
if (json.hasKey("distributeShardsLike")) {
auto const otherCidString = json.get("distributeShardsLike").copyString();
if (!otherCidString.empty()) {
otherCidShardMap = getCollection(databaseName, otherCidString)->shardIds();
}
}
{
// check if a collection with the same name is already planned
loadPlan();
@ -1259,23 +1251,24 @@ int ClusterInfo::createCollectionCoordinator(std::string const& databaseName,
_agencyCallbackRegistry->registerCallback(agencyCallback);
TRI_DEFER(_agencyCallbackRegistry->unregisterCallback(agencyCallback));
VPackBuilder builder;
builder.add(json);
std::vector<AgencyOperation> opers (
{ AgencyOperation("Plan/Collections/" + databaseName + "/" + collectionID,
AgencyValueOperationType::SET, builder.slice()),
AgencyValueOperationType::SET, json),
AgencyOperation("Plan/Version", AgencySimpleOperationType::INCREMENT_OP)});
std::vector<AgencyPrecondition> precs;
// Any of the shards locked?
if (otherCidShardMap != nullptr) {
for (auto const& shard : *otherCidShardMap) {
precs.emplace_back(
AgencyPrecondition("Supervision/Shards/" + shard.first,
AgencyPrecondition::Type::EMPTY, true));
std::shared_ptr<ShardMap> otherCidShardMap = nullptr;
if (json.hasKey("distributeShardsLike")) {
auto const otherCidString = json.get("distributeShardsLike").copyString();
if (!otherCidString.empty()) {
otherCidShardMap = getCollection(databaseName, otherCidString)->shardIds();
// Any of the shards locked?
for (auto const& shard : *otherCidShardMap) {
precs.emplace_back(
AgencyPrecondition("Supervision/Shards/" + shard.first,
AgencyPrecondition::Type::EMPTY, true));
}
}
}
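For context, the preconditions built above guard the agency write against the supervision concurrently moving one of the prototype's shards: the transaction only applies while no Supervision/Shards entry exists for any of them. Roughly, the resulting agency transaction has this shape (a hedged sketch with made-up paths and ids, not taken from this patch):

// agency write transaction: [operations, preconditions]
[
  {
    "Plan/Collections/mydb/1234567": { /* new collection definition */ },
    "Plan/Version": { "op": "increment" }
  },
  {
    "Supervision/Shards/s1000001": { "oldEmpty": true },
    "Supervision/Shards/s1000002": { "oldEmpty": true }
  }
]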
@ -2915,3 +2908,7 @@ std::unordered_map<ServerID, std::string> ClusterInfo::getServerAliases() {
}
return ret;
}
// -----------------------------------------------------------------------------
// --SECTION-- END-OF-FILE
// -----------------------------------------------------------------------------

View File

@ -465,6 +465,138 @@ static void collectResultsFromAllShards(
}
}
////////////////////////////////////////////////////////////////////////////////
/// @brief compute a shard distribution for a new collection. The list
/// dbServers must be a list of DBserver ids to distribute across.
/// If this list is empty, the complete current list of DBservers is
/// fetched from ClusterInfo and shuffled with random_shuffle to mix it up.
////////////////////////////////////////////////////////////////////////////////
static std::shared_ptr<std::unordered_map<std::string, std::vector<std::string>>> DistributeShardsEvenly(
ClusterInfo* ci,
uint64_t numberOfShards,
uint64_t replicationFactor,
std::vector<std::string>& dbServers,
bool warnAboutReplicationFactor) {
auto shards = std::make_shared<std::unordered_map<std::string, std::vector<std::string>>>();
ci->loadCurrentDBServers();
if (dbServers.size() == 0) {
dbServers = ci->getCurrentDBServers();
if (dbServers.empty()) {
return shards;
}
random_shuffle(dbServers.begin(), dbServers.end());
}
// mop: distribute satellite collections on all servers
if (replicationFactor == 0) {
replicationFactor = dbServers.size();
}
// fetch a unique id for each shard to create
uint64_t const id = ci->uniqid(numberOfShards);
size_t leaderIndex = 0;
size_t followerIndex = 0;
for (uint64_t i = 0; i < numberOfShards; ++i) {
// determine responsible server(s)
std::vector<std::string> serverIds;
for (uint64_t j = 0; j < replicationFactor; ++j) {
if (j >= dbServers.size()) {
if (warnAboutReplicationFactor) {
LOG_TOPIC(WARN, Logger::CLUSTER)
<< "createCollectionCoordinator: replicationFactor is "
<< "too large for the number of DBservers";
}
break;
}
std::string candidate;
// mop: leader
if (serverIds.size() == 0) {
candidate = dbServers[leaderIndex++];
if (leaderIndex >= dbServers.size()) {
leaderIndex = 0;
}
} else {
do {
candidate = dbServers[followerIndex++];
if (followerIndex >= dbServers.size()) {
followerIndex = 0;
}
} while (candidate == serverIds[0]); // mop: ignore leader
}
serverIds.push_back(candidate);
}
// determine shard id
std::string shardId = "s" + StringUtils::itoa(id + i);
shards->emplace(shardId, serverIds);
}
return shards;
}
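To make the round-robin policy concrete, here is a small standalone JavaScript sketch of the same assignment logic (the function name and the plain 's' + index shard ids are illustrative; the real code draws unique shard ids from ci->uniqid):

function distributeShardsEvenly(numberOfShards, replicationFactor, servers) {
  const shards = {};
  let leaderIndex = 0;
  let followerIndex = 0;
  for (let i = 0; i < numberOfShards; ++i) {
    const serverIds = [];
    for (let j = 0; j < replicationFactor; ++j) {
      if (j >= servers.length) {
        break; // replicationFactor exceeds the number of servers
      }
      let candidate;
      if (serverIds.length === 0) {
        // leader: plain round-robin over all servers
        candidate = servers[leaderIndex];
        leaderIndex = (leaderIndex + 1) % servers.length;
      } else {
        // follower: also round-robin, but never the shard's own leader
        do {
          candidate = servers[followerIndex];
          followerIndex = (followerIndex + 1) % servers.length;
        } while (candidate === serverIds[0]);
      }
      serverIds.push(candidate);
    }
    shards['s' + i] = serverIds;
  }
  return shards;
}
// distributeShardsEvenly(4, 2, ['DB1', 'DB2', 'DB3']) yields
// { s0: ['DB1','DB2'], s1: ['DB2','DB3'], s2: ['DB3','DB1'], s3: ['DB1','DB2'] }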
////////////////////////////////////////////////////////////////////////////////
/// @brief Clone shard distribution from other collection
////////////////////////////////////////////////////////////////////////////////
static std::shared_ptr<std::unordered_map<std::string, std::vector<std::string>>>
CloneShardDistribution(ClusterInfo* ci, LogicalCollection* col,
TRI_voc_cid_t cid) {
auto result = std::make_shared<std::unordered_map<std::string, std::vector<std::string>>>();
TRI_ASSERT(cid != 0);
std::string cidString = arangodb::basics::StringUtils::itoa(cid);
std::shared_ptr<LogicalCollection> other =
ci->getCollection(col->dbName(), cidString);
// The function guarantees that no nullptr is returned
TRI_ASSERT(other != nullptr);
if (!other->distributeShardsLike().empty()) {
std::string const errorMessage = "Cannot distribute shards like '" + other->name() + "': it is already distributed like '" + other->distributeShardsLike() + "'.";
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CLUSTER_CHAIN_OF_DISTRIBUTESHARDSLIKE, errorMessage);
}
// We need to replace the distribute with the cid.
col->distributeShardsLike(cidString);
if (col->isSmart() && col->type() == TRI_COL_TYPE_EDGE) {
return result;
}
if (col->replicationFactor() != other->replicationFactor()) {
col->replicationFactor(other->replicationFactor());
}
if (col->numberOfShards() != other->numberOfShards()) {
col->numberOfShards(other->numberOfShards());
}
auto shards = other->shardIds();
auto shardList = ci->getShardList(cidString);
auto numberOfShards = static_cast<uint64_t>(col->numberOfShards());
// fetch a unique id for each shard to create
uint64_t const id = ci->uniqid(numberOfShards);
for (uint64_t i = 0; i < numberOfShards; ++i) {
// determine responsible server(s)
std::string shardId = "s" + StringUtils::itoa(id + i);
auto it = shards->find(shardList->at(i));
if (it == shards->end()) {
TRI_ASSERT(false);
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "Inconsistency in shard distribution detected; the cluster is in the process of self-healing. Please retry the operation after a few seconds.");
}
result->emplace(shardId, it->second);
}
return result;
}
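In user-facing terms, a minimal arangosh sketch of what this clone guarantees (collection names hypothetical): shard i of the clone is assigned exactly the servers of shard i of the prototype, and chaining distributeShardsLike onto a clone is rejected:

db._create("proto", { numberOfShards: 3, replicationFactor: 2 });
db._create("clone", { distributeShardsLike: "proto" });
// shard i of "clone" now has the same leader and followers as shard i of "proto"
try {
  db._create("grandchild", { distributeShardsLike: "clone" });
} catch (err) {
  // fails with TRI_ERROR_CLUSTER_CHAIN_OF_DISTRIBUTESHARDSLIKE:
  // "clone" is itself distributed like "proto"
}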
////////////////////////////////////////////////////////////////////////////////
/// @brief creates a copy of all HTTP headers to forward
////////////////////////////////////////////////////////////////////////////////
@ -2437,80 +2569,6 @@ int flushWalOnAllDBServers(bool waitForSync, bool waitForCollector, double maxWa
return TRI_ERROR_NO_ERROR;
}
////////////////////////////////////////////////////////////////////////////////
/// @brief compute a shard distribution for a new collection. The list
/// dbServers must be a list of DBserver ids to distribute across.
/// If this list is empty, the complete current list of DBservers is
/// fetched from ClusterInfo and shuffled with random_shuffle to mix it up.
////////////////////////////////////////////////////////////////////////////////
std::unordered_map<std::string, std::vector<std::string>> distributeShards(
uint64_t numberOfShards,
uint64_t replicationFactor,
std::vector<std::string>& dbServers,
bool warnAboutReplicationFactor) {
std::unordered_map<std::string, std::vector<std::string>> shards;
ClusterInfo* ci = ClusterInfo::instance();
ci->loadCurrentDBServers();
if (dbServers.size() == 0) {
dbServers = ci->getCurrentDBServers();
if (dbServers.empty()) {
return shards;
}
random_shuffle(dbServers.begin(), dbServers.end());
}
// mop: distribute satellite collections on all servers
if (replicationFactor == 0) {
replicationFactor = dbServers.size();
}
// fetch a unique id for each shard to create
uint64_t const id = ci->uniqid(numberOfShards);
size_t leaderIndex = 0;
size_t followerIndex = 0;
for (uint64_t i = 0; i < numberOfShards; ++i) {
// determine responsible server(s)
std::vector<std::string> serverIds;
for (uint64_t j = 0; j < replicationFactor; ++j) {
if (j >= dbServers.size()) {
if (warnAboutReplicationFactor) {
LOG_TOPIC(WARN, Logger::CLUSTER)
<< "createCollectionCoordinator: replicationFactor is "
<< "too large for the number of DBservers";
}
break;
}
std::string candidate;
// mop: leader
if (serverIds.size() == 0) {
candidate = dbServers[leaderIndex++];
if (leaderIndex >= dbServers.size()) {
leaderIndex = 0;
}
} else {
do {
candidate = dbServers[followerIndex++];
if (followerIndex >= dbServers.size()) {
followerIndex = 0;
}
} while (candidate == serverIds[0]); // mop: ignore leader
}
serverIds.push_back(candidate);
}
// determine shard id
std::string shardId = "s" + StringUtils::itoa(id + i);
shards.emplace(shardId, serverIds);
}
return shards;
}
#ifndef USE_ENTERPRISE
std::unique_ptr<LogicalCollection> ClusterMethods::createCollectionOnCoordinator(
TRI_col_type_e collectionType, TRI_vocbase_t* vocbase, VPackSlice parameters,
@ -2533,18 +2591,15 @@ std::unique_ptr<LogicalCollection> ClusterMethods::createCollectionOnCoordinator
std::unique_ptr<LogicalCollection> ClusterMethods::persistCollectionInAgency(
LogicalCollection* col, bool ignoreDistributeShardsLikeErrors,
bool waitForSyncReplication, bool enforceReplicationFactor,
VPackSlice parameters) {
VPackSlice) {
std::string distributeShardsLike = col->distributeShardsLike();
std::vector<std::string> avoid = col->avoidServers();
size_t replicationFactor = col->replicationFactor();
size_t numberOfShards = col->numberOfShards();
std::string const replicationFactorStr("replicationFactor");
std::string const numberOfShardsStr("numberOfShards");
ClusterInfo* ci = ClusterInfo::instance();
std::vector<std::string> dbServers;
std::vector<std::string> dbServers = ci->getCurrentDBServers();
std::shared_ptr<std::unordered_map<std::string, std::vector<std::string>>> shards = nullptr;
if (!distributeShardsLike.empty()) {
CollectionNameResolver resolver(col->vocbase());
@ -2552,132 +2607,48 @@ std::unique_ptr<LogicalCollection> ClusterMethods::persistCollectionInAgency(
resolver.getCollectionIdCluster(distributeShardsLike);
if (otherCid != 0) {
bool chainOfDistributeShardsLike = false;
bool numberOfShardsConflict = false;
bool replicationFactorConflict = false;
std::string otherCidString
= arangodb::basics::StringUtils::itoa(otherCid);
VPackBuilder builder;
{ VPackObjectBuilder a(&builder);
col->toVelocyPack(builder,false); }
try {
std::shared_ptr<LogicalCollection> other =
ci->getCollection(col->dbName(), otherCidString);
size_t otherReplFactor = size_t(other->replicationFactor());
if (!col->isSmart()) {
if (parameters.hasKey(replicationFactorStr)) {
replicationFactor = parameters.get(replicationFactorStr).getNumber<size_t>();
if (otherReplFactor != replicationFactor) {
replicationFactor = otherReplFactor;
col->replicationFactor(static_cast<int>(otherReplFactor));
//replicationFactorConflict = true;
}
} else {
replicationFactor = otherReplFactor;
col->replicationFactor(static_cast<int>(otherReplFactor));
}
size_t otherNumOfShards = size_t(other->numberOfShards());
if (parameters.hasKey(numberOfShardsStr)) {
numberOfShards = parameters.get(numberOfShardsStr).getNumber<size_t>();
if (otherNumOfShards != numberOfShards) {
numberOfShards = otherNumOfShards;
col->replicationFactor(static_cast<int>(otherNumOfShards));
//numberOfShardsConflict = true;
}
} else {
numberOfShards = otherNumOfShards;
col->replicationFactor(static_cast<int>(otherNumOfShards));
}
}
if (!other->distributeShardsLike().empty()) {
chainOfDistributeShardsLike = true;
}
auto shards = other->shardIds();
auto shardList = ci->getShardList(otherCidString);
for (auto const& s : *shardList) {
auto it = shards->find(s);
if (it != shards->end()) {
for (auto const& s : it->second) {
dbServers.push_back(s);
}
}
}
} catch (...) {}
if (replicationFactorConflict) {
THROW_ARANGO_EXCEPTION(
TRI_ERROR_CLUSTER_DISTRIBUTE_SHARDS_LIKE_REPLICATION_FACTOR);
}
if (numberOfShardsConflict) {
THROW_ARANGO_EXCEPTION(
TRI_ERROR_CLUSTER_DISTRIBUTE_SHARDS_LIKE_NUMBER_OF_SHARDS);
}
if (chainOfDistributeShardsLike) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_CHAIN_OF_DISTRIBUTESHARDSLIKE);
}
col->distributeShardsLike(otherCidString);
shards = CloneShardDistribution(ci, col, otherCid);
} else {
dbServers = ci->getCurrentDBServers();
if (ignoreDistributeShardsLikeErrors) {
col->distributeShardsLike(std::string());
} else {
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_UNKNOWN_DISTRIBUTESHARDSLIKE);
}
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CLUSTER_UNKNOWN_DISTRIBUTESHARDSLIKE,
"Could not find collection " + distributeShardsLike + " to distribute shards like it.");
}
} else if (!avoid.empty()) {
dbServers = ci->getCurrentDBServers();
if (dbServers.size() - avoid.size() >= replicationFactor) {
} else {
// system collections should never enforce replicationfactor
// to allow them to come up with 1 dbserver
if (col->isSystem()) {
enforceReplicationFactor = false;
}
size_t replicationFactor = col->replicationFactor();
size_t numberOfShards = col->numberOfShards();
// the default behaviour however is to bail out and inform the user
// that the requested replicationFactor is not possible right now
if (enforceReplicationFactor && dbServers.size() < replicationFactor) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS);
}
if (!avoid.empty()) {
// We need to remove all servers that are in the avoid list
if (dbServers.size() - avoid.size() < replicationFactor) {
// Not enough DBServers left
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS);
}
dbServers.erase(
std::remove_if(
dbServers.begin(), dbServers.end(), [&](const std::string&x) {
dbServers.begin(), dbServers.end(), [&](const std::string& x) {
return std::find(avoid.begin(), avoid.end(), x) != avoid.end();
}), dbServers.end());
}
std::random_shuffle(dbServers.begin(), dbServers.end());
} else {
dbServers = ci->getCurrentDBServers();
shards = DistributeShardsEvenly(ci, numberOfShards, replicationFactor, dbServers, !col->isSystem());
}
// system collections should never enforce replicationfactor
// to allow them to come up with 1 dbserver
if (enforceReplicationFactor && col->isSystem()) {
enforceReplicationFactor = false;
}
// the default behaviour however is to bail out and inform the user
// that the requested replicationFactor is not possible right now
if (enforceReplicationFactor && dbServers.size() < replicationFactor) {
THROW_ARANGO_EXCEPTION(TRI_ERROR_CLUSTER_INSUFFICIENT_DBSERVERS);
}
// If the list dbServers is still empty, it will be filled in
// distributeShards below.
// Now create the shards:
bool warnAboutReplicationFactor = (!col->isSystem());
auto shards = std::make_shared<
std::unordered_map<std::string, std::vector<std::string>>>(
arangodb::distributeShards(numberOfShards, replicationFactor, dbServers, warnAboutReplicationFactor));
if (shards->empty() && !col->isSmart()) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL,
"no database servers found in cluster");
}
col->setShardMap(shards);
std::unordered_set<std::string> const ignoreKeys{
@ -2690,7 +2661,8 @@ std::unique_ptr<LogicalCollection> ClusterMethods::persistCollectionInAgency(
std::string errorMsg;
int myerrno = ci->createCollectionCoordinator(
col->dbName(), col->cid_as_string(),
numberOfShards, replicationFactor, waitForSyncReplication, velocy.slice(), errorMsg, 240.0);
col->numberOfShards(), col->replicationFactor(),
waitForSyncReplication, velocy.slice(), errorMsg, 240.0);
if (myerrno != TRI_ERROR_NO_ERROR) {
if (errorMsg.empty()) {

View File

@ -248,20 +248,6 @@ int flushWalOnAllDBServers(bool waitForSync, bool waitForCollector, double maxWa
int rotateActiveJournalOnAllDBServers(std::string const& dbname,
std::string const& collname);
////////////////////////////////////////////////////////////////////////////////
/// @brief compute a shard distribution for a new collection, the list
/// dbServers must be a list of DBserver ids to distribute across.
/// If this list is empty, the complete current list of DBservers is
/// fetched from ClusterInfo. If shuffle is true, a few random shuffles
/// are performed before the list is taken. Thus modifies the list.
////////////////////////////////////////////////////////////////////////////////
std::unordered_map<std::string, std::vector<std::string>> distributeShards(
uint64_t numberOfShards,
uint64_t replicationFactor,
std::vector<std::string>& dbServers,
bool warnAboutReplicationFactor);
class ClusterMethods {
public:
// wrapper Class for static functions.

View File

@ -29,7 +29,10 @@ const internal = require('internal');
const download = require('internal').download;
const colName = "UnitTestDistributionTest";
const _ = require("lodash");
const wait = require("internal").wait;
const request = require('@arangodb/request');
const endpointToURL = require("@arangodb/cluster").endpointToURL;
const coordinatorName = "Coordinator0001";
let coordinator = instanceInfo.arangods.filter(arangod => {
return arangod.role === 'coordinator';
@ -87,7 +90,6 @@ describe('Shard distribution', function () {
serverCount += 1;
}
}
console.log("Found health records:", serverCount, health.Health, count);
if (serverCount >= dbServerCount) {
break;
}
@ -155,4 +157,177 @@ describe('Shard distribution', function () {
});
describe("using distributeShardsLike", function () {
const followCollection = 'UnitTestDistributionFollower';
const numberOfShards = 12;
const cleanUp = function () {
internal.db._drop(followCollection);
};
const shardNumber = function (shard) {
// Each shard name starts with 's'
expect(shard[0]).to.equal('s');
// and is followed by a numeric value
const nr = parseInt(shard.slice(1), 10);
expect(nr).to.be.above(0);
return nr;
};
const sortShardsNumericly = function (l, r) {
return shardNumber(l) - shardNumber(r);
};
const compareDistributions = function() {
const all = request.get(coordinator.url + '/_admin/cluster/shardDistribution');
const dist = JSON.parse(all.body).results;
const orig = dist[colName].Current;
const fol = dist[followCollection].Current;
const origShards = Object.keys(orig).sort(sortShardsNumericly);
const folShards = Object.keys(fol).sort(sortShardsNumericly);
// Now we have all shard names sorted in numeric order.
// The leader and the followers of each shard pair in this ordering must be identical.
expect(origShards).to.have.lengthOf(folShards.length);
for (let i = 0; i < origShards.length; ++i) {
const oneOrigShard = orig[origShards[i]];
const oneFolShard = fol[folShards[i]];
// Leader has to be identical
expect(oneOrigShard.leader).to.equal(oneFolShard.leader);
// Follower Order does not matter, but needs to be the same servers
expect(oneOrigShard.followers.sort()).to.deep.equal(oneFolShard.followers.sort());
}
};
describe("without replication", function () {
const replicationFactor = 1;
beforeEach(function () {
cleanUp();
internal.db._create(colName, {replicationFactor, numberOfShards});
});
afterEach(cleanUp);
it("should create all shards on identical servers", function () {
internal.db._create(followCollection, {replicationFactor, numberOfShards, distributeShardsLike: colName});
compareDistributions();
});
});
describe("with replication", function () {
const replicationFactor = 3;
// Note here: we have to make sure that numberOfShards * replicationFactor
// (12 * 3 = 36) is not divisible by the number of DBServers.
////////////////////////////////////////////////////////////////////////////////
/// @brief order the cluster to clean out a server:
////////////////////////////////////////////////////////////////////////////////
const cleanOutServer = function (id) {
var coordEndpoint =
global.ArangoClusterInfo.getServerEndpoint(coordinatorName);
var url = endpointToURL(coordEndpoint);
var body = {"server": id};
try {
return request({ method: "POST",
url: url + "/_admin/cluster/cleanOutServer",
body: JSON.stringify(body) });
} catch (err) {
console.error(
"Exception for POST /_admin/cluster/cleanOutServer:", err.stack);
return false;
}
};
const getCleanedOutServers = function () {
const coordEndpoint =
global.ArangoClusterInfo.getServerEndpoint(coordinatorName);
const url = endpointToURL(coordEndpoint);
try {
const envelope =
{ method: "GET", url: url + "/_admin/cluster/numberOfServers" };
let res = request(envelope);
var body = res.body;
if (typeof body === "string") {
body = JSON.parse(body);
}
return body;
} catch (err) {
console.error(
"Exception for POST /_admin/cluster/cleanOutServer:", err.stack);
return {};
}
};
const waitForCleanout = function (id) {
let count = 600;
while (--count > 0) {
let obj = getCleanedOutServers();
if (obj.cleanedServers.indexOf(id) >= 0) {
console.info(
"Success: Server " + id + " cleaned out after " + (600-count) + " seconds");
return true;
}
wait(1.0);
}
console.error(
"Failed: Server " + id + " not cleaned out after 600 seconds");
return false;
};
const waitForSynchronousReplication = function (collection) {
global.ArangoClusterInfo.flush();
var cinfo = global.ArangoClusterInfo.getCollectionInfo(
"_system", collection);
var shards = Object.keys(cinfo.shards);
var replFactor = cinfo.shards[shards[0]].length;
var count = 0;
while (++count <= 180) {
var ccinfo = shards.map(
s => global.ArangoClusterInfo.getCollectionInfoCurrent(
"_system", collection, s)
);
let replicas = ccinfo.map(s => s.servers);
if (_.every(replicas, x => x.length === replFactor)) {
return true;
}
wait(0.5);
global.ArangoClusterInfo.flush();
}
console.error(`Collection "${collection}" failed to get all followers in sync after 90 sec`);
return false;
};
beforeEach(function () {
cleanUp();
internal.db._create(colName, {replicationFactor, numberOfShards});
expect(waitForSynchronousReplication(colName)).to.equal(true);
});
afterEach(cleanUp);
it("should create all shards and followers on identical servers", function () {
internal.db._create(followCollection, {replicationFactor, numberOfShards, distributeShardsLike: colName});
expect(waitForSynchronousReplication(followCollection)).to.equal(true);
compareDistributions();
});
it("should be resilient to a failover in the original collection", function () {
var server = global.ArangoClusterInfo.getDBServers()[1].serverId;
// Clean out the server that is scheduled second.
expect(cleanOutServer(server)).to.not.equal(false);
expect(waitForCleanout(server)).to.equal(true);
expect(waitForSynchronousReplication(colName)).to.equal(true);
// Now we have moved around some shards.
internal.db._create(followCollection, {replicationFactor, numberOfShards, distributeShardsLike: colName});
expect(waitForSynchronousReplication(followCollection)).to.equal(true);
compareDistributions();
});
});
});
});

View File

@ -63,9 +63,10 @@ function checkReplicationFactor(name, fac) {
internal.sleep(0.5);
}
let current = ArangoAgency.get('Current/Collections/_system');
let val = current.arango.Current.Collections['_system'][collectionId];
throw "replicationFactor is not reflected properly in " +
"/Current/Collections/_system/" + collectionId + ": "+ JSON.stringify(val);
let val = current.arango.Current.Collections['_system'][collectionId];
expect(true).to.equal(false, "The expected replicationFactor of " + fac + " for collection "
+ name + " is not reflected properly in " +
"/Current/Collections/_system/" + collectionId + ": " + JSON.stringify(val));
};
describe('Update collection properties', function() {
@ -87,13 +88,13 @@ describe('Update collection properties', function() {
checkReplicationFactor(cn1, 1);
const coll = db._collection(cn1);
let props = coll.properties({replicationFactor: 2});
expect(props.replicationFactor).to.equal(2);
checkReplicationFactor(cn1, 2);
});
it('decrease replication factor ', function() {
db._create(cn1, {replicationFactor: 2, numberOfShards: 2}, {waitForSyncReplication: true});
@ -115,7 +116,7 @@ describe('Update collection properties', function() {
try {
const coll = db._collection(cn1);
coll.properties({replicationFactor: -1});
expect(false.replicationFactor).to.equal(true,
expect(false.replicationFactor).to.equal(true,
"Was able to update replicationFactor of follower");
} catch(e) {
expect(e.errorNum).to.equal(errors.ERROR_BAD_PARAMETER.code);
@ -124,7 +125,7 @@ describe('Update collection properties', function() {
try {
const coll = db._collection(cn1);
coll.properties({replicationFactor: 100});
expect(false.replicationFactor).to.equal(true,
expect(false.replicationFactor).to.equal(true,
"Was able to update replicationFactor of follower");
} catch(e) {
expect(e.errorNum).to.equal(errors.ERROR_BAD_PARAMETER.code);
@ -133,7 +134,7 @@ describe('Update collection properties', function() {
try {
const coll = db._collection(cn1);
coll.properties({replicationFactor: "satellite"});
expect(false.replicationFactor).to.equal(true,
expect(false.replicationFactor).to.equal(true,
"Was able to update replicationFactor of follower");
} catch(e) {
expect(e.errorNum).to.equal(errors.ERROR_FORBIDDEN.code);
@ -153,9 +154,9 @@ describe('Update collection properties with distributeShardsLike, ', function()
db._useDatabase("_system");
try {
db._drop(cn2);
db._drop(cn2);
} catch (e) {}
try {
db._drop(cn1);
} catch (e) {}
@ -166,22 +167,22 @@ describe('Update collection properties with distributeShardsLike, ', function()
db._create(cn2, {distributeShardsLike: cn1}, {waitForSyncReplication: true});
checkReplicationFactor(cn1, 1);
checkReplicationFactor(cn2, 1);
checkReplicationFactor(cn2, 1);
const leader = db._collection(cn1);
let props = leader.properties({replicationFactor: 2});
expect(props.replicationFactor).to.equal(2);
checkReplicationFactor(cn1, 2);
checkReplicationFactor(cn2, 2);
checkReplicationFactor(cn2, 2);
});
it('decrease replication factor', function() {
db._create(cn1, {replicationFactor: 2, numberOfShards: 2}, {waitForSyncReplication: true});
db._create(cn2, {distributeShardsLike: cn1}, {waitForSyncReplication: true});
checkReplicationFactor(cn1, 2);
checkReplicationFactor(cn2, 2);
checkReplicationFactor(cn2, 2);
const leader = db._collection(cn1);
@ -197,12 +198,12 @@ describe('Update collection properties with distributeShardsLike, ', function()
db._create(cn2, {distributeShardsLike: cn1}, {waitForSyncReplication: true});
checkReplicationFactor(cn1, 2);
checkReplicationFactor(cn2, 2);
checkReplicationFactor(cn2, 2);
try {
const follower = db._collection(cn2);
follower.properties({replicationFactor: 1});
expect(false.replicationFactor).to.equal(true,
expect(false.replicationFactor).to.equal(true,
"Was able to update replicationFactor of follower");
} catch(e) {
expect(e.errorNum).to.equal(errors.ERROR_FORBIDDEN.code);
@ -219,10 +220,16 @@ describe('Replication factor constraints', function() {
db._useDatabase("_system");
try {
db._drop(cn1);
// must be dropped first because cn1 is prototype for this collection
// and can only be dropped if all dependent collections are dropped first.
db._drop(cn2);
} catch (e) {}
try {
db._drop(cn1);
} catch (e) {}
});
it('should not allow to create a collection with more replicas than dbservers available', function() {
try {
db._create(cn1, {replicationFactor: 5});
@ -235,4 +242,27 @@ describe('Replication factor constraints', function() {
it('should allow to create a collection with more replicas than dbservers when explicitly requested', function() {
db._create(cn1, {replicationFactor: 5}, {enforceReplicationFactor: false});
});
});
it('check replication factor of system collections', function() {
["_appbundles", "_apps", "_aqlfunctions", "_frontend", "_graphs",
"_iresearch_analyzers", "_jobs", "_modules", "_queues", "_routing",
"_statistics" , "_statistics15" , "_statisticsRaw" ,"_users"
].forEach(name => {
if(name === "_graphs"){
expect(db[name].properties()['replicationFactor']).to.equal(2);
} else if(db[name]){
expect(db[name].properties()['replicationFactor']).to.equal(2);
expect(db[name].properties()['distributeShardsLike']).to.equal("_graphs");
}
});
});
it('distributeShardsLike should ignore additional parameters', function() {
db._create(cn1, {replicationFactor: 2, numberOfShards: 2}, {waitForSyncReplication: true});
db._create(cn2, {distributeShardsLike: cn1, replicationFactor: 5, numberOfShards: 99}, {waitForSyncReplication: true});
expect(db[cn1].properties()['replicationFactor']).to.equal(db[cn2].properties()['replicationFactor']);
expect(db[cn1].properties()['numberOfShards']).to.equal(db[cn2].properties()['numberOfShards']);
expect(db[cn2].properties()['distributeShardsLike']).to.equal(cn1);
});
});