mirror of https://gitee.com/bigwinds/arangodb
368 lines
14 KiB
C++
368 lines
14 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
/// DISCLAIMER
|
|
///
|
|
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
|
|
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
|
|
///
|
|
/// Licensed under the Apache License, Version 2.0 (the "License");
|
|
/// you may not use this file except in compliance with the License.
|
|
/// You may obtain a copy of the License at
|
|
///
|
|
/// http://www.apache.org/licenses/LICENSE-2.0
|
|
///
|
|
/// Unless required by applicable law or agreed to in writing, software
|
|
/// distributed under the License is distributed on an "AS IS" BASIS,
|
|
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
/// See the License for the specific language governing permissions and
|
|
/// limitations under the License.
|
|
///
|
|
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
|
///
|
|
/// @author Jan Steemann
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "ShardingStrategyDefault.h"
|
|
#include "Basics/Exceptions.h"
|
|
#include "Basics/MutexLocker.h"
|
|
#include "Basics/StaticStrings.h"
|
|
#include "Basics/hashes.h"
|
|
#include "Cluster/ClusterInfo.h"
|
|
#include "Cluster/ServerState.h"
|
|
#include "Sharding/ShardingInfo.h"
|
|
#include "VocBase/LogicalCollection.h"
|
|
|
|
#include <velocypack/Builder.h>
|
|
#include <velocypack/Slice.h>
|
|
#include <velocypack/StringRef.h>
|
|
#include <velocypack/velocypack-aliases.h>
|
|
|
|
using namespace arangodb;
|
|
|
|
namespace {
|
|
|
|
/// @brief which portion of a string attribute value takes part in hashing,
/// as selected by a ':' marker on the shard key attribute name
enum class Part : uint8_t {
  ALL,    // no ':' marker on the attribute name
  FRONT,  // attribute name ends with ':'
  BACK    // attribute name starts with ':'
};
|
|
|
|
/// @brief refuses the named sharding strategy for smart edge collections
/// throws TRI_ERROR_BAD_PARAMETER if the collection is smart and of edge
/// type; does nothing for any other collection
void preventUseOnSmartEdgeCollection(LogicalCollection const* collection,
                                     std::string const& strategyName) {
  bool const isSmartEdge =
      collection->isSmart() && collection->type() == TRI_COL_TYPE_EDGE;
  if (!isSmartEdge) {
    return;
  }

  std::string message("sharding strategy ");
  message += strategyName;
  message += " cannot be used for smart edge collections";
  THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, message);
}
|
|
|
|
/// @brief splits a shard key attribute name into the real attribute name and
/// the part selector encoded by a ':' marker:
///   "attr:"  -> realAttr "attr", Part::FRONT
///   ":attr"  -> realAttr "attr", Part::BACK
///   "attr"   -> realAttr "attr", Part::ALL
/// a trailing ':' takes precedence over a leading one.
/// realAttr refers to the memory of attr and does not own it
inline void parseAttributeAndPart(std::string const& attr,
                                  arangodb::velocypack::StringRef& realAttr, Part& part) {
  char const* begin = attr.data();
  size_t length = attr.size();
  part = Part::ALL;

  if (length > 0) {
    if (attr.back() == ':') {
      // drop the trailing marker
      --length;
      part = Part::FRONT;
    } else if (attr.front() == ':') {
      // drop the leading marker
      ++begin;
      --length;
      part = Part::BACK;
    }
  }

  realAttr = arangodb::velocypack::StringRef(begin, length);
}
|
|
|
|
template <bool returnNullSlice>
|
|
VPackSlice buildTemporarySlice(VPackSlice const& sub, Part const& part,
|
|
VPackBuilder& temporaryBuilder, bool splitSlash) {
|
|
if (sub.isString()) {
|
|
arangodb::velocypack::StringRef key(sub);
|
|
if (splitSlash) {
|
|
size_t pos = key.find('/');
|
|
if (pos != std::string::npos) {
|
|
// We have an _id. Split it.
|
|
key = key.substr(pos + 1);
|
|
}
|
|
}
|
|
switch (part) {
|
|
case Part::ALL: {
|
|
// by adding the key to the builder, we may invalidate the original key...
|
|
// however, this is safe here as the original key is not used after we have
|
|
// added to the builder
|
|
return VPackSlice(temporaryBuilder.add(VPackValuePair(key.data(), key.size(), VPackValueType::String)));
|
|
}
|
|
case Part::FRONT: {
|
|
size_t pos = key.find(':');
|
|
if (pos != std::string::npos) {
|
|
key = key.substr(0, pos);
|
|
// by adding the key to the builder, we may invalidate the original key...
|
|
// however, this is safe here as the original key is not used after we have
|
|
// added to the builder
|
|
return VPackSlice(temporaryBuilder.add(VPackValuePair(key.data(), key.size(), VPackValueType::String)));
|
|
}
|
|
// fall-through to returning null or original slice
|
|
break;
|
|
}
|
|
case Part::BACK: {
|
|
size_t pos = key.rfind(':');
|
|
if (pos != std::string::npos) {
|
|
key = key.substr(pos + 1);
|
|
// by adding the key to the builder, we may invalidate the original key...
|
|
// however, this is safe here as the original key is not used after we have
|
|
// added to the builder
|
|
return VPackSlice(temporaryBuilder.add(VPackValuePair(key.data(), key.size(), VPackValueType::String)));
|
|
}
|
|
// fall-through to returning null or original slice
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (returnNullSlice) {
|
|
return VPackSlice::nullSlice();
|
|
}
|
|
return sub;
|
|
}
|
|
|
|
template <bool returnNullSlice>
|
|
uint64_t hashByAttributesImpl(VPackSlice slice, std::vector<std::string> const& attributes,
|
|
bool docComplete, int& error, std::string const& key) {
|
|
uint64_t hash = TRI_FnvHashBlockInitial();
|
|
error = TRI_ERROR_NO_ERROR;
|
|
slice = slice.resolveExternal();
|
|
if (slice.isObject()) {
|
|
VPackBuilder temporaryBuilder;
|
|
for (auto const& attr : attributes) {
|
|
temporaryBuilder.clear();
|
|
|
|
arangodb::velocypack::StringRef realAttr;
|
|
::Part part;
|
|
::parseAttributeAndPart(attr, realAttr, part);
|
|
VPackSlice sub = slice.get(realAttr).resolveExternal();
|
|
if (sub.isNone()) {
|
|
// shard key attribute not present in document
|
|
if (realAttr == StaticStrings::KeyString && !key.empty()) {
|
|
temporaryBuilder.add(VPackValue(key));
|
|
sub = temporaryBuilder.slice();
|
|
} else {
|
|
if (!docComplete) {
|
|
error = TRI_ERROR_CLUSTER_NOT_ALL_SHARDING_ATTRIBUTES_GIVEN;
|
|
}
|
|
// Null is equal to None/not present
|
|
sub = VPackSlice::nullSlice();
|
|
}
|
|
}
|
|
// buildTemporarySlice may append data to the builder, which may invalidate
|
|
// the original "sub" value. however, "sub" is reassigned immediately with
|
|
// a new value, so it does not matter in reality
|
|
sub = ::buildTemporarySlice<returnNullSlice>(sub, part, temporaryBuilder, false);
|
|
hash = sub.normalizedHash(hash);
|
|
}
|
|
} else if (slice.isString() && attributes.size() == 1) {
|
|
arangodb::velocypack::StringRef realAttr;
|
|
::Part part;
|
|
::parseAttributeAndPart(attributes[0], realAttr, part);
|
|
if (realAttr == StaticStrings::KeyString && key.empty()) {
|
|
// We always need the _key part. Everything else should be ignored
|
|
// beforehand.
|
|
VPackBuilder temporaryBuilder;
|
|
VPackSlice sub =
|
|
::buildTemporarySlice<returnNullSlice>(slice, part, temporaryBuilder, true);
|
|
hash = sub.normalizedHash(hash);
|
|
}
|
|
}
|
|
return hash;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
std::string const ShardingStrategyNone::NAME("none");
|
|
std::string const ShardingStrategyCommunityCompat::NAME("community-compat");
|
|
std::string const ShardingStrategyEnterpriseCompat::NAME("enterprise-compat");
|
|
std::string const ShardingStrategyHash::NAME("hash");
|
|
|
|
/// @brief a sharding class used for single server and the DB servers
|
|
/// calling getResponsibleShard on this class will always throw an exception
|
|
ShardingStrategyNone::ShardingStrategyNone() : ShardingStrategy() {
|
|
if (ServerState::instance()->isCoordinator()) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(
|
|
TRI_ERROR_BAD_PARAMETER, std::string("sharding strategy ") + NAME +
|
|
" cannot be used for sharded collections");
|
|
}
|
|
}
|
|
|
|
/// calling getResponsibleShard on this class will always throw an exception
|
|
int ShardingStrategyNone::getResponsibleShard(arangodb::velocypack::Slice slice,
|
|
bool docComplete, ShardID& shardID,
|
|
bool& usesDefaultShardKeys,
|
|
std::string const& key) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(
|
|
TRI_ERROR_INTERNAL, "unexpected invocation of ShardingStrategyNone");
|
|
}
|
|
|
|
/// @brief a sharding class used to indicate that the selected sharding strategy
|
|
/// is only available in the enterprise edition of ArangoDB
|
|
/// calling getResponsibleShard on this class will always throw an exception
|
|
/// with an appropriate error message
|
|
ShardingStrategyOnlyInEnterprise::ShardingStrategyOnlyInEnterprise(std::string const& name)
|
|
: ShardingStrategy(), _name(name) {}
|
|
|
|
/// @brief will always throw an exception telling the user the selected sharding
|
|
/// is only available in the enterprise edition
|
|
int ShardingStrategyOnlyInEnterprise::getResponsibleShard(arangodb::velocypack::Slice slice,
|
|
bool docComplete, ShardID& shardID,
|
|
bool& usesDefaultShardKeys,
|
|
std::string const& key) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(
|
|
TRI_ERROR_ONLY_ENTERPRISE,
|
|
std::string("sharding strategy '") + _name +
|
|
"' is only available in the enterprise edition of ArangoDB");
|
|
}
|
|
|
|
/// @brief base class for hash-based sharding
|
|
ShardingStrategyHashBase::ShardingStrategyHashBase(ShardingInfo* sharding)
|
|
: ShardingStrategy(),
|
|
_sharding(sharding),
|
|
_shards(),
|
|
_usesDefaultShardKeys(false),
|
|
_shardsSet(false) {
|
|
auto shardKeys = _sharding->shardKeys();
|
|
|
|
// validate shard keys
|
|
if (shardKeys.empty()) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
|
|
"invalid shard keys");
|
|
}
|
|
for (auto const& it : shardKeys) {
|
|
if (it.empty()) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
|
|
"invalid shard keys");
|
|
}
|
|
}
|
|
}
|
|
|
|
/// @brief determines the shard responsible for the given document
/// hashes the shard key attributes via the virtual hashByAttributes(), mixes
/// a fixed phrase into the hash and uses the result modulo the number of
/// shards as index into the list of shards. shardID receives the selected
/// shard, usesDefaultShardKeys receives the strategy's flag. the return
/// value is the error code reported by hashByAttributes(); shardID is
/// assigned regardless of that code
int ShardingStrategyHashBase::getResponsibleShard(arangodb::velocypack::Slice slice,
                                                  bool docComplete, ShardID& shardID,
                                                  bool& usesDefaultShardKeys,
                                                  std::string const& key) {
  static constexpr char const* hashSalt =
      "Foxx you have stolen the goose, give she back again!";
  static constexpr size_t hashSaltLength = 52;

  determineShards();
  TRI_ASSERT(!_shards.empty());
  TRI_ASSERT(!_sharding->shardKeys().empty());

  usesDefaultShardKeys = _usesDefaultShardKeys;

  int errorCode = TRI_ERROR_NO_ERROR;
  // calls virtual "hashByAttributes" function
  uint64_t const attributeHash =
      hashByAttributes(slice, _sharding->shardKeys(), docComplete, errorCode, key);
  // To improve our hash function result:
  uint64_t const finalHash = TRI_FnvHashBlock(attributeHash, hashSalt, hashSaltLength);

  shardID = _shards[finalHash % _shards.size()];
  return errorCode;
}
|
|
|
|
void ShardingStrategyHashBase::determineShards() {
|
|
if (_shardsSet) {
|
|
TRI_ASSERT(!_shards.empty());
|
|
return;
|
|
}
|
|
|
|
MUTEX_LOCKER(mutex, _shardsSetMutex);
|
|
if (_shardsSet) {
|
|
TRI_ASSERT(!_shards.empty());
|
|
return;
|
|
}
|
|
|
|
// determine all available shards (which will stay const afterwards)
|
|
auto ci = ClusterInfo::instance();
|
|
auto shards = ci->getShardList(std::to_string(_sharding->collection()->id()));
|
|
|
|
_shards = *shards;
|
|
|
|
if (_shards.empty()) {
|
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
|
|
"invalid shard count");
|
|
}
|
|
|
|
TRI_ASSERT(!_shards.empty());
|
|
_shardsSet = true;
|
|
}
|
|
|
|
uint64_t ShardingStrategyHashBase::hashByAttributes(VPackSlice slice,
|
|
std::vector<std::string> const& attributes,
|
|
bool docComplete, int& error,
|
|
std::string const& key) {
|
|
return ::hashByAttributesImpl<false>(slice, attributes, docComplete, error, key);
|
|
}
|
|
|
|
/// @brief old version of the sharding used in the community edition
|
|
/// this is DEPRECATED and should not be used for new collections
|
|
ShardingStrategyCommunityCompat::ShardingStrategyCommunityCompat(ShardingInfo* sharding)
|
|
: ShardingStrategyHashBase(sharding) {
|
|
// whether or not the collection uses the default shard attributes (["_key"])
|
|
// this setting is initialized to false, and we may change it now
|
|
TRI_ASSERT(!_usesDefaultShardKeys);
|
|
auto shardKeys = _sharding->shardKeys();
|
|
if (shardKeys.size() == 1 && shardKeys[0] == StaticStrings::KeyString) {
|
|
_usesDefaultShardKeys = true;
|
|
}
|
|
|
|
::preventUseOnSmartEdgeCollection(_sharding->collection(), NAME);
|
|
}
|
|
|
|
/// @brief old version of the sharding used in the enterprise edition
|
|
/// this is DEPRECATED and should not be used for new collections
|
|
ShardingStrategyEnterpriseBase::ShardingStrategyEnterpriseBase(ShardingInfo* sharding)
|
|
: ShardingStrategyHashBase(sharding) {
|
|
// whether or not the collection uses the default shard attributes (["_key"])
|
|
// this setting is initialized to false, and we may change it now
|
|
TRI_ASSERT(!_usesDefaultShardKeys);
|
|
auto shardKeys = _sharding->shardKeys();
|
|
TRI_ASSERT(!shardKeys.empty());
|
|
|
|
if (shardKeys.size() == 1) {
|
|
_usesDefaultShardKeys =
|
|
(shardKeys[0] == StaticStrings::KeyString ||
|
|
(shardKeys[0][0] == ':' &&
|
|
shardKeys[0].compare(1, shardKeys[0].size() - 1, StaticStrings::KeyString) == 0) ||
|
|
(shardKeys[0].back() == ':' &&
|
|
shardKeys[0].compare(0, shardKeys[0].size() - 1, StaticStrings::KeyString) == 0));
|
|
}
|
|
}
|
|
|
|
/// @brief this implementation of "hashByAttributes" is slightly different
|
|
/// than the implementation in the Community version
|
|
/// we leave the differences in place, because making any changes here
|
|
/// will affect the data distribution, which we want to avoid
|
|
uint64_t ShardingStrategyEnterpriseBase::hashByAttributes(
|
|
VPackSlice slice, std::vector<std::string> const& attributes,
|
|
bool docComplete, int& error, std::string const& key) {
|
|
return ::hashByAttributesImpl<true>(slice, attributes, docComplete, error, key);
|
|
}
|
|
|
|
/// @brief old version of the sharding used in the enterprise edition
|
|
/// this is DEPRECATED and should not be used for new collections
|
|
ShardingStrategyEnterpriseCompat::ShardingStrategyEnterpriseCompat(ShardingInfo* sharding)
|
|
: ShardingStrategyEnterpriseBase(sharding) {
|
|
::preventUseOnSmartEdgeCollection(_sharding->collection(), NAME);
|
|
}
|
|
|
|
/// @brief default hash-based sharding strategy
|
|
/// used for new collections from 3.4 onwards
|
|
ShardingStrategyHash::ShardingStrategyHash(ShardingInfo* sharding)
|
|
: ShardingStrategyHashBase(sharding) {
|
|
// whether or not the collection uses the default shard attributes (["_key"])
|
|
// this setting is initialized to false, and we may change it now
|
|
TRI_ASSERT(!_usesDefaultShardKeys);
|
|
auto shardKeys = _sharding->shardKeys();
|
|
TRI_ASSERT(!shardKeys.empty());
|
|
|
|
if (shardKeys.size() == 1) {
|
|
_usesDefaultShardKeys =
|
|
(shardKeys[0] == StaticStrings::KeyString ||
|
|
(shardKeys[0][0] == ':' &&
|
|
shardKeys[0].compare(1, shardKeys[0].size() - 1, StaticStrings::KeyString) == 0) ||
|
|
(shardKeys[0].back() == ':' &&
|
|
shardKeys[0].compare(0, shardKeys[0].size() - 1, StaticStrings::KeyString) == 0));
|
|
}
|
|
|
|
::preventUseOnSmartEdgeCollection(_sharding->collection(), NAME);
|
|
}
|