////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Max Neunhoeffer
/// @author Jan Steemann
////////////////////////////////////////////////////////////////////////////////

#include "ClusterInfo.h"

#include <velocypack/Iterator.h>
#include <velocypack/Builder.h>
#include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h>

#include "Basics/JsonHelper.h"
#include "Basics/MutexLocker.h"
#include "Basics/ReadLocker.h"
#include "Basics/StringUtils.h"
#include "Basics/VelocyPackHelper.h"
#include "Basics/WriteLocker.h"
#include "Basics/json-utilities.h"
#include "Basics/json.h"
#include "Cluster/ServerState.h"
#include "Logger/Logger.h"
#include "Rest/HttpResponse.h"
#include "VocBase/document-collection.h"

#ifdef _WIN32
// turn off MSVC warnings about overly long type names for debug symbols
#pragma warning(disable : 4503)
#endif

using namespace arangodb;

using arangodb::basics::JsonHelper;

static std::unique_ptr<ClusterInfo> _instance;

////////////////////////////////////////////////////////////////////////////////
/// @brief a local helper to report errors and messages
////////////////////////////////////////////////////////////////////////////////

static inline int setErrormsg(int ourerrno, std::string& errorMsg) {
  errorMsg = TRI_errno_string(ourerrno);
  return ourerrno;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief check whether the JSON returns an error
////////////////////////////////////////////////////////////////////////////////

static inline bool hasError(VPackSlice const& slice) {
  return arangodb::basics::VelocyPackHelper::getBooleanValue(slice, "error",
                                                             false);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief extract the error message from a JSON
////////////////////////////////////////////////////////////////////////////////

static std::string extractErrorMessage(std::string const& shardId,
                                       VPackSlice const& slice) {
  std::string msg = " shardID:" + shardId + ": ";

  // add error message text
  msg += arangodb::basics::VelocyPackHelper::getStringValue(slice,
                                                            "errorMessage", "");

  // add error number
  if (slice.hasKey("errorNum")) {
    VPackSlice const errorNum = slice.get("errorNum");
    if (errorNum.isNumber()) {
      msg += " (errNum=" + arangodb::basics::StringUtils::itoa(
                               errorNum.getNumericValue<uint32_t>()) +
             ")";
    }
  }

  return msg;
}

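// Illustrative sketch (added comment, not part of the original source): how
// the local error helpers above are typically combined by the coordinator
// methods further down. `agencyEntry` is a hypothetical VPackSlice taken from
// an agency result entry.
//
//   std::string errorMsg;
//   if (hasError(agencyEntry)) {
//     errorMsg = "shard reported:" + extractErrorMessage("s0000001", agencyEntry);
//     return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION, errorMsg);
//   }
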
////////////////////////////////////////////////////////////////////////////////
/// @brief creates an empty collection info object
////////////////////////////////////////////////////////////////////////////////

CollectionInfo::CollectionInfo() : _json(nullptr) {}

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a collection info object from json
////////////////////////////////////////////////////////////////////////////////

CollectionInfo::CollectionInfo(TRI_json_t* json) : _json(json) {}

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a collection info object from another
////////////////////////////////////////////////////////////////////////////////

CollectionInfo::CollectionInfo(CollectionInfo const& other)
    : _json(other._json) {
  if (other._json != nullptr) {
    _json = TRI_CopyJson(TRI_UNKNOWN_MEM_ZONE, other._json);
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief move constructs a collection info object from another
////////////////////////////////////////////////////////////////////////////////

CollectionInfo::CollectionInfo(CollectionInfo&& other) : _json(other._json) {
  other._json = nullptr;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief copy assigns a collection info object from another one
////////////////////////////////////////////////////////////////////////////////

CollectionInfo& CollectionInfo::operator=(CollectionInfo const& other) {
  if (other._json != nullptr && this != &other) {
    _json = TRI_CopyJson(TRI_UNKNOWN_MEM_ZONE, other._json);
  } else {
    _json = nullptr;
  }

  return *this;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief move assigns a collection info object from another one
////////////////////////////////////////////////////////////////////////////////

CollectionInfo& CollectionInfo::operator=(CollectionInfo&& other) {
  if (this == &other) {
    return *this;
  }

  if (_json != nullptr) {
    TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, _json);
  }
  _json = other._json;
  other._json = nullptr;

  return *this;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief destroys a collection info object
////////////////////////////////////////////////////////////////////////////////

CollectionInfo::~CollectionInfo() {
  if (_json != nullptr) {
    TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, _json);
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief creates an empty collection info object
////////////////////////////////////////////////////////////////////////////////

CollectionInfoCurrent::CollectionInfoCurrent() {}

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a collection info object from json
////////////////////////////////////////////////////////////////////////////////

CollectionInfoCurrent::CollectionInfoCurrent(ShardID const& shardID,
                                             TRI_json_t* json) {
  _jsons.insert(make_pair(shardID, json));
}

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a collection info object from another
////////////////////////////////////////////////////////////////////////////////

CollectionInfoCurrent::CollectionInfoCurrent(CollectionInfoCurrent const& other)
    : _jsons(other._jsons) {
  copyAllJsons();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief moves a collection info current object from another
////////////////////////////////////////////////////////////////////////////////

CollectionInfoCurrent::CollectionInfoCurrent(CollectionInfoCurrent&& other) {
  _jsons.swap(other._jsons);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief copy assigns a collection info current object from another one
////////////////////////////////////////////////////////////////////////////////

CollectionInfoCurrent& CollectionInfoCurrent::operator=(
    CollectionInfoCurrent const& other) {
  if (this == &other) {
    return *this;
  }
  freeAllJsons();
  _jsons = other._jsons;
  copyAllJsons();
  return *this;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief move assigns a collection info current object from another one
////////////////////////////////////////////////////////////////////////////////

CollectionInfoCurrent& CollectionInfoCurrent::operator=(
    CollectionInfoCurrent&& other) {
  if (this == &other) {
    return *this;
  }
  freeAllJsons();
  _jsons.clear();
  _jsons.swap(other._jsons);
  return *this;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief destroys a collection info object
////////////////////////////////////////////////////////////////////////////////

CollectionInfoCurrent::~CollectionInfoCurrent() { freeAllJsons(); }

////////////////////////////////////////////////////////////////////////////////
/// @brief free all pointers to TRI_json_t in the map _jsons
////////////////////////////////////////////////////////////////////////////////

void CollectionInfoCurrent::freeAllJsons() {
  for (auto it = _jsons.begin(); it != _jsons.end(); ++it) {
    if (it->second != nullptr) {
      TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, it->second);
    }
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief copy TRI_json_t behind the pointers in the map _jsons
////////////////////////////////////////////////////////////////////////////////

void CollectionInfoCurrent::copyAllJsons() {
  for (auto it = _jsons.begin(); it != _jsons.end(); ++it) {
    if (nullptr != it->second) {
      it->second = TRI_CopyJson(TRI_UNKNOWN_MEM_ZONE, it->second);
    }
  }
}

////////////////////////////////////////////////////////////////////////////////
/// @brief create the clusterinfo instance
////////////////////////////////////////////////////////////////////////////////

void ClusterInfo::createInstance(AgencyCallbackRegistry* agencyCallbackRegistry) {
  _instance.reset(new ClusterInfo(agencyCallbackRegistry));
}

////////////////////////////////////////////////////////////////////////////////
/// @brief returns an instance of the cluster info class
////////////////////////////////////////////////////////////////////////////////

ClusterInfo* ClusterInfo::instance() { return _instance.get(); }

////////////////////////////////////////////////////////////////////////////////
/// @brief creates a cluster info object
////////////////////////////////////////////////////////////////////////////////

ClusterInfo::ClusterInfo(AgencyCallbackRegistry* agencyCallbackRegistry)
    : _agency(), _agencyCallbackRegistry(agencyCallbackRegistry), _uniqid() {
  _uniqid._currentValue = _uniqid._upperValue = 0ULL;

  // Actual loading into caches is postponed until necessary
}

////////////////////////////////////////////////////////////////////////////////
/// @brief destroys a cluster info object
////////////////////////////////////////////////////////////////////////////////

ClusterInfo::~ClusterInfo() {
  clearPlannedDatabases(_plannedDatabases);
  clearCurrentDatabases(_currentDatabases);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief increase the uniqid value. if it exceeds the upper bound, fetch a
/// new upper bound value from the agency
////////////////////////////////////////////////////////////////////////////////

uint64_t ClusterInfo::uniqid(uint64_t count) {
  MUTEX_LOCKER(mutexLocker, _idLock);

  if (_uniqid._currentValue + count - 1 >= _uniqid._upperValue) {
    uint64_t fetch = count;

    if (fetch < MinIdsPerBatch) {
      fetch = MinIdsPerBatch;
    }

    AgencyCommResult result = _agency.uniqid("Sync/LatestID", fetch, 0.0);

    if (!result.successful() || result._index == 0) {
      return 0;
    }

    _uniqid._currentValue = result._index + count;
    _uniqid._upperValue = _uniqid._currentValue + fetch - 1;

    return result._index;
  }

  uint64_t result = _uniqid._currentValue;
  _uniqid._currentValue += count;

  return result;
}

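// Illustrative sketch (added comment, not in the original file): a caller that
// needs a contiguous range of cluster-wide unique ids reserves them in one go,
// so only one agency round-trip is paid for the whole batch.
//
//   ClusterInfo* ci = ClusterInfo::instance();
//   uint64_t first = ci->uniqid(100);  // ids first .. first + 99 are now ours
//   if (first == 0) { /* agency unreachable, handle the error */ }
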
////////////////////////////////////////////////////////////////////////////////
/// @brief flush the caches (used for testing)
////////////////////////////////////////////////////////////////////////////////

void ClusterInfo::flush() {
  loadServers();
  loadCurrentDBServers();
  loadCurrentCoordinators();
  loadPlannedDatabases();
  loadCurrentDatabases();
  loadPlannedCollections();
  loadCurrentCollections();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief ask whether a cluster database exists
////////////////////////////////////////////////////////////////////////////////

bool ClusterInfo::doesDatabaseExist(DatabaseID const& databaseID, bool reload) {
  int tries = 0;

  if (reload || !_plannedDatabasesProt.isValid ||
      !_currentDatabasesProt.isValid || !_DBServersProt.isValid) {
    loadPlannedDatabases();
    loadCurrentDatabases();
    loadCurrentDBServers();
    ++tries;  // no need to reload if the database is not found
  }

  // From now on we know that all data has been valid once, so no need
  // to check the isValid flags again under the lock.

  while (true) {
    {
      size_t expectedSize;
      {
        READ_LOCKER(readLocker, _DBServersProt.lock);
        expectedSize = _DBServers.size();
      }

      // look up database by name:

      READ_LOCKER(readLocker, _plannedDatabasesProt.lock);
      // _plannedDatabases is a map-type<DatabaseID, TRI_json_t*>
      auto it = _plannedDatabases.find(databaseID);

      if (it != _plannedDatabases.end()) {
        // found the database in Plan
        READ_LOCKER(readLocker, _currentDatabasesProt.lock);
        // _currentDatabases is
        // a map-type<DatabaseID, a map-type<ServerID, TRI_json_t*>>
        auto it2 = _currentDatabases.find(databaseID);

        if (it2 != _currentDatabases.end()) {
          // found the database in Current

          return ((*it2).second.size() >= expectedSize);
        }
      }
    }

    if (++tries >= 2) {
      break;
    }

    loadPlannedDatabases();
    loadCurrentDatabases();
    loadCurrentDBServers();
  }

  return false;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief get list of databases in the cluster
////////////////////////////////////////////////////////////////////////////////

std::vector<DatabaseID> ClusterInfo::listDatabases(bool reload) {
  std::vector<DatabaseID> result;

  if (reload || !_plannedDatabasesProt.isValid ||
      !_currentDatabasesProt.isValid || !_DBServersProt.isValid) {
    loadPlannedDatabases();
    loadCurrentDatabases();
    loadCurrentDBServers();
  }

  // From now on we know that all data has been valid once, so no need
  // to check the isValid flags again under the lock.

  size_t expectedSize;
  {
    READ_LOCKER(readLocker, _DBServersProt.lock);
    expectedSize = _DBServers.size();
  }

  {
    READ_LOCKER(readLockerPlanned, _plannedDatabasesProt.lock);
    READ_LOCKER(readLockerCurrent, _currentDatabasesProt.lock);
    // _plannedDatabases is a map-type<DatabaseID, TRI_json_t*>
    auto it = _plannedDatabases.begin();

    while (it != _plannedDatabases.end()) {
      // _currentDatabases is:
      // a map-type<DatabaseID, a map-type<ServerID, TRI_json_t*>>
      auto it2 = _currentDatabases.find((*it).first);

      if (it2 != _currentDatabases.end()) {
        if ((*it2).second.size() >= expectedSize) {
          result.push_back((*it).first);
        }
      }

      ++it;
    }
  }

  return result;
}

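// Illustrative sketch (added comment): how a coordinator-side caller might use
// the two lookups above; passing reload == true forces a fresh read from the
// agency before the check.
//
//   ClusterInfo* ci = ClusterInfo::instance();
//   if (ci->doesDatabaseExist("myDb", true)) {
//     for (DatabaseID const& db : ci->listDatabases(false)) {
//       LOG(INFO) << "database is complete on all DBServers: " << db;
//     }
//   }
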
////////////////////////////////////////////////////////////////////////////////
/// @brief actually clears a list of planned databases
////////////////////////////////////////////////////////////////////////////////

void ClusterInfo::clearPlannedDatabases(
    std::unordered_map<DatabaseID, TRI_json_t*>& databases) {
  auto it = databases.begin();
  while (it != databases.end()) {
    TRI_json_t* json = (*it).second;

    if (json != nullptr) {
      TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);
    }
    ++it;
  }
  databases.clear();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief (re-)load the information about planned databases
/// Usually one does not have to call this directly.
////////////////////////////////////////////////////////////////////////////////
//
static std::string const prefixPlannedDatabases = "Plan/Databases";

void ClusterInfo::loadPlannedDatabases() {
  uint64_t storedVersion = _plannedDatabasesProt.version;
  MUTEX_LOCKER(mutexLocker, _plannedDatabasesProt.mutex);
  if (_plannedDatabasesProt.version > storedVersion) {
    // Somebody else did what we intended to do, so just return
    return;
  }

  // Now contact the agency:
  AgencyCommResult result;
  {
    AgencyCommLocker locker("Plan", "READ");

    if (locker.successful()) {
      result = _agency.getValues(prefixPlannedDatabases, true);
    }
  }

  if (result.successful()) {
    result.parse(prefixPlannedDatabases + "/", false);

    decltype(_plannedDatabases) newDatabases;

    // result._values is a std::map<std::string, AgencyCommResultEntry>
    auto it = result._values.begin();

    while (it != result._values.end()) {
      std::string const& name = (*it).first;
      // TODO: _plannedDatabases needs to be moved to velocypack;
      // then this can be merged to swap
      TRI_json_t* options =
          arangodb::basics::VelocyPackHelper::velocyPackToJson(
              (*it).second._vpack->slice());

      // steal the VelocyPack
      (*it).second._vpack.reset();
      newDatabases.insert(std::make_pair(name, options));

      ++it;
    }

    // Now set the new value:
    {
      WRITE_LOCKER(writeLocker, _plannedDatabasesProt.lock);
      _plannedDatabases.swap(newDatabases);
      _plannedDatabasesProt.version++;       // such that others notice our change
      _plannedDatabasesProt.isValid = true;  // will never be reset to false
    }
    clearPlannedDatabases(newDatabases);  // delete the old stuff
    return;
  }

  LOG(DEBUG) << "Error while loading " << prefixPlannedDatabases
             << " httpCode: " << result.httpCode()
             << " errorCode: " << result.errorCode()
             << " errorMessage: " << result.errorMessage()
             << " body: " << result.body();
}

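// Note on the pattern shared by all load*() methods in this file (added
// comment, not in the original): the version counter is read before the mutex
// is taken; if it has advanced once the mutex is held, another thread has
// already refreshed the cache and the reload can be skipped. A minimal sketch
// of the idea, with hypothetical names:
//
//   uint64_t seen = prot.version;         // snapshot without the mutex
//   MUTEX_LOCKER(guard, prot.mutex);
//   if (prot.version > seen) { return; }  // somebody else reloaded already
//   // ... fetch from the agency, then swap the map under prot.lock and
//   // bump prot.version so that waiting threads notice the change ...
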
////////////////////////////////////////////////////////////////////////////////
/// @brief deletes a list of current databases
////////////////////////////////////////////////////////////////////////////////

void ClusterInfo::clearCurrentDatabases(
    std::unordered_map<DatabaseID, std::unordered_map<ServerID, TRI_json_t*>>&
        databases) {
  auto it = databases.begin();
  while (it != databases.end()) {
    auto it2 = (*it).second.begin();

    while (it2 != (*it).second.end()) {
      TRI_json_t* json = (*it2).second;

      if (json != nullptr) {
        TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, json);
      }

      ++it2;
    }
    ++it;
  }

  databases.clear();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief (re-)load the information about current databases
/// Usually one does not have to call this directly.
////////////////////////////////////////////////////////////////////////////////

static std::string const prefixCurrentDatabases = "Current/Databases";

void ClusterInfo::loadCurrentDatabases() {
  uint64_t storedVersion = _currentDatabasesProt.version;
  MUTEX_LOCKER(mutexLocker, _currentDatabasesProt.mutex);
  if (_currentDatabasesProt.version > storedVersion) {
    // Somebody else did what we intended to do, so just return
    return;
  }

  // Now contact the agency:
  AgencyCommResult result;
  {
    AgencyCommLocker locker("Plan", "READ");

    if (locker.successful()) {
      result = _agency.getValues(prefixCurrentDatabases, true);
    }
  }

  if (result.successful()) {
    result.parse(prefixCurrentDatabases + "/", false);

    decltype(_currentDatabases) newDatabases;

    std::map<std::string, AgencyCommResultEntry>::iterator it =
        result._values.begin();

    while (it != result._values.end()) {
      std::string const key = (*it).first;

      // each entry consists of a database id and a collection id, separated by
      // '/'
      std::vector<std::string> parts =
          arangodb::basics::StringUtils::split(key, '/');

      if (parts.empty()) {
        ++it;
        continue;
      }
      std::string const database = parts[0];

      // _currentDatabases is
      // a map-type<DatabaseID, a map-type<ServerID, TRI_json_t*>>
      auto it2 = newDatabases.find(database);

      if (it2 == newDatabases.end()) {
        // insert an empty list for this database
        decltype(it2->second) empty;
        it2 = newDatabases.insert(std::make_pair(database, empty)).first;
      }

      if (parts.size() == 2) {
        // got a server name
        //
        // TODO: _currentDatabases needs to be moved to velocypack;
        // then this can be merged to swap
        TRI_json_t* json = arangodb::basics::VelocyPackHelper::velocyPackToJson(
            (*it).second._vpack->slice());

        // steal the VelocyPack
        (*it).second._vpack.reset();
        (*it2).second.insert(std::make_pair(parts[1], json));
      }

      ++it;
    }

    // Now set the new value:
    {
      WRITE_LOCKER(writeLocker, _currentDatabasesProt.lock);
      _currentDatabases.swap(newDatabases);
      _currentDatabasesProt.version++;       // such that others notice our change
      _currentDatabasesProt.isValid = true;  // will never be reset to false
    }
    clearCurrentDatabases(newDatabases);  // delete the old stuff
    return;
  }

  LOG(DEBUG) << "Error while loading " << prefixCurrentDatabases
             << " httpCode: " << result.httpCode()
             << " errorCode: " << result.errorCode()
             << " errorMessage: " << result.errorMessage()
             << " body: " << result.body();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief (re-)load the information about collections from the agency
/// Usually one does not have to call this directly.
////////////////////////////////////////////////////////////////////////////////

static std::string const prefixPlannedCollections = "Plan/Collections";

void ClusterInfo::loadPlannedCollections() {
  uint64_t storedVersion = _plannedCollectionsProt.version;
  MUTEX_LOCKER(mutexLocker, _plannedCollectionsProt.mutex);
  if (_plannedCollectionsProt.version > storedVersion) {
    // Somebody else did what we intended to do, so just return
    return;
  }

  // Now contact the agency:
  AgencyCommResult result;
  {
    AgencyCommLocker locker("Plan", "READ");

    if (locker.successful()) {
      result = _agency.getValues(prefixPlannedCollections, true);
    } else {
      LOG(ERR) << "Error while locking " << prefixPlannedCollections;
      return;
    }
  }

  if (result.successful()) {
    result.parse(prefixPlannedCollections + "/", false);

    decltype(_plannedCollections) newCollections;
    decltype(_shards) newShards;
    decltype(_shardKeys) newShardKeys;

    std::map<std::string, AgencyCommResultEntry>::iterator it =
        result._values.begin();

    for (; it != result._values.end(); ++it) {
      std::string const key = (*it).first;

      // each entry consists of a database id and a collection id, separated by
      // '/'
      std::vector<std::string> parts =
          arangodb::basics::StringUtils::split(key, '/');

      if (parts.size() != 2) {
        // invalid entry
        LOG(WARN) << "found invalid collection key in agency: '" << key << "'";
        continue;
      }

      std::string const database = parts[0];
      std::string const collection = parts[1];

      // check whether we have created an entry for the database already
      AllCollections::iterator it2 = newCollections.find(database);

      if (it2 == newCollections.end()) {
        // not yet, so create an entry for the database
        DatabaseCollections empty;
        newCollections.emplace(std::make_pair(database, empty));
        it2 = newCollections.find(database);
      }

      // TODO: The Collection info has to store VPack instead of JSON
      TRI_json_t* json = arangodb::basics::VelocyPackHelper::velocyPackToJson(
          (*it).second._vpack->slice());
      // steal the velocypack
      (*it).second._vpack.reset();

      auto collectionData = std::make_shared<CollectionInfo>(json);
      auto shardKeys = std::make_shared<std::vector<std::string>>(
          collectionData->shardKeys());
      newShardKeys.insert(make_pair(collection, shardKeys));
      auto shardIDs = collectionData->shardIds();
      auto shards = std::make_shared<std::vector<std::string>>();
      for (auto const& p : *shardIDs) {
        shards->push_back(p.first);
      }
      // Sort by the number in the shard ID ("s0000001" for example):
      std::sort(shards->begin(), shards->end(),
                [](std::string const& a, std::string const& b) -> bool {
                  return std::strtol(a.c_str() + 1, nullptr, 10) <
                         std::strtol(b.c_str() + 1, nullptr, 10);
                });
      newShards.emplace(std::make_pair(collection, shards));

      // insert the collection into the existing map, insert it under its
      // ID as well as under its name, so that a lookup can be done with
      // either of the two.

      (*it2).second.emplace(std::make_pair(collection, collectionData));
      (*it2).second.emplace(
          std::make_pair(collectionData->name(), collectionData));
    }

    // Now set the new value:
    {
      WRITE_LOCKER(writeLocker, _plannedCollectionsProt.lock);
      _plannedCollections.swap(newCollections);
      _shards.swap(newShards);
      _shardKeys.swap(newShardKeys);
      _plannedCollectionsProt.version++;       // such that others notice our change
      _plannedCollectionsProt.isValid = true;  // will never be reset to false
    }
    return;
  }

  LOG(ERR) << "Error while loading " << prefixPlannedCollections
           << " httpCode: " << result.httpCode()
           << " errorCode: " << result.errorCode()
           << " errorMessage: " << result.errorMessage()
           << " body: " << result.body();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief ask about a collection
/// If it is not found in the cache, the cache is reloaded once
////////////////////////////////////////////////////////////////////////////////

std::shared_ptr<CollectionInfo> ClusterInfo::getCollection(
    DatabaseID const& databaseID, CollectionID const& collectionID) {
  int tries = 0;

  if (!_plannedCollectionsProt.isValid) {
    loadPlannedCollections();
    ++tries;
  }

  while (true) {  // left by break
    {
      READ_LOCKER(readLocker, _plannedCollectionsProt.lock);
      // look up database by id
      AllCollections::const_iterator it = _plannedCollections.find(databaseID);

      if (it != _plannedCollections.end()) {
        // look up collection by id (or by name)
        DatabaseCollections::const_iterator it2 =
            (*it).second.find(collectionID);

        if (it2 != (*it).second.end()) {
          return (*it2).second;
        }
      }
    }
    if (++tries >= 2) {
      break;
    }

    // must load collections outside the lock
    loadPlannedCollections();
  }

  return std::make_shared<CollectionInfo>();
}

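// Illustrative sketch (added comment): looking up a planned collection by name
// or by id; an empty CollectionInfo is returned if nothing is found even after
// one reload of the cache.
//
//   auto info = ClusterInfo::instance()->getCollection("myDb", "myCollection");
//   if (info->empty()) { /* collection is not (yet) planned */ }
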
////////////////////////////////////////////////////////////////////////////////
/// @brief get properties of a collection
////////////////////////////////////////////////////////////////////////////////

arangodb::VocbaseCollectionInfo ClusterInfo::getCollectionProperties(
    CollectionInfo const& collection) {
  arangodb::VocbaseCollectionInfo info(collection);
  return info;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief get properties of a collection
////////////////////////////////////////////////////////////////////////////////

VocbaseCollectionInfo ClusterInfo::getCollectionProperties(
    DatabaseID const& databaseID, CollectionID const& collectionID) {
  auto ci = getCollection(databaseID, collectionID);
  return getCollectionProperties(*ci);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief ask about all collections
////////////////////////////////////////////////////////////////////////////////

std::vector<std::shared_ptr<CollectionInfo>> const ClusterInfo::getCollections(
    DatabaseID const& databaseID) {
  std::vector<std::shared_ptr<CollectionInfo>> result;

  // always reload
  loadPlannedCollections();

  READ_LOCKER(readLocker, _plannedCollectionsProt.lock);
  // look up database by id
  AllCollections::const_iterator it = _plannedCollections.find(databaseID);

  if (it == _plannedCollections.end()) {
    return result;
  }

  // iterate over all collections
  DatabaseCollections::const_iterator it2 = (*it).second.begin();
  while (it2 != (*it).second.end()) {
    char c = (*it2).first[0];

    if (c < '0' || c > '9') {
      // skip collections indexed by id
      result.push_back((*it2).second);
    }

    ++it2;
  }

  return result;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief (re-)load the information about current collections from the agency
/// Usually one does not have to call this directly. Note that this is
/// necessarily complicated, since here we have to consider information
/// about all shards of a collection.
////////////////////////////////////////////////////////////////////////////////

static std::string const prefixCurrentCollections = "Current/Collections";
void ClusterInfo::loadCurrentCollections() {
  uint64_t storedVersion = _currentCollectionsProt.version;
  MUTEX_LOCKER(mutexLocker, _currentCollectionsProt.mutex);
  if (_currentCollectionsProt.version > storedVersion) {
    // Somebody else did what we intended to do, so just return
    return;
  }

  // Now contact the agency:
  AgencyCommResult result;
  {
    AgencyCommLocker locker("Current", "READ");

    if (locker.successful()) {
      result = _agency.getValues(prefixCurrentCollections, true);
    }
  }

  if (result.successful()) {
    result.parse(prefixCurrentCollections + "/", false);

    decltype(_currentCollections) newCollections;
    decltype(_shardIds) newShardIds;

    std::map<std::string, AgencyCommResultEntry>::iterator it =
        result._values.begin();

    for (; it != result._values.end(); ++it) {
      std::string const key = (*it).first;

      // each entry consists of a database id, a collection id, and a shardID,
      // separated by '/'
      std::vector<std::string> parts =
          arangodb::basics::StringUtils::split(key, '/');

      if (parts.size() != 3) {
        // invalid entry
        LOG(WARN) << "found invalid collection key in current in agency: '"
                  << key << "'";
        continue;
      }

      std::string const database = parts[0];
      std::string const collection = parts[1];
      std::string const shardID = parts[2];

      // check whether we have created an entry for the database already
      AllCollectionsCurrent::iterator it2 = newCollections.find(database);

      if (it2 == newCollections.end()) {
        // not yet, so create an entry for the database
        DatabaseCollectionsCurrent empty;
        newCollections.insert(std::make_pair(database, empty));
        it2 = newCollections.find(database);
      }

      // TODO: The Collection info has to store VPack instead of JSON
      TRI_json_t* json = arangodb::basics::VelocyPackHelper::velocyPackToJson(
          (*it).second._vpack->slice());
      // steal the velocypack
      (*it).second._vpack.reset();

      // check whether we already have a CollectionInfoCurrent:
      DatabaseCollectionsCurrent::iterator it3 = it2->second.find(collection);
      if (it3 == it2->second.end()) {
        auto collectionDataCurrent =
            std::make_shared<CollectionInfoCurrent>(shardID, json);
        it2->second.insert(make_pair(collection, collectionDataCurrent));
        it3 = it2->second.find(collection);
      } else {
        it3->second->add(shardID, json);
      }

      // Note that we have only inserted the CollectionInfoCurrent under
      // the collection ID and not under the name! It is not possible
      // to query the current collection info by name. This is because
      // the correct place to hold the current name is in the plan.
      // Thus: Look there and get the collection ID from there. Then
      // ask about the current collection info.

      // Now take note of this shard and its responsible server:
      auto servers = std::make_shared<std::vector<ServerID>>(
          it3->second->servers(shardID));
      newShardIds.insert(make_pair(shardID, servers));
    }

    // Now set the new value:
    {
      WRITE_LOCKER(writeLocker, _currentCollectionsProt.lock);
      _currentCollections.swap(newCollections);
      _shardIds.swap(newShardIds);
      _currentCollectionsProt.version++;  // such that others notice our change
      _currentCollectionsProt.isValid = true;
    }
    return;
  }

  LOG(DEBUG) << "Error while loading " << prefixCurrentCollections
             << " httpCode: " << result.httpCode()
             << " errorCode: " << result.errorCode()
             << " errorMessage: " << result.errorMessage()
             << " body: " << result.body();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief ask about a collection in current. This returns information about
/// all shards in the collection.
/// If it is not found in the cache, the cache is reloaded once.
////////////////////////////////////////////////////////////////////////////////

std::shared_ptr<CollectionInfoCurrent> ClusterInfo::getCollectionCurrent(
    DatabaseID const& databaseID, CollectionID const& collectionID) {
  int tries = 0;

  if (!_currentCollectionsProt.isValid) {
    loadCurrentCollections();
    ++tries;
  }

  while (true) {
    {
      READ_LOCKER(readLocker, _currentCollectionsProt.lock);
      // look up database by id
      AllCollectionsCurrent::const_iterator it =
          _currentCollections.find(databaseID);

      if (it != _currentCollections.end()) {
        // look up collection by id
        DatabaseCollectionsCurrent::const_iterator it2 =
            (*it).second.find(collectionID);

        if (it2 != (*it).second.end()) {
          return (*it2).second;
        }
      }
    }

    if (++tries >= 2) {
      break;
    }

    // must load collections outside the lock
    loadCurrentCollections();
  }

  return std::make_shared<CollectionInfoCurrent>();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief create database in coordinator, the return value is an ArangoDB
/// error code and the errorMsg is set accordingly. One possible error
/// is a timeout, a timeout of 0.0 means no timeout.
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::createDatabaseCoordinator(std::string const& name,
                                           VPackSlice const& slice,
                                           std::string& errorMsg,
                                           double timeout) {
  AgencyComm ac;
  AgencyCommResult res;

  double const realTimeout = getTimeout(timeout);
  double const endTime = TRI_microtime() + realTimeout;
  double const interval = getPollInterval();

  {
    AgencyCommLocker locker("Plan", "WRITE");

    if (!locker.successful()) {
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_LOCK_PLAN, errorMsg);
    }

    res = ac.casValue("Plan/Databases/" + name, slice, false, 0.0, realTimeout);
    if (!res.successful()) {
      if (res._statusCode ==
          (int)arangodb::GeneralResponse::ResponseCode::PRECONDITION_FAILED) {
        return setErrormsg(TRI_ERROR_ARANGO_DUPLICATE_NAME, errorMsg);
      }

      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_DATABASE_IN_PLAN,
                         errorMsg);
    }
  }

  // Now update our own cache of planned databases:
  loadPlannedDatabases();

  // Now wait for it to appear and be complete:
  res.clear();
  res = ac.getValues("Current/Version", false);
  if (!res.successful()) {
    return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_READ_CURRENT_VERSION,
                       errorMsg);
  }
  std::vector<ServerID> DBServers = getCurrentDBServers();
  int count = 0;  // counts the attempts until we reload the DBServers list

  std::string where = "Current/Databases/" + name;
  while (TRI_microtime() <= endTime) {
    res.clear();
    res = ac.getValues(where, true);
    if (res.successful() && res.parse(where + "/", false)) {
      if (res._values.size() == DBServers.size()) {
        std::map<std::string, AgencyCommResultEntry>::iterator it;
        std::string tmpMsg = "";
        bool tmpHaveError = false;
        for (it = res._values.begin(); it != res._values.end(); ++it) {
          VPackSlice slice = (*it).second._vpack->slice();
          if (arangodb::basics::VelocyPackHelper::getBooleanValue(
                  slice, "error", false)) {
            tmpHaveError = true;
            tmpMsg += " DBServer:" + it->first + ":";
            tmpMsg += arangodb::basics::VelocyPackHelper::getStringValue(
                slice, "errorMessage", "");
            if (slice.hasKey("errorNum")) {
              VPackSlice errorNum = slice.get("errorNum");
              if (errorNum.isNumber()) {
                tmpMsg += " (errorNum=";
                tmpMsg += basics::StringUtils::itoa(
                    errorNum.getNumericValue<uint32_t>());
                tmpMsg += ")";
              }
            }
          }
        }
        if (tmpHaveError) {
          errorMsg = "Error in creation of database:" + tmpMsg;
          return TRI_ERROR_CLUSTER_COULD_NOT_CREATE_DATABASE;
        }
        loadCurrentDatabases();  // update our cache
        return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
      }
    }

    res.clear();
    _agencyCallbackRegistry->awaitNextChange(
        "Current/Version", getReloadServerListTimeout() / interval);

    if (++count >= static_cast<int>(getReloadServerListTimeout() / interval)) {
      // We update the list of DBServers every minute in case one of them
      // was taken away since we last looked. This also helps (slightly)
      // if a new DBServer was added. However, in this case we report
      // success a bit too early, which is not too bad.
      loadCurrentDBServers();
      DBServers = getCurrentDBServers();
      count = 0;
    }
  }
  return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
}

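// Illustrative sketch (added comment): a hypothetical coordinator caller. The
// slice passed in must already contain the planned database description (the
// exact attributes, here "name" and "id", are an assumption of this sketch,
// dictated by the REST handlers and not by this function); 0.0 means no
// timeout.
//
//   VPackBuilder b;
//   { VPackObjectBuilder guard(&b);
//     b.add("name", VPackValue("myDb"));
//     b.add("id", VPackValue("12345")); }
//   std::string errorMsg;
//   int code = ClusterInfo::instance()->createDatabaseCoordinator(
//       "myDb", b.slice(), errorMsg, 0.0);
//   if (code != TRI_ERROR_NO_ERROR) { LOG(ERR) << errorMsg; }
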
////////////////////////////////////////////////////////////////////////////////
/// @brief drop database in coordinator, the return value is an ArangoDB
/// error code and the errorMsg is set accordingly. One possible error
/// is a timeout, a timeout of 0.0 means no timeout.
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::dropDatabaseCoordinator(std::string const& name,
                                         std::string& errorMsg,
                                         double timeout) {
  AgencyComm ac;
  AgencyCommResult res;

  double const realTimeout = getTimeout(timeout);
  double const endTime = TRI_microtime() + realTimeout;
  double const interval = getPollInterval();

  {
    AgencyCommLocker locker("Plan", "WRITE");

    if (!locker.successful()) {
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_LOCK_PLAN, errorMsg);
    }

    if (!ac.exists("Plan/Databases/" + name)) {
      return setErrormsg(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg);
    }

    res = ac.removeValues("Plan/Databases/" + name, false);
    if (!res.successful()) {
      if (res.httpCode() == (int)GeneralResponse::ResponseCode::NOT_FOUND) {
        return setErrormsg(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg);
      }

      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_PLAN,
                         errorMsg);
    }

    res.clear();
    res = ac.removeValues("Plan/Collections/" + name, true);

    if (!res.successful() &&
        res.httpCode() != (int)GeneralResponse::ResponseCode::NOT_FOUND) {
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_PLAN,
                         errorMsg);
    }
  }

  // Load our own caches:
  loadPlannedDatabases();
  loadPlannedCollections();

  // Now wait for it to appear and be complete:
  res.clear();
  res = ac.getValues("Current/Version", false);
  if (!res.successful()) {
    return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_READ_CURRENT_VERSION,
                       errorMsg);
  }

  std::string where = "Current/Databases/" + name;
  while (TRI_microtime() <= endTime) {
    res.clear();
    res = ac.getValues(where, true);
    if (res.successful() && res.parse(where + "/", false)) {
      if (res._values.size() == 0) {
        AgencyCommLocker locker("Current", "WRITE");
        if (locker.successful()) {
          res.clear();
          res = ac.removeValues(where, true);
          if (res.successful()) {
            return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
          }
          return setErrormsg(
              TRI_ERROR_CLUSTER_COULD_NOT_REMOVE_DATABASE_IN_CURRENT, errorMsg);
        }
        return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
      }
    }
    res.clear();
    _agencyCallbackRegistry->awaitNextChange("Current/Version", interval);
  }
  return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
}

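// Illustrative sketch (added comment): dropping a database and waiting up to
// 120 seconds for the entries under Current/Databases/<name> to disappear.
//
//   std::string errorMsg;
//   int code = ClusterInfo::instance()->dropDatabaseCoordinator(
//       "myDb", errorMsg, 120.0);
//   if (code == TRI_ERROR_CLUSTER_TIMEOUT) {
//     // the Plan was changed, but Current did not catch up in time
//   }
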
////////////////////////////////////////////////////////////////////////////////
/// @brief create collection in coordinator, the return value is an ArangoDB
/// error code and the errorMsg is set accordingly. One possible error
/// is a timeout, a timeout of 0.0 means no timeout.
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::createCollectionCoordinator(std::string const& databaseName,
                                             std::string const& collectionID,
                                             uint64_t numberOfShards,
                                             VPackSlice const& json,
                                             std::string& errorMsg,
                                             double timeout) {
  using arangodb::velocypack::Slice;

  AgencyComm ac;

  double const realTimeout = getTimeout(timeout);
  double const endTime = TRI_microtime() + realTimeout;
  double const interval = getPollInterval();
  {
    // check if a collection with the same name is already planned
    loadPlannedCollections();

    READ_LOCKER(readLocker, _plannedCollectionsProt.lock);
    AllCollections::const_iterator it = _plannedCollections.find(databaseName);
    if (it != _plannedCollections.end()) {
      std::string const name =
          arangodb::basics::VelocyPackHelper::getStringValue(json, "name", "");

      DatabaseCollections::const_iterator it2 = (*it).second.find(name);

      if (it2 != (*it).second.end()) {
        // collection already exists!
        return TRI_ERROR_ARANGO_DUPLICATE_NAME;
      }
    }
  }

  if (!ac.exists("Plan/Databases/" + databaseName)) {
    return setErrormsg(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg);
  }

  if (ac.exists("Plan/Collections/" + databaseName + "/" + collectionID)) {
    return setErrormsg(TRI_ERROR_CLUSTER_COLLECTION_ID_EXISTS, errorMsg);
  }

  AgencyCommResult result =
      ac.casValue("Plan/Collections/" + databaseName + "/" + collectionID, json,
                  false, 0.0, 0.0);
  if (!result.successful()) {
    return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION_IN_PLAN,
                       errorMsg);
  }

  ac.increaseVersion("Plan/Version");

  // Update our cache:
  loadPlannedCollections();

  AgencyCommResult res;
  std::string const where =
      "Current/Collections/" + databaseName + "/" + collectionID;
  while (TRI_microtime() <= endTime) {
    res.clear();
    res = ac.getValues(where, true);

    LOG(TRACE) << "CREATE OYOYOYOY " << where;

    if (res.successful() && res.parse(where + "/", false)) {
      LOG(TRACE) << "CREATE IS SUCCESS " << where;
      if (res._values.size() == (size_t)numberOfShards) {
        LOG(TRACE) << "CREATE has number " << where;
        std::string tmpMsg = "";
        bool tmpHaveError = false;
        for (auto const& p : res._values) {
          VPackSlice const slice = p.second._vpack->slice();
          if (arangodb::basics::VelocyPackHelper::getBooleanValue(
                  slice, "error", false)) {
            tmpHaveError = true;
            tmpMsg += " shardID:" + p.first + ":";
            tmpMsg += arangodb::basics::VelocyPackHelper::getStringValue(
                slice, "errorMessage", "");
            if (slice.hasKey("errorNum")) {
              VPackSlice const errorNum = slice.get("errorNum");
              if (errorNum.isNumber()) {
                tmpMsg += " (errNum=";
                tmpMsg += basics::StringUtils::itoa(
                    errorNum.getNumericValue<uint32_t>());
                tmpMsg += ")";
              }
            }
          }
        }
        LOG(TRACE) << "CREATE PRE LOAD has number " << where;
        loadCurrentCollections();
        LOG(TRACE) << "CREATE POST LOAD has number " << where;
        if (tmpHaveError) {
          errorMsg = "Error in creation of collection:" + tmpMsg;
          LOG(TRACE) << "CREATE KAP0TT " << where;
          return TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION;
        }
        LOG(TRACE) << "CREATE OK " << where;
        return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
      }
    }

    res.clear();
    LOG(TRACE) << "JASSSSS " << interval;
    _agencyCallbackRegistry->awaitNextChange("Current/Version", interval);
    LOG(TRACE) << "NNNNJASSSSS " << interval;
  }

  // LOG(ERR) << "GOT TIMEOUT. NUMBEROFSHARDS: " << numberOfShards;
  return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
}

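// Illustrative sketch (added comment): the VelocyPack passed in is the full
// planned collection description; completion is detected by counting the
// per-shard entries that show up under Current/Collections/<db>/<cid>, so
// numberOfShards must match the number of shards described in that slice
// (the attribute layout itself is an assumption of this sketch; `builder` and
// `collectionId` are hypothetical).
//
//   std::string errorMsg;
//   int code = ClusterInfo::instance()->createCollectionCoordinator(
//       "myDb", collectionId, numberOfShards, builder.slice(), errorMsg, 240.0);
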
////////////////////////////////////////////////////////////////////////////////
/// @brief drop collection in coordinator, the return value is an ArangoDB
/// error code and the errorMsg is set accordingly. One possible error
/// is a timeout, a timeout of 0.0 means no timeout.
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::dropCollectionCoordinator(std::string const& databaseName,
                                           std::string const& collectionID,
                                           std::string& errorMsg,
                                           double timeout) {
  AgencyComm ac;
  AgencyCommResult res;

  double const realTimeout = getTimeout(timeout);
  double const endTime = TRI_microtime() + realTimeout;
  double const interval = getPollInterval();

  {
    AgencyCommLocker locker("Plan", "WRITE");

    if (!locker.successful()) {
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_LOCK_PLAN, errorMsg);
    }

    if (!ac.exists("Plan/Databases/" + databaseName)) {
      return setErrormsg(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND, errorMsg);
    }

    res = ac.removeValues(
        "Plan/Collections/" + databaseName + "/" + collectionID, false);
    if (!res.successful()) {
      if (res._statusCode == (int)GeneralResponse::ResponseCode::NOT_FOUND) {
        return setErrormsg(TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, errorMsg);
      }
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_REMOVE_COLLECTION_IN_PLAN,
                         errorMsg);
    }
  }

  // Update our own cache:
  loadPlannedCollections();

  // monitor the entry for the collection
  std::string const where =
      "Current/Collections/" + databaseName + "/" + collectionID;
  while (TRI_microtime() <= endTime) {
    res.clear();
    res = ac.getValues(where, true);
    if (res.successful() && res.parse(where + "/", false)) {
      // if there are no more active shards for the collection...
      if (res._values.size() == 0) {
        // ...remove the entire directory for the collection
        AgencyCommLocker locker("Current", "WRITE");
        if (locker.successful()) {
          res.clear();
          res = ac.removeValues(
              "Current/Collections/" + databaseName + "/" + collectionID, true);
          if (res.successful()) {
            return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
          }
          return setErrormsg(
              TRI_ERROR_CLUSTER_COULD_NOT_REMOVE_COLLECTION_IN_CURRENT,
              errorMsg);
        }
        loadCurrentCollections();
        return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
      }
    }

    res.clear();
    _agencyCallbackRegistry->awaitNextChange("Current/Version", interval);
  }
  return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
}

////////////////////////////////////////////////////////////////////////////////
/// @brief set collection properties in coordinator
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::setCollectionPropertiesCoordinator(
    std::string const& databaseName, std::string const& collectionID,
    VocbaseCollectionInfo const* info) {
  AgencyComm ac;
  AgencyCommResult res;

  {
    AgencyCommLocker locker("Plan", "WRITE");

    if (!locker.successful()) {
      return TRI_ERROR_CLUSTER_COULD_NOT_LOCK_PLAN;
    }

    if (!ac.exists("Plan/Databases/" + databaseName)) {
      return TRI_ERROR_ARANGO_DATABASE_NOT_FOUND;
    }

    res = ac.getValues("Plan/Collections/" + databaseName + "/" + collectionID,
                       false);

    if (!res.successful()) {
      return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
    }

    res.parse("", false);
    std::map<std::string, AgencyCommResultEntry>::const_iterator it =
        res._values.begin();

    if (it == res._values.end()) {
      return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
    }

    if (it->second._vpack == nullptr) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }

    VPackSlice const slice = it->second._vpack->slice();
    if (slice.isNone()) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }
    VPackBuilder copy;
    try {
      VPackObjectBuilder b(&copy);
      for (auto const& entry : VPackObjectIterator(slice)) {
        std::string key = entry.key.copyString();
        // Copy all values except the following
        // They are overwritten later
        if (key != "doCompact" && key != "journalSize" &&
            key != "waitForSync" && key != "indexBuckets") {
          copy.add(key, entry.value);
        }
      }
      copy.add("doCompact", VPackValue(info->doCompact()));
      copy.add("journalSize", VPackValue(info->maximalSize()));
      copy.add("waitForSync", VPackValue(info->waitForSync()));
      copy.add("indexBuckets", VPackValue(info->indexBuckets()));
    } catch (...) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }

    res.clear();
    res = ac.setValue("Plan/Collections/" + databaseName + "/" + collectionID,
                      copy.slice(), 0.0);
  }

  if (res.successful()) {
    loadPlannedCollections();
    return TRI_ERROR_NO_ERROR;
  }

  return TRI_ERROR_INTERNAL;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief set collection status in coordinator
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::setCollectionStatusCoordinator(
    std::string const& databaseName, std::string const& collectionID,
    TRI_vocbase_col_status_e status) {
  AgencyComm ac;
  AgencyCommResult res;

  {
    AgencyCommLocker locker("Plan", "WRITE");

    if (!locker.successful()) {
      return TRI_ERROR_CLUSTER_COULD_NOT_LOCK_PLAN;
    }

    if (!ac.exists("Plan/Databases/" + databaseName)) {
      return TRI_ERROR_ARANGO_DATABASE_NOT_FOUND;
    }

    res = ac.getValues("Plan/Collections/" + databaseName + "/" + collectionID,
                       false);

    if (!res.successful()) {
      return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
    }

    res.parse("", false);
    std::map<std::string, AgencyCommResultEntry>::const_iterator it =
        res._values.begin();

    if (it == res._values.end()) {
      return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
    }

    VPackSlice const slice = it->second._vpack->slice();
    if (slice.isNone()) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }

    TRI_vocbase_col_status_e old = static_cast<TRI_vocbase_col_status_e>(
        arangodb::basics::VelocyPackHelper::getNumericValue<int>(
            slice, "status", static_cast<int>(TRI_VOC_COL_STATUS_CORRUPTED)));

    if (old == status) {
      // no status change
      return TRI_ERROR_NO_ERROR;
    }

    VPackBuilder builder;
    try {
      VPackObjectBuilder b(&builder);
      for (auto const& entry : VPackObjectIterator(slice)) {
        std::string key = entry.key.copyString();
        if (key != "status") {
          builder.add(key, entry.value);
        }
      }
      builder.add("status", VPackValue(status));
    } catch (...) {
      return TRI_ERROR_OUT_OF_MEMORY;
    }
    res.clear();
    res = ac.setValue("Plan/Collections/" + databaseName + "/" + collectionID,
                      builder.slice(), 0.0);
  }

  if (res.successful()) {
    loadPlannedCollections();
    return TRI_ERROR_NO_ERROR;
  }

  return TRI_ERROR_INTERNAL;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief ensure an index in coordinator.
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::ensureIndexCoordinator(
    std::string const& databaseName, std::string const& collectionID,
    VPackSlice const& slice, bool create,
    bool (*compare)(VPackSlice const&, VPackSlice const&),
    VPackBuilder& resultBuilder, std::string& errorMsg, double timeout) {
  AgencyComm ac;

  double const realTimeout = getTimeout(timeout);
  double const endTime = TRI_microtime() + realTimeout;
  double const interval = getPollInterval();

  TRI_ASSERT(resultBuilder.isEmpty());
  int numberOfShards = 0;

  // check index id
  uint64_t iid = 0;

  VPackSlice const idxSlice = slice.get("id");
  if (idxSlice.isString()) {
    // use predefined index id
    iid = arangodb::basics::StringUtils::uint64(idxSlice.copyString());
  }

  if (iid == 0) {
    // no id set, create a new one!
    iid = uniqid();
  }

  std::string const idString = arangodb::basics::StringUtils::itoa(iid);

  std::string const key =
      "Plan/Collections/" + databaseName + "/" + collectionID;
  AgencyCommResult previous = ac.getValues(key, false);
  previous.parse("", false);
  auto it = previous._values.begin();
  bool usePrevious = true;
  if (it == previous._values.end()) {
    LOG(INFO) << "Entry for collection in Plan does not exist!";
    usePrevious = false;
  }

  loadPlannedCollections();
  VPackBuilder newBuilder;
  // It is possible that between the fetching of the planned collections
  // and the write lock we acquire below something has changed. Therefore
  // we first get the previous value and then do a compare and swap operation.
  {
    AgencyCommLocker locker("Plan", "WRITE");

    if (!locker.successful()) {
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_LOCK_PLAN, errorMsg);
    }

    std::shared_ptr<VPackBuilder> collectionBuilder;
    {
      std::shared_ptr<CollectionInfo> c =
          getCollection(databaseName, collectionID);

      // Note that nobody is removing this collection in the plan, since
      // we hold the write lock in the agency, therefore it does not matter
      // that getCollection fetches the read lock and releases it before
      // we get it again.
      //
      READ_LOCKER(readLocker, _plannedCollectionsProt.lock);

      if (c->empty()) {
        return setErrormsg(TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, errorMsg);
      }

      std::shared_ptr<VPackBuilder> tmp =
          arangodb::basics::JsonHelper::toVelocyPack(c->getIndexes());
      numberOfShards = c->numberOfShards();
      VPackSlice const indexes = tmp->slice();

      if (indexes.isArray()) {
        VPackSlice const type = slice.get("type");

        if (!type.isString()) {
          return setErrormsg(TRI_ERROR_INTERNAL, errorMsg);
        }

        for (auto const& other : VPackArrayIterator(indexes)) {
          if (arangodb::basics::VelocyPackHelper::compare(
                  type, other.get("type"), false) != 0) {
            // compare index types first. they must match
            continue;
          }
          TRI_ASSERT(other.isObject());

          bool isSame = compare(slice, other);

          if (isSame) {
            // found an existing index...
            {
              // Copy over all elements in slice.
              VPackObjectBuilder b(&resultBuilder);
              for (auto const& entry : VPackObjectIterator(other)) {
                resultBuilder.add(entry.key.copyString(), entry.value);
              }
              resultBuilder.add("isNewlyCreated", VPackValue(false));
            }
            return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
          }
        }
      }

      // no existing index found.
      if (!create) {
        TRI_ASSERT(resultBuilder.isEmpty());
        return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
      }

      // now create a new index
      collectionBuilder =
          arangodb::basics::JsonHelper::toVelocyPack(c->getJson());
    }
    VPackSlice const collectionSlice = collectionBuilder->slice();

    if (!collectionSlice.isObject()) {
      return setErrormsg(TRI_ERROR_OUT_OF_MEMORY, errorMsg);
    }
    try {
      VPackObjectBuilder b(&newBuilder);
      // Create a new collection VPack with the new Index
      for (auto const& entry : VPackObjectIterator(collectionSlice)) {
        TRI_ASSERT(entry.key.isString());
        std::string key = entry.key.copyString();
        if (key == "indexes") {
          TRI_ASSERT(entry.value.isArray());
          newBuilder.add(key, VPackValue(VPackValueType::Array));
          // Copy over all indexes known so far
          for (auto const& idx : VPackArrayIterator(entry.value)) {
            newBuilder.add(idx);
          }
          {
            VPackObjectBuilder ob(&newBuilder);
            // Add the new index ignoring "id"
            for (auto const& e : VPackObjectIterator(slice)) {
              TRI_ASSERT(e.key.isString());
              std::string tmpkey = e.key.copyString();
              if (tmpkey != "id") {
                newBuilder.add(tmpkey, e.value);
              }
            }
            newBuilder.add("id", VPackValue(idString));
          }
          newBuilder.close();  // the array
        } else {
          // Plain copy everything else
          newBuilder.add(key, entry.value);
        }
      }
    } catch (...) {
      return setErrormsg(TRI_ERROR_OUT_OF_MEMORY, errorMsg);
    }

    AgencyCommResult result;
    if (usePrevious) {
      result = ac.casValue(key, it->second._vpack->slice(), newBuilder.slice(),
                           0.0, 0.0);
    } else {  // only when there is no previous value
      result = ac.setValue(key, newBuilder.slice(), 0.0);
    }

    if (!result.successful()) {
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION_IN_PLAN,
                         errorMsg);
    }
  }

  // reload our own cache:
  loadPlannedCollections();

  TRI_ASSERT(numberOfShards > 0);

  // now wait for the index to appear
  AgencyCommResult res = ac.getValues("Current/Version", false);
  if (!res.successful()) {
    return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_READ_CURRENT_VERSION,
                       errorMsg);
  }

  std::string where =
      "Current/Collections/" + databaseName + "/" + collectionID;
  while (TRI_microtime() <= endTime) {
    res.clear();
    res = ac.getValues(where, true);
    if (res.successful() && res.parse(where + "/", false)) {
      if (res._values.size() == (size_t)numberOfShards) {
        std::map<std::string, AgencyCommResultEntry>::iterator it;

        size_t found = 0;
        for (it = res._values.begin(); it != res._values.end(); ++it) {
          VPackSlice const slice = it->second._vpack->slice();
          if (slice.hasKey("indexes")) {
            VPackSlice const indexes = slice.get("indexes");
            if (!indexes.isArray()) {
              // no list, so our index is not present. we can abort searching
              break;
            }

            for (auto const& v : VPackArrayIterator(indexes)) {
              // check for errors
              if (hasError(v)) {
                std::string errorMsg = extractErrorMessage((*it).first, v);

                errorMsg = "Error during index creation: " + errorMsg;

                // Returns the specific error number if set, or the general
                // error otherwise
                return arangodb::basics::VelocyPackHelper::getNumericValue<int>(
                    v, "errorNum", TRI_ERROR_ARANGO_INDEX_CREATION_FAILED);
              }

              VPackSlice const k = v.get("id");

              if (!k.isString() || idString != k.copyString()) {
                // this is not our index
                continue;
              }

              // found our index
|
|
found++;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (found == (size_t)numberOfShards) {
|
|
VPackSlice indexFinder = newBuilder.slice();
|
|
TRI_ASSERT(indexFinder.isObject());
|
|
indexFinder = indexFinder.get("indexes");
|
|
TRI_ASSERT(indexFinder.isArray());
|
|
VPackValueLength l = indexFinder.length();
|
|
indexFinder = indexFinder.at(l - 1); // Get the last index
|
|
TRI_ASSERT(indexFinder.isObject());
|
|
{
|
|
// Copy over all elements in slice.
|
|
VPackObjectBuilder b(&resultBuilder);
|
|
for (auto const& entry : VPackObjectIterator(indexFinder)) {
|
|
resultBuilder.add(entry.key.copyString(), entry.value);
|
|
}
|
|
resultBuilder.add("isNewlyCreated", VPackValue(true));
|
|
}
|
|
loadCurrentCollections();
|
|
|
|
return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
|
|
}
|
|
}
|
|
}
|
|
res.clear();
|
|
_agencyCallbackRegistry->awaitNextChange("Current/Version", interval);
|
|
}
|
|
|
|
return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
|
|
}
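
// Usage sketch for ensureIndexCoordinator (illustrative only, not part of the
// original sources): the function CASes the new index description into
// Plan/Collections/<db>/<collection> and then polls the corresponding
// Current/... entries until every shard reports the index id. The sketch
// assumes the usual ClusterInfo::instance() accessor and a caller-supplied
// comparison callback `myIndexComparator` (both names are hypothetical here):
//
//   VPackBuilder idx;
//   {
//     VPackObjectBuilder guard(&idx);
//     idx.add("type", VPackValue("hash"));
//     idx.add("fields", VPackValue(VPackValueType::Array));
//     idx.add(VPackValue("value"));
//     idx.close();
//   }
//   VPackBuilder result;
//   std::string errorMsg;
//   int res = ClusterInfo::instance()->ensureIndexCoordinator(
//       "mydb", "12345", idx.slice(), /*create*/ true, myIndexComparator,
//       result, errorMsg, 0.0);  // 0.0 means: use the default timeout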

////////////////////////////////////////////////////////////////////////////////
/// @brief drop an index in coordinator.
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::dropIndexCoordinator(std::string const& databaseName,
                                      std::string const& collectionID,
                                      TRI_idx_iid_t iid, std::string& errorMsg,
                                      double timeout) {
  AgencyComm ac;

  double const realTimeout = getTimeout(timeout);
  double const endTime = TRI_microtime() + realTimeout;
  double const interval = getPollInterval();

  int numberOfShards = 0;
  std::string const idString = arangodb::basics::StringUtils::itoa(iid);

  std::string const key =
      "Plan/Collections/" + databaseName + "/" + collectionID;
  AgencyCommResult previous = ac.getValues(key, false);
  previous.parse("", false);
  auto it = previous._values.begin();
  TRI_ASSERT(it != previous._values.end());

  loadPlannedCollections();
  // It is possible that between the fetching of the planned collections
  // and the write lock we acquire below something has changed. Therefore
  // we first get the previous value and then do a compare and swap operation.
  {
    AgencyCommLocker locker("Plan", "WRITE");

    if (!locker.successful()) {
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_LOCK_PLAN, errorMsg);
    }

    TRI_json_t* collectionJson = nullptr;
    TRI_json_t const* indexes = nullptr;

    {
      std::shared_ptr<CollectionInfo> c =
          getCollection(databaseName, collectionID);

      READ_LOCKER(readLocker, _plannedCollectionsProt.lock);

      if (c->empty()) {
        return setErrormsg(TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, errorMsg);
      }

      indexes = c->getIndexes();

      if (!TRI_IsArrayJson(indexes)) {
        // no indexes present, so we can't delete our index
        return setErrormsg(TRI_ERROR_ARANGO_INDEX_NOT_FOUND, errorMsg);
      }

      collectionJson = TRI_CopyJson(TRI_UNKNOWN_MEM_ZONE, c->getJson());
      numberOfShards = c->numberOfShards();
    }

    if (collectionJson == nullptr) {
      return setErrormsg(TRI_ERROR_OUT_OF_MEMORY, errorMsg);
    }

    TRI_ASSERT(TRI_IsArrayJson(indexes));

    // delete previous indexes entry
    TRI_DeleteObjectJson(TRI_UNKNOWN_MEM_ZONE, collectionJson, "indexes");

    TRI_json_t* copy = TRI_CreateArrayJson(TRI_UNKNOWN_MEM_ZONE);

    if (copy == nullptr) {
      TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, collectionJson);
      return setErrormsg(TRI_ERROR_OUT_OF_MEMORY, errorMsg);
    }

    bool found = false;

    // copy remaining indexes back into collection
    size_t const n = TRI_LengthArrayJson(indexes);
    for (size_t i = 0; i < n; ++i) {
      TRI_json_t const* v = TRI_LookupArrayJson(indexes, i);
      TRI_json_t const* id = TRI_LookupObjectJson(v, "id");
      TRI_json_t const* type = TRI_LookupObjectJson(v, "type");

      if (!TRI_IsStringJson(id) || !TRI_IsStringJson(type)) {
        continue;
      }

      if (idString == std::string(id->_value._string.data)) {
        // found our index, ignore it when copying
        found = true;

        std::string const typeString(type->_value._string.data);
        if (typeString == "primary" || typeString == "edge") {
          // must not delete these index types
          TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, copy);
          TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, collectionJson);
          return setErrormsg(TRI_ERROR_FORBIDDEN, errorMsg);
        }

        continue;
      }

      TRI_PushBack3ArrayJson(TRI_UNKNOWN_MEM_ZONE, copy,
                             TRI_CopyJson(TRI_UNKNOWN_MEM_ZONE, v));
    }

    TRI_Insert3ObjectJson(TRI_UNKNOWN_MEM_ZONE, collectionJson, "indexes",
                          copy);

    if (!found) {
      // did not find the sought index
      TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, collectionJson);
      return setErrormsg(TRI_ERROR_ARANGO_INDEX_NOT_FOUND, errorMsg);
    }

    auto tmp = arangodb::basics::JsonHelper::toVelocyPack(collectionJson);
    AgencyCommResult result =
        ac.casValue(key, it->second._vpack->slice(), tmp->slice(), 0.0, 0.0);

    TRI_FreeJson(TRI_UNKNOWN_MEM_ZONE, collectionJson);

    if (!result.successful()) {
      return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_CREATE_COLLECTION_IN_PLAN,
                         errorMsg);
    }
  }

  // load our own cache:
  loadPlannedCollections();

  TRI_ASSERT(numberOfShards > 0);

  // now wait for the index to disappear
  AgencyCommResult res = ac.getValues("Current/Version", false);
  if (!res.successful()) {
    return setErrormsg(TRI_ERROR_CLUSTER_COULD_NOT_READ_CURRENT_VERSION,
                       errorMsg);
  }

  std::string where =
      "Current/Collections/" + databaseName + "/" + collectionID;
  while (TRI_microtime() <= endTime) {
    res.clear();
    res = ac.getValues(where, true);
    if (res.successful() && res.parse(where + "/", false)) {
      if (res._values.size() == (size_t)numberOfShards) {
        std::map<std::string, AgencyCommResultEntry>::iterator it;

        bool found = false;
        for (it = res._values.begin(); it != res._values.end(); ++it) {
          VPackSlice const slice = it->second._vpack->slice();
          VPackSlice const indexes = slice.get("indexes");

          if (indexes.isArray()) {
            for (auto const& v : VPackArrayIterator(indexes)) {
              if (v.isObject()) {
                VPackSlice const k = v.get("id");
                if (k.isString() && idString == k.copyString()) {
                  // still found the index in some shard
                  found = true;
                  break;
                }
              }

              if (found) {
                break;
              }
            }
          }
        }

        if (!found) {
          loadCurrentCollections();
          return setErrormsg(TRI_ERROR_NO_ERROR, errorMsg);
        }
      }
    }

    res.clear();
    _agencyCallbackRegistry->awaitNextChange("Current/Version", interval);
  }

  return setErrormsg(TRI_ERROR_CLUSTER_TIMEOUT, errorMsg);
}
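
// Usage sketch for dropIndexCoordinator (illustrative only; assumes the usual
// ClusterInfo::instance() accessor, which is not part of this excerpt). The
// call blocks until the index id has disappeared from all shards in
// Current/..., or until the timeout is reached:
//
//   std::string errorMsg;
//   int res = ClusterInfo::instance()->dropIndexCoordinator(
//       "mydb", "12345", (TRI_idx_iid_t)6789, errorMsg, 0.0);
//   if (res != TRI_ERROR_NO_ERROR) {
//     LOG(WARN) << "dropping index failed: " << errorMsg;
//   }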

////////////////////////////////////////////////////////////////////////////////
/// @brief (re-)load the information about servers from the agency
/// Usually one does not have to call this directly.
////////////////////////////////////////////////////////////////////////////////

static std::string const prefixServers = "Current/ServersRegistered";

void ClusterInfo::loadServers() {
  uint64_t storedVersion = _serversProt.version;
  MUTEX_LOCKER(mutexLocker, _serversProt.mutex);
  if (_serversProt.version > storedVersion) {
    // somebody else already did what we intended to do, so just return
    return;
  }

  // Now contact the agency:
  AgencyCommResult result;
  {
    AgencyCommLocker locker("Current", "READ");

    if (locker.successful()) {
      result = _agency.getValues(prefixServers, true);
    }
  }

  if (result.successful()) {
    result.parse(prefixServers + "/", false);

    decltype(_servers) newServers;

    std::map<std::string, AgencyCommResultEntry>::const_iterator it =
        result._values.begin();

    while (it != result._values.end()) {
      VPackSlice const slice = it->second._vpack->slice();
      if (slice.isObject() && slice.hasKey("endpoint")) {
        std::string server = arangodb::basics::VelocyPackHelper::getStringValue(
            slice, "endpoint", "");
        newServers.emplace(std::make_pair((*it).first, server));
      }
      ++it;
    }

    // Now set the new value:
    {
      WRITE_LOCKER(writeLocker, _serversProt.lock);
      _servers.swap(newServers);
      _serversProt.version++;       // such that others notice our change
      _serversProt.isValid = true;  // will never be reset to false
    }
    return;
  }

  LOG(DEBUG) << "Error while loading " << prefixServers
             << " httpCode: " << result.httpCode()
             << " errorCode: " << result.errorCode()
             << " errorMessage: " << result.errorMessage()
             << " body: " << result.body();
}
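
// Note: loadServers(), like the other load*() methods below, builds a fresh
// map from the agency result and only then swaps it into place under the
// write lock, so readers always observe either the complete old state or the
// complete new state. The version counter is bumped under the same lock, so
// concurrent callers waiting on the mutex can detect that the reload already
// happened and return early.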

////////////////////////////////////////////////////////////////////////////////
/// @brief find the endpoint of a server from its ID.
/// If it is not found in the cache, the cache is reloaded once; if
/// it is still not there, an empty string is returned as an error.
////////////////////////////////////////////////////////////////////////////////

std::string ClusterInfo::getServerEndpoint(ServerID const& serverID) {
  int tries = 0;

  if (!_serversProt.isValid) {
    loadServers();
    tries++;
  }

  while (true) {
    {
      READ_LOCKER(readLocker, _serversProt.lock);
      // _servers is a map-type <ServerId, std::string>
      auto it = _servers.find(serverID);

      if (it != _servers.end()) {
        return (*it).second;
      }
    }

    if (++tries >= 2) {
      break;
    }

    // must call loadServers outside the lock
    loadServers();
  }

  return std::string("");
}
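
// Note: the lookup helpers in this file follow the same "try, reload once,
// try again" pattern seen above: a first cache miss triggers one reload of
// the relevant cache outside of the read lock, and a second miss is reported
// to the caller as an empty result rather than as an error code.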

////////////////////////////////////////////////////////////////////////////////
/// @brief find the ID of a server from its endpoint.
/// If it is not found in the cache, the cache is reloaded once; if
/// it is still not there, an empty string is returned as an error.
////////////////////////////////////////////////////////////////////////////////

std::string ClusterInfo::getServerName(std::string const& endpoint) {
  int tries = 0;

  if (!_serversProt.isValid) {
    loadServers();
    tries++;
  }

  while (true) {
    {
      READ_LOCKER(readLocker, _serversProt.lock);
      for (auto const& it : _servers) {
        if (it.second == endpoint) {
          return it.first;
        }
      }
    }

    if (++tries >= 2) {
      break;
    }

    // must call loadServers outside the lock
    loadServers();
  }

  return std::string("");
}

////////////////////////////////////////////////////////////////////////////////
/// @brief (re-)load the information about all coordinators from the agency
/// Usually one does not have to call this directly.
////////////////////////////////////////////////////////////////////////////////

static std::string const prefixCurrentCoordinators = "Current/Coordinators";

void ClusterInfo::loadCurrentCoordinators() {
  uint64_t storedVersion = _coordinatorsProt.version;
  MUTEX_LOCKER(mutexLocker, _coordinatorsProt.mutex);
  if (_coordinatorsProt.version > storedVersion) {
    // somebody else already did what we intended to do, so just return
    return;
  }

  // Now contact the agency:
  AgencyCommResult result;
  {
    AgencyCommLocker locker("Current", "READ");

    if (locker.successful()) {
      result = _agency.getValues(prefixCurrentCoordinators, true);
    }
  }

  if (result.successful()) {
    result.parse(prefixCurrentCoordinators + "/", false);

    decltype(_coordinators) newCoordinators;

    std::map<std::string, AgencyCommResultEntry>::const_iterator it =
        result._values.begin();

    for (; it != result._values.end(); ++it) {
      VPackSlice const slice = it->second._vpack->slice();
      newCoordinators.emplace(std::make_pair(
          (*it).first,
          arangodb::basics::VelocyPackHelper::getStringValue(slice, "")));
    }

    // Now set the new value:
    {
      WRITE_LOCKER(writeLocker, _coordinatorsProt.lock);
      _coordinators.swap(newCoordinators);
      _coordinatorsProt.version++;       // such that others notice our change
      _coordinatorsProt.isValid = true;  // will never be reset to false
    }
    return;
  }

  LOG(DEBUG) << "Error while loading " << prefixCurrentCoordinators
             << " httpCode: " << result.httpCode()
             << " errorCode: " << result.errorCode()
             << " errorMessage: " << result.errorMessage()
             << " body: " << result.body();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief (re-)load the information about all DBservers from the agency
/// Usually one does not have to call this directly.
////////////////////////////////////////////////////////////////////////////////

static std::string const prefixCurrentDBServers = "Current/DBServers";

void ClusterInfo::loadCurrentDBServers() {
  uint64_t storedVersion = _DBServersProt.version;
  MUTEX_LOCKER(mutexLocker, _DBServersProt.mutex);
  if (_DBServersProt.version > storedVersion) {
    // somebody else already did what we intended to do, so just return
    return;
  }

  // Now contact the agency:
  AgencyCommResult result;
  {
    AgencyCommLocker locker("Current", "READ");

    if (locker.successful()) {
      result = _agency.getValues(prefixCurrentDBServers, true);
    }
  }

  if (result.successful()) {
    result.parse(prefixCurrentDBServers + "/", false);

    decltype(_DBServers) newDBServers;

    std::map<std::string, AgencyCommResultEntry>::const_iterator it =
        result._values.begin();

    for (; it != result._values.end(); ++it) {
      VPackSlice const slice = it->second._vpack->slice();
      newDBServers.emplace(std::make_pair(
          (*it).first,
          arangodb::basics::VelocyPackHelper::getStringValue(slice, "")));
    }

    // Now set the new value:
    {
      WRITE_LOCKER(writeLocker, _DBServersProt.lock);
      _DBServers.swap(newDBServers);
      _DBServersProt.version++;       // such that others notice our change
      _DBServersProt.isValid = true;  // will never be reset to false
    }
    return;
  }

  LOG(DEBUG) << "Error while loading " << prefixCurrentDBServers
             << " httpCode: " << result.httpCode()
             << " errorCode: " << result.errorCode()
             << " errorMessage: " << result.errorMessage()
             << " body: " << result.body();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief return a list of all DBServers in the cluster that are
/// currently registered
////////////////////////////////////////////////////////////////////////////////

std::vector<ServerID> ClusterInfo::getCurrentDBServers() {
  std::vector<ServerID> result;
  int tries = 0;

  if (!_DBServersProt.isValid) {
    loadCurrentDBServers();
    tries++;
  }
  while (true) {
    {
      // return a consistent state of servers
      READ_LOCKER(readLocker, _DBServersProt.lock);

      result.reserve(_DBServers.size());

      for (auto& it : _DBServers) {
        result.emplace_back(it.first);
      }

      return result;
    }

    if (++tries >= 2) {
      break;
    }

    // loadCurrentDBServers needs the write lock
    loadCurrentDBServers();
  }

  // note that the result will be empty if we get here
  return result;
}
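
// Note: getCurrentDBServers() only returns the registered server IDs (the map
// keys); to actually contact one of them, the endpoint has to be resolved
// separately, e.g. via getServerEndpoint(serverID) above.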

////////////////////////////////////////////////////////////////////////////////
/// @brief lookup the server's endpoint by scanning Target/MapIDToEndpoint for
/// our id
////////////////////////////////////////////////////////////////////////////////

static std::string const prefixTargetServerEndpoint = "Target/MapIDToEndpoint/";

std::string ClusterInfo::getTargetServerEndpoint(ServerID const& serverID) {
  AgencyCommResult result;

  // fetch value at Target/MapIDToEndpoint
  {
    AgencyCommLocker locker("Target", "READ");

    if (locker.successful()) {
      result = _agency.getValues(prefixTargetServerEndpoint + serverID, false);
    }
  }

  if (result.successful()) {
    result.parse(prefixTargetServerEndpoint, false);

    // check if we can find ourselves in the list returned by the agency
    std::map<std::string, AgencyCommResultEntry>::const_iterator it =
        result._values.find(serverID);

    if (it != result._values.end()) {
      VPackSlice const slice = it->second._vpack->slice();
      return arangodb::basics::VelocyPackHelper::getStringValue(slice, "");
    }
  }

  // not found
  return "";
}
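
// Note: unlike the other lookups in this file, getTargetServerEndpoint() is
// not backed by a local cache; every call reads
// Target/MapIDToEndpoint/<serverID> from the agency directly and returns an
// empty string if the mapping does not exist (yet).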

////////////////////////////////////////////////////////////////////////////////
/// @brief find the servers who are responsible for a shard (one leader
/// and multiple followers)
/// If the shard is not found in the cache, the cache is reloaded once; if
/// it is still not there, an empty list is returned as an error.
////////////////////////////////////////////////////////////////////////////////

std::shared_ptr<std::vector<ServerID>> ClusterInfo::getResponsibleServer(
    ShardID const& shardID) {
  int tries = 0;

  if (!_currentCollectionsProt.isValid) {
    loadCurrentCollections();
    tries++;
  }

  while (true) {
    {
      READ_LOCKER(readLocker, _currentCollectionsProt.lock);
      // _shardIds is a map-type <ShardId,
      // std::shared_ptr<std::vector<ServerId>>>
      auto it = _shardIds.find(shardID);

      if (it != _shardIds.end()) {
        return (*it).second;
      }
    }

    if (++tries >= 2) {
      break;
    }

    // must load collections outside the lock
    loadCurrentCollections();
  }

  return std::make_shared<std::vector<ServerID>>();
}

////////////////////////////////////////////////////////////////////////////////
/// @brief find the shard list of a collection, sorted numerically
////////////////////////////////////////////////////////////////////////////////

std::shared_ptr<std::vector<ShardID>> ClusterInfo::getShardList(
    CollectionID const& collectionID) {
  if (!_plannedCollectionsProt.isValid) {
    loadPlannedCollections();
  }

  int tries = 0;
  while (true) {
    {
      // Get the sharding keys and the number of shards:
      READ_LOCKER(readLocker, _plannedCollectionsProt.lock);
      // _shards is a map-type <CollectionId, shared_ptr<vector<string>>>
      auto it = _shards.find(collectionID);

      if (it != _shards.end()) {
        return it->second;
      }
    }
    if (++tries >= 2) {
      return std::make_shared<std::vector<ShardID>>();
    }
    loadPlannedCollections();
  }
}
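
// Note: getShardList() never returns a nullptr; callers get either the cached
// shard vector of the collection or a newly allocated empty vector when the
// collection is still unknown after one reload of the planned collections.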

////////////////////////////////////////////////////////////////////////////////
/// @brief find the shard that is responsible for a document, which is given
/// as a VelocyPack slice.
///
/// There are two modes: the first assumes that the document is given as a
/// whole (`docComplete`==`true`); in this case, the non-existence of
/// values for some of the sharding attributes is silently ignored
/// and treated as if these values were `null`. The second mode
/// (`docComplete`==`false`) leads to an error in that case, which is
/// reported by returning TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, the only
/// error code that can be returned.
///
/// In either case, if the collection is found, the variable
/// shardID is set to the ID of the responsible shard and the flag
/// `usesDefaultShardingAttributes` is set to `true` if and only if
/// `_key` is the one and only sharding attribute.
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::getResponsibleShard(CollectionID const& collectionID,
                                     VPackSlice slice, bool docComplete,
                                     ShardID& shardID,
                                     bool& usesDefaultShardingAttributes,
                                     std::string const& key) {
  // Note that currently we take the number of shards and the shardKeys
  // from Plan, since they are immutable. Later we will have to switch
  // this to Current, when we allow to add and remove shards.
  if (!_plannedCollectionsProt.isValid) {
    loadPlannedCollections();
  }

  int tries = 0;
  std::shared_ptr<std::vector<std::string>> shardKeysPtr;
  std::shared_ptr<std::vector<ShardID>> shards;
  bool found = false;

  while (true) {
    {
      // Get the sharding keys and the number of shards:
      READ_LOCKER(readLocker, _plannedCollectionsProt.lock);
      // _shards is a map-type <CollectionId, shared_ptr<vector<string>>>
      auto it = _shards.find(collectionID);

      if (it != _shards.end()) {
        shards = it->second;
        // _shardKeys is a map-type <CollectionID, shared_ptr<vector<string>>>
        auto it2 = _shardKeys.find(collectionID);
        if (it2 != _shardKeys.end()) {
          shardKeysPtr = it2->second;
          usesDefaultShardingAttributes =
              shardKeysPtr->size() == 1 &&
              shardKeysPtr->at(0) == TRI_VOC_ATTRIBUTE_KEY;
          found = true;
          break;  // all OK
        }
      }
    }
    if (++tries >= 2) {
      break;
    }
    loadPlannedCollections();
  }

  if (!found) {
    return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
  }

  int error = TRI_ERROR_NO_ERROR;
  uint64_t hash = arangodb::basics::VelocyPackHelper::hashByAttributes(
      slice, *shardKeysPtr, docComplete, error, key);
  static char const* magicPhrase =
      "Foxx you have stolen the goose, give she back again!";
  static size_t const len = 52;
  // To improve our hash function:
  hash = TRI_FnvHashBlock(hash, magicPhrase, len);

  shardID = shards->at(hash % shards->size());
  return error;
}
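
// Worked example (illustrative only, the concrete numbers are made up): for a
// collection with shardKeys == ["_key"] and 4 shards, a document with
// _key "abc" is placed roughly as follows:
//
//   uint64_t hash = hashByAttributes(doc, {"_key"}, ...);  // e.g. 0x9e3779b9
//   hash = TRI_FnvHashBlock(hash, magicPhrase, len);       // decorrelate
//   shardID = shards->at(hash % 4);                        // e.g. shards[1]
//
// Every coordinator computes the same value, so all of them route the
// document to the same shard without any coordination.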

////////////////////////////////////////////////////////////////////////////////
/// @brief find the shard that is responsible for a document, which is given
/// as a TRI_json_t const*.
///
/// There are two modes: the first assumes that the document is given as a
/// whole (`docComplete`==`true`); in this case, the non-existence of
/// values for some of the sharding attributes is silently ignored
/// and treated as if these values were `null`. The second mode
/// (`docComplete`==`false`) leads to an error in that case, which is
/// reported by returning TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND, the only
/// error code that can be returned.
///
/// In either case, if the collection is found, the variable
/// shardID is set to the ID of the responsible shard and the flag
/// `usesDefaultShardingAttributes` is set to `true` if and only if
/// `_key` is the one and only sharding attribute.
////////////////////////////////////////////////////////////////////////////////

int ClusterInfo::getResponsibleShard(CollectionID const& collectionID,
                                     TRI_json_t const* json, bool docComplete,
                                     ShardID& shardID,
                                     bool& usesDefaultShardingAttributes) {
  // Note that currently we take the number of shards and the shardKeys
  // from Plan, since they are immutable. Later we will have to switch
  // this to Current, when we allow to add and remove shards.
  if (!_plannedCollectionsProt.isValid) {
    loadPlannedCollections();
  }

  int tries = 0;
  std::shared_ptr<std::vector<std::string>> shardKeysPtr;
  std::unique_ptr<char const* []> shardKeys;
  std::shared_ptr<std::vector<ShardID>> shards;
  bool found = false;

  while (true) {
    {
      // Get the sharding keys and the number of shards:
      READ_LOCKER(readLocker, _plannedCollectionsProt.lock);
      // _shards is a map-type <CollectionId, shared_ptr<vector<string>>>
      auto it = _shards.find(collectionID);

      if (it != _shards.end()) {
        shards = it->second;
        // _shardKeys is a map-type <CollectionID, shared_ptr<vector<string>>>
        auto it2 = _shardKeys.find(collectionID);
        if (it2 != _shardKeys.end()) {
          shardKeysPtr = it2->second;
          shardKeys.reset(new char const*[shardKeysPtr->size()]);
          size_t i;
          for (i = 0; i < shardKeysPtr->size(); ++i) {
            shardKeys[i] = shardKeysPtr->at(i).c_str();
          }
          usesDefaultShardingAttributes =
              shardKeysPtr->size() == 1 &&
              shardKeysPtr->at(0) == TRI_VOC_ATTRIBUTE_KEY;
          found = true;
          break;  // all OK
        }
      }
    }
    if (++tries >= 2) {
      break;
    }
    loadPlannedCollections();
  }

  if (!found) {
    return TRI_ERROR_ARANGO_COLLECTION_NOT_FOUND;
  }

  int error;
  uint64_t hash = TRI_HashJsonByAttributes(
      json, shardKeys.get(), (int)shardKeysPtr->size(), docComplete, error);
  static char const* magicPhrase =
      "Foxx you have stolen the goose, give she back again!";
  static size_t const len = 52;
  // To improve our hash function:
  hash = TRI_FnvHashBlock(hash, magicPhrase, len);

  shardID = shards->at(hash % shards->size());
  return error;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief return the list of coordinator server names
////////////////////////////////////////////////////////////////////////////////

std::vector<ServerID> ClusterInfo::getCurrentCoordinators() {
  std::vector<ServerID> result;
  int tries = 0;

  if (!_coordinatorsProt.isValid) {
    loadCurrentCoordinators();
    tries++;
  }
  while (true) {
    {
      // return a consistent state of servers
      READ_LOCKER(readLocker, _coordinatorsProt.lock);

      result.reserve(_coordinators.size());

      for (auto& it : _coordinators) {
        result.emplace_back(it.first);
      }

      return result;
    }

    if (++tries >= 2) {
      break;
    }

    // loadCurrentCoordinators needs the write lock
    loadCurrentCoordinators();
  }

  // note that the result will be empty if we get here
  return result;
}

//////////////////////////////////////////////////////////////////////////////
/// @brief invalidate current
//////////////////////////////////////////////////////////////////////////////

void ClusterInfo::invalidateCurrent() {
  WRITE_LOCKER(writeLocker, _currentCollectionsProt.lock);
  _currentCollectionsProt.isValid = false;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief get information about current followers of a shard.
////////////////////////////////////////////////////////////////////////////////

std::shared_ptr<std::vector<ServerID> const> FollowerInfo::get() {
  MUTEX_LOCKER(locker, _mutex);
  return _followers;
}

////////////////////////////////////////////////////////////////////////////////
/// @brief change the JSON under
/// Current/Collections/<DB-name>/<Collection-ID>/<shard-ID>
/// to add or remove a serverID: if the add flag is true, the entry is added
/// (if it is not yet there), otherwise the entry is removed (if it was
/// there).
////////////////////////////////////////////////////////////////////////////////

VPackBuilder newShardEntry(VPackSlice oldValue, ServerID const& sid, bool add) {
  VPackBuilder newValue;
  VPackSlice servers;
  {
    VPackObjectBuilder b(&newValue);
    // Now we need to find the `servers` attribute, which is a list:
    for (auto const& it : VPackObjectIterator(oldValue)) {
      if (it.key.isEqualString("servers")) {
        servers = it.value;
      } else {
        newValue.add(it.key);
        newValue.add(it.value);
      }
    }
    newValue.add(VPackValue("servers"));
    if (servers.isArray() && servers.length() > 0) {
      VPackArrayBuilder bb(&newValue);
      newValue.add(servers[0]);
      VPackArrayIterator it(servers);
      bool done = false;
      for (++it; it.valid(); ++it) {
        if ((*it).isEqualString(sid)) {
          if (add) {
            newValue.add(*it);
            done = true;
          }
        } else {
          newValue.add(*it);
        }
      }
      if (add && !done) {
        newValue.add(VPackValue(sid));
      }
    } else {
      VPackArrayBuilder bb(&newValue);
      newValue.add(VPackValue(ServerState::instance()->getId()));
      if (add) {
        newValue.add(VPackValue(sid));
      }
    }
  }
  return newValue;
}
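
// Example of what newShardEntry() produces (sketch; the attribute values are
// made up): given an old shard entry
//
//   { "servers": ["DBServer001", "DBServer002"], "indexes": [...] }
//
// a call with sid == "DBServer003" and add == true yields
//
//   { "indexes": [...],
//     "servers": ["DBServer001", "DBServer002", "DBServer003"] }
//
// while add == false with sid == "DBServer002" removes that entry. The first
// element (the leader) is always copied over unchanged; if there was no
// usable "servers" array, the list is restarted with this server's own ID.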

////////////////////////////////////////////////////////////////////////////////
/// @brief add a follower to a shard. This is only done by the server side
/// of the "get-in-sync" capabilities. It reports to the agency under
/// `/Current`, but in an asynchronous "fire-and-forget" way.
////////////////////////////////////////////////////////////////////////////////

void FollowerInfo::add(ServerID const& sid) {
  MUTEX_LOCKER(locker, _mutex);

  // Fully copy the vector:
  auto v = std::make_shared<std::vector<ServerID>>(*_followers);
  v->push_back(sid);  // add a single entry
  _followers = v;     // will cast to std::vector<ServerID> const
  // Now tell the agency, path is
  // Current/Collections/<dbName>/<collectionID>/<shardID>
  std::string path = "Current/Collections/";
  path += _docColl->_vocbase->_name;
  path += "/";
  path += std::to_string(_docColl->_info.planId());
  path += "/";
  path += _docColl->_info.name();
  AgencyComm ac;
  double startTime = TRI_microtime();
  bool success = false;
  do {
    AgencyCommResult res = ac.getValues(path, false);
    if (res.successful()) {
      if (res.parse("", false)) {
        auto it = res._values.begin();
        if (it != res._values.end() && it->first == path) {
          VPackSlice oldValue = it->second._vpack->slice();
          auto newValue = newShardEntry(oldValue, sid, true);
          AgencyCommResult res2 =
              ac.casValue(path, oldValue, newValue.slice(), 0, 0);
          if (res2.successful()) {
            success = true;
            break;
          } else {
            LOG(WARN) << "FollowerInfo::add, could not cas key " << path;
          }
        } else {
          LOG(ERR) << "FollowerInfo::add, did not find key " << path
                   << " in agency.";
        }
      } else {
        LOG(ERR) << "FollowerInfo::add, could not parse " << path
                 << " in agency.";
      }
    } else {
      LOG(ERR) << "FollowerInfo::add, could not read " << path << " in agency.";
    }
    usleep(500000);
  } while (TRI_microtime() < startTime + 30);
  if (!success) {
    LOG(ERR) << "FollowerInfo::add, timeout in agency operation for key "
             << path;
  }
}
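
// Note: the agency update above is best-effort by design. The in-memory
// follower list is updated first; the write to /Current then retries the CAS
// every 0.5 seconds for at most 30 seconds and merely logs an error if it
// ultimately fails. FollowerInfo::remove() below follows the same scheme.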

////////////////////////////////////////////////////////////////////////////////
/// @brief remove a follower from a shard. This is only done by the
/// server if a synchronous replication request fails. It reports to
/// the agency under `/Current`, but in an asynchronous "fire-and-forget"
/// way. The method fails silently if the follower information has
/// since been dropped (see `dropFollowerInfo` below).
////////////////////////////////////////////////////////////////////////////////

void FollowerInfo::remove(ServerID const& sid) {
  MUTEX_LOCKER(locker, _mutex);

  auto v = std::make_shared<std::vector<ServerID>>();
  v->reserve(_followers->size() - 1);
  for (auto const& i : *_followers) {
    if (i != sid) {
      v->push_back(i);
    }
  }
  _followers = v;  // will cast to std::vector<ServerID> const
  // Now tell the agency, path is
  // Current/Collections/<dbName>/<collectionID>/<shardID>
  std::string path = "Current/Collections/";
  path += _docColl->_vocbase->_name;
  path += "/";
  path += std::to_string(_docColl->_info.planId());
  path += "/";
  path += _docColl->_info.name();
  AgencyComm ac;
  double startTime = TRI_microtime();
  bool success = false;
  do {
    AgencyCommResult res = ac.getValues(path, false);
    if (res.successful()) {
      if (res.parse("", false)) {
        auto it = res._values.begin();
        if (it != res._values.end() && it->first == path) {
          VPackSlice oldValue = it->second._vpack->slice();
          auto newValue = newShardEntry(oldValue, sid, false);
          AgencyCommResult res2 =
              ac.casValue(path, oldValue, newValue.slice(), 0, 0);
          if (res2.successful()) {
            success = true;
            break;
          } else {
            LOG(WARN) << "FollowerInfo::remove, could not cas key " << path;
          }
        } else {
          LOG(ERR) << "FollowerInfo::remove, did not find key " << path
                   << " in agency.";
        }
      } else {
        LOG(ERR) << "FollowerInfo::remove, could not parse " << path
                 << " in agency.";
      }
    } else {
      LOG(ERR) << "FollowerInfo::remove, could not read " << path
               << " in agency.";
    }
    usleep(500000);
  } while (TRI_microtime() < startTime + 30);
  if (!success) {
    LOG(ERR) << "FollowerInfo::remove, timeout in agency operation for key "
             << path;
  }
}