////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Jan Steemann
/// @author Dan Larkin-York
////////////////////////////////////////////////////////////////////////////////

#include "RestoreFeature.h"

#include <velocypack/Builder.h>
#include <velocypack/Collection.h>
#include <velocypack/Iterator.h>
#include <velocypack/StringRef.h>
#include <velocypack/velocypack-aliases.h>

#include <boost/algorithm/clamp.hpp>

#include <chrono>
#include <thread>

#include "ApplicationFeatures/ApplicationServer.h"
#include "Basics/FileUtils.h"
#include "Basics/Result.h"
#include "Basics/StaticStrings.h"
#include "Basics/StringUtils.h"
#include "Basics/VelocyPackHelper.h"
#include "Logger/Logger.h"
#include "ProgramOptions/ProgramOptions.h"
#include "Shell/ClientFeature.h"
#include "SimpleHttpClient/GeneralClientConnection.h"
#include "SimpleHttpClient/SimpleHttpClient.h"
#include "SimpleHttpClient/SimpleHttpResult.h"
#include "Ssl/SslInterface.h"

#ifdef USE_ENTERPRISE
#include "Enterprise/Encryption/EncryptionFeature.h"
#endif

namespace {

/// @brief name of the feature to report to application server
constexpr auto FeatureName = "Restore";

/// @brief return the target replication factor for the specified collection
uint64_t getReplicationFactor(arangodb::RestoreFeature::Options const& options,
                              arangodb::velocypack::Slice const& slice, bool& isSatellite) {
  uint64_t result = options.defaultReplicationFactor;
  isSatellite = false;

  arangodb::velocypack::Slice s = slice.get("replicationFactor");
  if (s.isInteger()) {
    result = s.getNumericValue<uint64_t>();
  } else if (s.isString()) {
    if (s.copyString() == "satellite") {
      isSatellite = true;
    }
  }

  s = slice.get("name");
  if (!s.isString()) {
    // should not happen, but anyway, let's be safe here
    return result;
  }

  if (!options.replicationFactor.empty()) {
    std::string const name = s.copyString();

    for (auto const& it : options.replicationFactor) {
      auto parts = arangodb::basics::StringUtils::split(it, '=');
      if (parts.size() == 1) {
        // this is the default value, e.g. `--replication-factor 2`
        if (parts[0] == "satellite") {
          isSatellite = true;
        } else {
          result = arangodb::basics::StringUtils::uint64(parts[0]);
        }
      }

      // look for a more specific, per-collection value, e.g. `--replication-factor myCollection=3`
      if (parts.size() != 2 || parts[0] != name) {
        // somehow invalid or different collection
        continue;
      }
      if (parts[1] == "satellite") {
        isSatellite = true;
      } else {
        result = arangodb::basics::StringUtils::uint64(parts[1]);
      }
      break;
    }
  }

  return result;
}

/// @brief return the target number of shards for the specified collection
uint64_t getNumberOfShards(arangodb::RestoreFeature::Options const& options,
                           arangodb::velocypack::Slice const& slice) {
  uint64_t result = options.defaultNumberOfShards;

  arangodb::velocypack::Slice s = slice.get("numberOfShards");
  if (s.isInteger()) {
    result = s.getNumericValue<uint64_t>();
  }

  s = slice.get("name");
  if (!s.isString()) {
    // should not happen, but anyway, let's be safe here
    return result;
  }

  if (!options.numberOfShards.empty()) {
    std::string const name = s.copyString();

    for (auto const& it : options.numberOfShards) {
      auto parts = arangodb::basics::StringUtils::split(it, '=');
      if (parts.size() == 1) {
        // this is the default value, e.g. `--number-of-shards 2`
        result = arangodb::basics::StringUtils::uint64(parts[0]);
      }

      // look for a more specific, per-collection value, e.g. `--number-of-shards myCollection=3`
      if (parts.size() != 2 || parts[0] != name) {
        // somehow invalid or different collection
        continue;
      }
      result = arangodb::basics::StringUtils::uint64(parts[1]);
      break;
    }
  }

  return result;
}

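// Example (hypothetical command line): with
//   --number-of-shards 3 --number-of-shards myCollection=5
//   --replication-factor 2 --replication-factor other=satellite
// "myCollection" is created with 5 shards, "other" becomes a satellite
// collection, and all remaining collections get 3 shards and replication
// factor 2; these overrides take precedence over the values stored in the
// dump's structure files.
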
/// @brief check whether HTTP response is valid, complete, and not an error
arangodb::Result checkHttpResponse(arangodb::httpclient::SimpleHttpClient& client,
                                   std::unique_ptr<arangodb::httpclient::SimpleHttpResult>& response,
                                   char const* requestAction,
                                   std::string const& originalRequest) {
  using arangodb::basics::StringUtils::itoa;
  if (response == nullptr || !response->isComplete()) {
    return {TRI_ERROR_INTERNAL,
            "got invalid response from server: '" + client.getErrorMessage() +
                "' while executing " + requestAction +
                (originalRequest.empty() ? "" : " with this payload: '" + originalRequest + "'")};
  }
  if (response->wasHttpError()) {
    int errorNum = TRI_ERROR_INTERNAL;
    std::string errorMsg = response->getHttpReturnMessage();
    std::shared_ptr<arangodb::velocypack::Builder> bodyBuilder(response->getBodyVelocyPack());
    arangodb::velocypack::Slice error = bodyBuilder->slice();
    if (!error.isNone() && error.hasKey(arangodb::StaticStrings::ErrorMessage)) {
      errorNum = error.get(arangodb::StaticStrings::ErrorNum).getNumericValue<int>();
      errorMsg = error.get(arangodb::StaticStrings::ErrorMessage).copyString();
    }
    return {errorNum,
            "got invalid response from server: HTTP " + itoa(response->getHttpReturnCode()) +
                ": '" + errorMsg + "' while executing " + requestAction +
                (originalRequest.empty() ? "" : " with this payload: '" + originalRequest + "'")};
  }
  return {TRI_ERROR_NO_ERROR};
}

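// Usage sketch (mirrors the call sites below): wrap every SimpleHttpClient
// request and propagate a failed Result to the caller, e.g.
//   std::unique_ptr<SimpleHttpResult> response(httpClient.request(...));
//   auto res = ::checkHttpResponse(httpClient, response, "restoring data", body);
//   if (res.fail()) { return res; }
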
/// @brief Sort collections for proper recreation order
bool sortCollectionsForCreation(VPackBuilder const& l, VPackBuilder const& r) {
  VPackSlice const left = l.slice().get("parameters");
  VPackSlice const right = r.slice().get("parameters");

  std::string leftName =
      arangodb::basics::VelocyPackHelper::getStringValue(left, "name", "");
  std::string rightName =
      arangodb::basics::VelocyPackHelper::getStringValue(right, "name", "");

  // First we sort by shard distribution.
  // We first have to create the collections which have no dependencies.
  // NB: Dependency graph has depth at most 1, no need to manage complex DAG
  VPackSlice leftDist = left.get("distributeShardsLike");
  VPackSlice rightDist = right.get("distributeShardsLike");
  if (leftDist.isNone() && rightDist.isString() && rightDist.copyString() == leftName) {
    return true;
  }
  if (rightDist.isNone() && leftDist.isString() && leftDist.copyString() == rightName) {
    return false;
  }

  // Next we sort by collection type so that vertex collections are recreated
  // before edge, etc.
  int leftType =
      arangodb::basics::VelocyPackHelper::getNumericValue<int>(left, "type", 0);
  int rightType =
      arangodb::basics::VelocyPackHelper::getNumericValue<int>(right, "type", 0);
  if (leftType != rightType) {
    return leftType < rightType;
  }

  // Finally, sort by name so we have stable, reproducible results
  // Sort system collections first
  if (!leftName.empty() && leftName[0] == '_' && !rightName.empty() && rightName[0] != '_') {
    return true;
  }
  if (!leftName.empty() && leftName[0] != '_' && !rightName.empty() && rightName[0] == '_') {
    return false;
  }
  return strcasecmp(leftName.c_str(), rightName.c_str()) < 0;
}

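// Example ordering (hypothetical collection set): a prototype collection
// "persons" sorts before a collection whose distributeShardsLike is
// "persons"; the vertex collection "persons" (type 2) sorts before the edge
// collection "knows" (type 3); and system collections such as "_graphs"
// sort before user collections of the same type.
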
void makeAttributesUnique(arangodb::velocypack::Builder& builder,
                          arangodb::velocypack::Slice slice) {
  if (slice.isObject()) {
    std::unordered_set<arangodb::velocypack::StringRef> keys;

    builder.openObject();

    auto it = arangodb::velocypack::ObjectIterator(slice, true);

    while (it.valid()) {
      if (!keys.emplace(it.key().stringRef()).second) {
        // duplicate key
        it.next();
        continue;
      }

      // process attributes recursively
      builder.add(it.key());
      makeAttributesUnique(builder, it.value());
      it.next();
    }

    builder.close();
  } else if (slice.isArray()) {
    builder.openArray();

    auto it = arangodb::velocypack::ArrayIterator(slice);

    while (it.valid()) {
      // recurse into array
      makeAttributesUnique(builder, it.value());
      it.next();
    }

    builder.close();
  } else {
    // non-compound value!
    builder.add(slice);
  }
}

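// Example (hypothetical input): feeding makeAttributesUnique the parsed
// document {"a":1,"a":2,"sub":{"x":1,"x":2}} yields {"a":1,"sub":{"x":1}};
// for each object, only the first occurrence of an attribute is kept, and
// arrays are traversed recursively.
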
/// @brief Create the database to restore to, connecting manually
arangodb::Result tryCreateDatabase(std::string const& name) {
  using arangodb::httpclient::SimpleHttpClient;
  using arangodb::httpclient::SimpleHttpResult;
  using arangodb::rest::RequestType;
  using arangodb::rest::ResponseCode;
  using arangodb::velocypack::ArrayBuilder;
  using arangodb::velocypack::ObjectBuilder;

  // get client feature for configuration info
  auto client =
      arangodb::application_features::ApplicationServer::getFeature<arangodb::ClientFeature>(
          "Client");
  TRI_ASSERT(nullptr != client);

  // get httpclient by hand rather than using manager, to bypass any built-in
  // checks which will fail if the database doesn't exist
  std::unique_ptr<SimpleHttpClient> httpClient;
  try {
    httpClient = client->createHttpClient();
    httpClient->params().setLocationRewriter(static_cast<void*>(client),
                                             arangodb::ClientManager::rewriteLocation);
    httpClient->params().setUserNamePassword("/", client->username(), client->password());
  } catch (...) {
    LOG_TOPIC("832ef", FATAL, arangodb::Logger::RESTORE)
        << "cannot create server connection, giving up!";
    return {TRI_ERROR_SIMPLE_CLIENT_COULD_NOT_CONNECT};
  }

  VPackBuilder builder;
  {
    ObjectBuilder object(&builder);
    object->add("name", VPackValue(name));
    {
      ArrayBuilder users(&builder, "users");
      {
        ObjectBuilder user(&builder);
        user->add("username", VPackValue(client->username()));
        user->add("passwd", VPackValue(client->password()));
      }
    }
  }
  std::string const body = builder.slice().toJson();

  std::unique_ptr<SimpleHttpResult> response(
      httpClient->request(RequestType::POST, "/_api/database", body.c_str(), body.size()));
  if (response == nullptr || !response->isComplete()) {
    return {TRI_ERROR_INTERNAL};
  }

  auto returnCode = response->getHttpReturnCode();
  if (returnCode == static_cast<int>(ResponseCode::OK) ||
      returnCode == static_cast<int>(ResponseCode::CREATED)) {
    // all ok
    return {TRI_ERROR_NO_ERROR};
  }
  if (returnCode == static_cast<int>(ResponseCode::UNAUTHORIZED) ||
      returnCode == static_cast<int>(ResponseCode::FORBIDDEN)) {
    // invalid authorization
    auto res = ::checkHttpResponse(*httpClient, response, "creating database", body);
    return {TRI_ERROR_FORBIDDEN, res.errorMessage()};
  }

  // any other error
  auto res = ::checkHttpResponse(*httpClient, response, "creating database", body);
  return {TRI_ERROR_INTERNAL, res.errorMessage()};
}

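// The request body built above is plain JSON, e.g. (credentials elided):
//   {"name":"mydb","users":[{"username":"root","passwd":"..."}]}
// so the restoring user is granted access to the freshly created database.
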
/// @brief If the directory is encrypted, check that a key option is specified
void checkEncryption(arangodb::ManagedDirectory& directory) {
  using arangodb::Logger;
  if (directory.isEncrypted()) {
#ifdef USE_ENTERPRISE
    if (!directory.encryptionFeature()->keyOptionSpecified()) {
      LOG_TOPIC("cc58e", WARN, Logger::RESTORE)
          << "the dump data seems to be encrypted with " << directory.encryptionType()
          << ", but no key information was specified to decrypt the dump";
      LOG_TOPIC("1a5a4", WARN, Logger::RESTORE)
          << "it is recommended to specify either "
             "`--encryption.keyfile` or `--encryption.key-generator` "
             "when invoking arangorestore with an encrypted dump";
    } else {
      LOG_TOPIC("4f9cf", INFO, Logger::RESTORE)
          << "# using encryption type " << directory.encryptionType()
          << " for reading dump";
    }
#endif
  }
}

/// @brief Check the database name specified by the dump file
arangodb::Result checkDumpDatabase(arangodb::ManagedDirectory& directory, bool forceSameDatabase) {
  using arangodb::ClientFeature;
  using arangodb::Logger;
  using arangodb::application_features::ApplicationServer;

  std::string databaseName;
  try {
    VPackBuilder fileContentBuilder = directory.vpackFromJsonFile("dump.json");
    VPackSlice const fileContent = fileContentBuilder.slice();
    databaseName = fileContent.get("database").copyString();
  } catch (...) {
    // the above may go wrong for several reasons
  }

  if (!databaseName.empty()) {
    LOG_TOPIC("abeb4", INFO, Logger::RESTORE)
        << "Database name in source dump is '" << databaseName << "'";
  }

  ClientFeature* client =
      ApplicationServer::getFeature<ClientFeature>("Client");
  if (forceSameDatabase && databaseName != client->databaseName()) {
    return {TRI_ERROR_BAD_PARAMETER,
            std::string("database name in dump.json ('") + databaseName +
                "') does not match specified database name ('" +
                client->databaseName() + "')"};
  }

  return {};
}

/// @brief Send the command to recreate a collection
arangodb::Result sendRestoreCollection(arangodb::httpclient::SimpleHttpClient& httpClient,
                                       arangodb::RestoreFeature::Options const& options,
                                       VPackSlice const& slice, std::string const& name) {
  using arangodb::Logger;
  using arangodb::httpclient::SimpleHttpResult;

  std::string url =
      "/_api/replication/restore-collection"
      "?overwrite=" +
      std::string(options.overwrite ? "true" : "false") +
      "&force=" + std::string(options.force ? "true" : "false") +
      "&ignoreDistributeShardsLikeErrors=" +
      std::string(options.ignoreDistributeShardsLikeErrors ? "true" : "false");

  VPackSlice const parameters = slice.get("parameters");

  // build cluster options using command-line parameter values
  VPackBuilder newOptions;
  newOptions.openObject();
  bool isSatellite = false;
  uint64_t replicationFactor = getReplicationFactor(options, parameters, isSatellite);
  if (isSatellite) {
    newOptions.add("replicationFactor", VPackValue("satellite"));
  } else {
    newOptions.add("replicationFactor", VPackValue(replicationFactor));
  }
  newOptions.add("numberOfShards", VPackValue(getNumberOfShards(options, parameters)));
  newOptions.close();

  VPackBuilder b;
  b.openObject();
  b.add("indexes", slice.get("indexes"));
  b.add(VPackValue("parameters"));
  VPackCollection::merge(b, parameters, newOptions.slice(), true, false);
  b.close();

  std::string const body = b.slice().toJson();

  std::unique_ptr<SimpleHttpResult> response(
      httpClient.request(arangodb::rest::RequestType::PUT, url, body.c_str(), body.size()));
  return ::checkHttpResponse(httpClient, response, "restoring collection", body);
}

/// @brief Send command to restore a collection's indexes
arangodb::Result sendRestoreIndexes(arangodb::httpclient::SimpleHttpClient& httpClient,
                                    arangodb::RestoreFeature::Options const& options,
                                    VPackSlice const& slice) {
  using arangodb::httpclient::SimpleHttpResult;

  std::string const url = "/_api/replication/restore-indexes?force=" +
                          std::string(options.force ? "true" : "false");
  std::string const body = slice.toJson();

  std::unique_ptr<SimpleHttpResult> response(
      httpClient.request(arangodb::rest::RequestType::PUT, url, body.c_str(), body.size()));
  return ::checkHttpResponse(httpClient, response, "restoring indexes", body);
}

/// @brief Send a command to restore actual data
arangodb::Result sendRestoreData(arangodb::httpclient::SimpleHttpClient& httpClient,
                                 arangodb::RestoreFeature::Options const& options,
                                 std::string const& cname, char const* buffer,
                                 size_t bufferSize) {
  using arangodb::basics::StringUtils::urlEncode;
  using arangodb::httpclient::SimpleHttpResult;

  // the following two structs are needed for cleaning up duplicate attributes
  arangodb::velocypack::Builder result;
  arangodb::basics::StringBuffer cleaned;

  if (options.cleanupDuplicateAttributes) {
    int res = cleaned.reserve(bufferSize);

    if (res != TRI_ERROR_NO_ERROR) {
      // out of memory
      THROW_ARANGO_EXCEPTION(res);
    }

    arangodb::velocypack::Options options = arangodb::velocypack::Options::Defaults;
    // do *not* check duplicate attributes here (because that would throw)
    options.checkAttributeUniqueness = false;
    arangodb::velocypack::Builder builder(&options);

    // instead, we need to manually check for duplicate attributes...
    char const* p = buffer;
    char const* e = p + bufferSize;

    while (p < e) {
      while (p < e && (*p == ' ' || *p == '\r' || *p == '\n' || *p == '\t')) {
        ++p;
      }

      // detect line ending
      size_t length;
      char const* nl = static_cast<char const*>(memchr(p, '\n', e - p));
      if (nl == nullptr) {
        length = e - p;
      } else {
        length = nl - p;
      }

      builder.clear();
      try {
        VPackParser parser(builder, builder.options);
        parser.parse(p, length);
      } catch (arangodb::velocypack::Exception const& ex) {
        return {TRI_ERROR_HTTP_CORRUPTED_JSON, ex.what()};
      } catch (std::bad_alloc const&) {
        return {TRI_ERROR_OUT_OF_MEMORY};
      } catch (std::exception const& ex) {
        return {TRI_ERROR_INTERNAL, ex.what()};
      }

      // recursively clean up duplicate attributes in the document
      result.clear();
      makeAttributesUnique(result, builder.slice());

      std::string const json = result.toJson();
      cleaned.appendText(json.data(), json.size());

      if (nl == nullptr) {
        // done
        break;
      }

      cleaned.appendChar('\n');
      // advance behind newline
      p = nl + 1;
    }

    // now point to the cleaned up data
    buffer = cleaned.c_str();
    bufferSize = cleaned.length();
  }

  std::string const url = "/_api/replication/restore-data?collection=" + urlEncode(cname) +
                          "&force=" + (options.force ? "true" : "false");

  std::unique_ptr<SimpleHttpResult> response(
      httpClient.request(arangodb::rest::RequestType::PUT, url, buffer, bufferSize));
  return ::checkHttpResponse(httpClient, response, "restoring data", "");
}

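// Note on the wire format: the buffer handed to sendRestoreData is
// newline-delimited JSON (one document per line), as produced by arangodump.
// With --cleanup-duplicate-attributes, each line is parsed individually,
// stripped of duplicate attributes via makeAttributesUnique, and reserialized
// before the batch is sent.
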
/// @brief Recreate a collection given its description
arangodb::Result recreateCollection(arangodb::httpclient::SimpleHttpClient& httpClient,
                                    arangodb::RestoreFeature::JobData& jobData) {
  using arangodb::Logger;

  arangodb::Result result;
  VPackSlice const parameters = jobData.collection.get("parameters");
  std::string const cname =
      arangodb::basics::VelocyPackHelper::getStringValue(parameters, "name", "");
  int type = arangodb::basics::VelocyPackHelper::getNumericValue<int>(parameters,
                                                                      "type", 2);
  std::string const collectionType(type == 2 ? "document" : "edge");

  // re-create collection
  if (jobData.options.progress) {
    if (jobData.options.overwrite) {
      LOG_TOPIC("9b414", INFO, Logger::RESTORE)
          << "# Re-creating " << collectionType << " collection '" << cname << "'...";
    } else {
      LOG_TOPIC("a9123", INFO, Logger::RESTORE)
          << "# Creating " << collectionType << " collection '" << cname << "'...";
    }
  }

  result = ::sendRestoreCollection(httpClient, jobData.options, jobData.collection, cname);

  if (result.fail()) {
    if (jobData.options.force) {
      LOG_TOPIC("c6658", WARN, Logger::RESTORE)
          << "Error while creating " << collectionType << " collection '"
          << cname << "': " << result.errorMessage();
      result.reset();
    } else {
      LOG_TOPIC("e8e7a", ERR, Logger::RESTORE)
          << "Error while creating " << collectionType << " collection '"
          << cname << "': " << result.errorMessage();
    }
  }
  return result;
}

/// @brief Restore a collection's indexes given its description
arangodb::Result restoreIndexes(arangodb::httpclient::SimpleHttpClient& httpClient,
                                arangodb::RestoreFeature::JobData& jobData) {
  using arangodb::Logger;

  arangodb::Result result;
  VPackSlice const parameters = jobData.collection.get("parameters");
  VPackSlice const indexes = jobData.collection.get("indexes");
  // re-create indexes
  if (indexes.length() > 0) {
    // we actually have indexes
    if (jobData.options.progress) {
      std::string const cname =
          arangodb::basics::VelocyPackHelper::getStringValue(parameters, "name",
                                                             "");
      LOG_TOPIC("d88c6", INFO, Logger::RESTORE)
          << "# Creating indexes for collection '" << cname << "'...";
    }

    result = ::sendRestoreIndexes(httpClient, jobData.options, jobData.collection);

    if (result.fail()) {
      std::string const cname =
          arangodb::basics::VelocyPackHelper::getStringValue(parameters, "name",
                                                             "");
      if (jobData.options.force) {
        LOG_TOPIC("db937", WARN, Logger::RESTORE)
            << "Error while creating indexes for collection '" << cname
            << "': " << result.errorMessage();
        result.reset();
      } else {
        LOG_TOPIC("d5d06", ERR, Logger::RESTORE)
            << "Error while creating indexes for collection '" << cname
            << "': " << result.errorMessage();
      }
    }
  }

  return result;
}

/// @brief Restore the data for a given collection
arangodb::Result restoreData(arangodb::httpclient::SimpleHttpClient& httpClient,
                             arangodb::RestoreFeature::JobData& jobData) {
  using arangodb::Logger;
  using arangodb::basics::StringBuffer;

  arangodb::Result result;
  StringBuffer buffer(true);

  VPackSlice const parameters = jobData.collection.get("parameters");
  std::string const cname =
      arangodb::basics::VelocyPackHelper::getStringValue(parameters, "name", "");
  int type = arangodb::basics::VelocyPackHelper::getNumericValue<int>(parameters,
                                                                      "type", 2);
  std::string const collectionType(type == 2 ? "document" : "edge");

  // import data. check if we have a datafile: there are 4 possible names,
  // "<name>_<md5>.data.json", "<name>_<md5>.data.json.gz",
  // "<name>.data.json.gz" and "<name>.data.json", tried in this order
  auto datafile = jobData.directory.readableFile(
      cname + "_" + arangodb::rest::SslInterface::sslMD5(cname) + ".data.json");
  if (!datafile || datafile->status().fail()) {
    datafile = jobData.directory.readableFile(
        cname + "_" + arangodb::rest::SslInterface::sslMD5(cname) +
        ".data.json.gz");
  }
  if (!datafile || datafile->status().fail()) {
    datafile = jobData.directory.readableFile(cname + ".data.json.gz");
  }
  if (!datafile || datafile->status().fail()) {
    datafile = jobData.directory.readableFile(cname + ".data.json");
  }
  if (!datafile || datafile->status().fail()) {
    result = {TRI_ERROR_CANNOT_READ_FILE,
              "could not open data file for collection '" + cname + "'"};
    return result;
  }

  int64_t const fileSize = TRI_SizeFile(datafile->path().c_str());

  if (jobData.options.progress) {
    LOG_TOPIC("94913", INFO, Logger::RESTORE)
        << "# Loading data into " << collectionType << " collection '" << cname
        << "', data size: " << fileSize << " byte(s)";
  }

  int64_t numReadForThisCollection = 0;
  int64_t numReadSinceLastReport = 0;

  bool const isGzip =
      (0 == datafile->path().substr(datafile->path().size() - 3).compare(".gz"));

  buffer.clear();
  while (true) {
    if (buffer.reserve(16384) != TRI_ERROR_NO_ERROR) {
      result = {TRI_ERROR_OUT_OF_MEMORY, "out of memory"};
      return result;
    }

    ssize_t numRead = datafile->read(buffer.end(), 16384);
    if (datafile->status().fail()) {  // error while reading
      result = datafile->status();
      return result;
    }
    // we read something
    buffer.increaseLength(numRead);
    jobData.stats.totalRead += static_cast<uint64_t>(numRead);
    numReadForThisCollection += numRead;
    numReadSinceLastReport += numRead;

    if (buffer.length() < jobData.options.chunkSize && numRead > 0) {
      continue;  // still continue reading
    }

    // do we have a buffer?
    if (buffer.length() > 0) {
      // look for the last \n in the buffer
      char* found = (char*)memrchr((const void*)buffer.begin(), '\n', buffer.length());
      size_t length;

      if (found == nullptr) {  // no \n in buffer...
        if (numRead == 0) {
          // we're at the end of the file, so send the complete buffer anyway
          length = buffer.length();
        } else {
          continue;  // don't have a complete line yet, read more
        }
      } else {
        length = found - buffer.begin();  // found a \n somewhere; break at the line
      }

      jobData.stats.totalBatches++;
      result = ::sendRestoreData(httpClient, jobData.options, cname, buffer.begin(), length);
      jobData.stats.totalSent += length;

      if (result.fail()) {
        if (jobData.options.force) {
          LOG_TOPIC("a595a", WARN, Logger::RESTORE)
              << "Error while restoring data into collection '" << cname
              << "': " << result.errorMessage();
          result.reset();
          continue;
        } else {
          LOG_TOPIC("a89bf", ERR, Logger::RESTORE)
              << "Error while restoring data into collection '" << cname
              << "': " << result.errorMessage();
        }
        return result;
      }

      buffer.erase_front(length);

      if (jobData.options.progress && fileSize > 0 &&
          numReadSinceLastReport > 1024 * 1024 * 8) {
        // report every 8MB of transferred data
        // we currently do not have the unzipped size for .gz files
        std::stringstream percentage, ofFilesize;
        if (isGzip) {
          ofFilesize << "";
          percentage << "";
        } else {
          ofFilesize << " of " << fileSize;
          percentage << " ("
                     << int(100. * double(numReadForThisCollection) / double(fileSize))
                     << " %)";
        }

        LOG_TOPIC("69a73", INFO, Logger::RESTORE)
            << "# Still loading data into " << collectionType << " collection '"
            << cname << "', " << numReadForThisCollection << ofFilesize.str()
            << " byte(s) restored" << percentage.str();
        numReadSinceLastReport = 0;
      }
    }

    if (numRead == 0) {  // EOF
      break;
    }
  }

  return result;
}

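// Batching behaviour (as implemented above): the file is read in 16 KB steps
// until at least options.chunkSize bytes are buffered, then the buffer is cut
// at the last complete line ('\n') and that prefix is sent as one batch, so a
// JSON document is never split across two restore-data requests.
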
/// @brief Recreate a view given its definition
arangodb::Result restoreView(arangodb::httpclient::SimpleHttpClient& httpClient,
                             arangodb::RestoreFeature::Options const& options,
                             VPackSlice const& viewDefinition) {
  using arangodb::httpclient::SimpleHttpResult;

  std::string url = "/_api/replication/restore-view?overwrite=" +
                    std::string(options.overwrite ? "true" : "false") +
                    "&force=" + std::string(options.force ? "true" : "false");

  std::string const body = viewDefinition.toJson();
  std::unique_ptr<SimpleHttpResult> response(
      httpClient.request(arangodb::rest::RequestType::PUT, url, body.c_str(), body.size()));
  return ::checkHttpResponse(httpClient, response, "restoring view", body);
}

arangodb::Result triggerFoxxHeal(arangodb::httpclient::SimpleHttpClient& httpClient) {
  using arangodb::Logger;
  using arangodb::httpclient::SimpleHttpResult;
  const std::string FoxxHealUrl = "/_api/foxx/_local/heal";

  std::string body = "";

  std::unique_ptr<SimpleHttpResult> response(
      httpClient.request(arangodb::rest::RequestType::POST, FoxxHealUrl,
                         body.c_str(), body.length()));
  return ::checkHttpResponse(httpClient, response, "triggering self heal", body);
}

arangodb::Result processInputDirectory(
    arangodb::httpclient::SimpleHttpClient& httpClient,
    arangodb::ClientTaskQueue<arangodb::RestoreFeature::JobData>& jobQueue,
    arangodb::RestoreFeature& feature, arangodb::RestoreFeature::Options const& options,
    arangodb::ManagedDirectory& directory, arangodb::RestoreFeature::Stats& stats) {
  using arangodb::Logger;
  using arangodb::Result;
  using arangodb::StaticStrings;
  using arangodb::basics::VelocyPackHelper;
  using arangodb::basics::FileUtils::listFiles;

  // create a lookup table for collections
  std::set<std::string> restrictColls, restrictViews;
  restrictColls.insert(options.collections.begin(), options.collections.end());
  restrictViews.insert(options.views.begin(), options.views.end());

  try {
    std::vector<std::string> const files = listFiles(directory.path());
    std::string const collectionSuffix = std::string(".structure.json");
    std::string const viewsSuffix = std::string(".view.json");
    std::vector<VPackBuilder> collections, views;

    // Step 1: determine all collections to process
    {
      // loop over all files in the input directory, and look for all
      // .structure.json files
      for (std::string const& file : files) {
        size_t const nameLength = file.size();

        if (nameLength > viewsSuffix.size() &&
            file.substr(file.size() - viewsSuffix.size()) == viewsSuffix) {
          if (!restrictColls.empty() && restrictViews.empty()) {
            continue;  // skip view if not specifically included
          }

          VPackBuilder contentBuilder = directory.vpackFromJsonFile(file);
          VPackSlice const fileContent = contentBuilder.slice();
          if (!fileContent.isObject()) {
            return {TRI_ERROR_INTERNAL, "could not read view file '" +
                                            directory.pathToFile(file) + "'"};
          }

          if (!restrictViews.empty()) {
            std::string const name =
                VelocyPackHelper::getStringValue(fileContent, StaticStrings::DataSourceName,
                                                 "");
            if (restrictViews.find(name) == restrictViews.end()) {
              continue;
            }
          }

          views.emplace_back(std::move(contentBuilder));
          continue;
        }

        if (nameLength <= collectionSuffix.size() ||
            file.substr(file.size() - collectionSuffix.size()) != collectionSuffix) {
          // some other file
          continue;
        }

        // found a .structure.json file
        std::string name = file.substr(0, file.size() - collectionSuffix.size());
        if (!options.includeSystemCollections && name[0] == '_') {
          continue;
        }

        VPackBuilder fileContentBuilder = directory.vpackFromJsonFile(file);
        VPackSlice const fileContent = fileContentBuilder.slice();
        if (!fileContent.isObject()) {
          return {TRI_ERROR_INTERNAL,
                  "could not read collection structure file '" +
                      directory.pathToFile(file) + "'"};
        }

        VPackSlice const parameters = fileContent.get("parameters");
        VPackSlice const indexes = fileContent.get("indexes");
        if (!parameters.isObject() || !indexes.isArray()) {
          return {TRI_ERROR_INTERNAL,
                  "could not read collection structure file '" +
                      directory.pathToFile(file) + "'"};
        }
        std::string const cname =
            VelocyPackHelper::getStringValue(parameters,
                                             StaticStrings::DataSourceName, "");
        bool overwriteName = false;
        if (cname != name &&
            name != (cname + "_" + arangodb::rest::SslInterface::sslMD5(cname))) {
          // file has a different name than the one found in the structure file
          if (options.importStructure) {
            // we cannot go on if there is a mismatch
            return {TRI_ERROR_INTERNAL,
                    "collection name mismatch in collection structure file '" +
                        directory.pathToFile(file) + "' (offending value: '" +
                        cname + "')"};
          } else {
            // we can patch the name in our array and go on
            LOG_TOPIC("8e7b7", INFO, Logger::RESTORE)
                << "ignoring collection name mismatch in collection "
                   "structure file '" +
                       directory.pathToFile(file) + "' (offending value: '" +
                       cname + "')";

            overwriteName = true;
          }
        }

        if (!restrictColls.empty() && restrictColls.find(cname) == restrictColls.end()) {
          continue;  // collection name not in list
        }

        if (overwriteName) {
          // TODO: we have a JSON object with sub-object "parameters" with
          // attribute "name". we only want to replace this. how?
        } else {
          collections.emplace_back(std::move(fileContentBuilder));
        }
      }
    }

    // Step 2: order collections so that prototypes for distributeShardsLike
    // come first
    std::sort(collections.begin(), collections.end(), ::sortCollectionsForCreation);

    std::unique_ptr<arangodb::RestoreFeature::JobData> usersData;
    std::unique_ptr<arangodb::RestoreFeature::JobData> analyzersData;
    std::vector<std::unique_ptr<arangodb::RestoreFeature::JobData>> jobs;
    jobs.reserve(collections.size());

    bool didModifyFoxxCollection = false;
    // Step 3: create collections
    for (VPackBuilder const& b : collections) {
      VPackSlice const collection = b.slice();

      LOG_TOPIC("c601a", DEBUG, Logger::RESTORE)
          << "# Processing collection: " << collection.toJson();

      VPackSlice params = collection.get("parameters");
      VPackSlice name = VPackSlice::emptyStringSlice();
      if (params.isObject()) {
        name = params.get("name");
        // Only these two are relevant for Foxx.
        if (name.isString() && (name.isEqualString("_apps") ||
                                name.isEqualString("_appbundles"))) {
          didModifyFoxxCollection = true;
        }
      }

      auto jobData =
          std::make_unique<arangodb::RestoreFeature::JobData>(directory, feature, options,
                                                              stats, collection);

      // take care of collection creation now, serially
      if (options.importStructure) {
        Result result = ::recreateCollection(httpClient, *jobData);
        if (result.fail()) {
          return result;
        }
      }

      if (name.isString() && name.stringRef() == "_users") {
        // special treatment for the _users collection - this must be the very
        // last, and run isolated from all previous data loading operations -
        // the reason is that loading into the users collection may change the
        // credentials for the current arangorestore connection!
        usersData = std::move(jobData);
      } else if (name.isString() && name.stringRef() == StaticStrings::AnalyzersCollection) {
        // special treatment for the _analyzers collection - this must be the
        // very first
        stats.totalCollections++;
        analyzersData = std::move(jobData);
      } else {
        stats.totalCollections++;
        jobs.push_back(std::move(jobData));
      }
    }

    // Step 4: restore data from the _analyzers collection
    if (analyzersData) {
      // restore analyzers
      if (!jobQueue.queueJob(std::move(analyzersData))) {
        return Result(TRI_ERROR_OUT_OF_MEMORY, "unable to queue restore job");
      }

      jobQueue.waitForIdle();
    }

    // Step 5: create arangosearch views
    if (options.importStructure && !views.empty()) {
      LOG_TOPIC("f723c", INFO, Logger::RESTORE) << "# Creating views...";

      for (auto const& viewDefinition : views) {
        LOG_TOPIC("c608d", DEBUG, Logger::RESTORE)
            << "# Creating view: " << viewDefinition.toJson();

        auto res = ::restoreView(httpClient, options, viewDefinition.slice());

        if (!res.ok()) {
          return res;
        }
      }
    }

    // Step 6: fire up data transfer
    for (auto& job : jobs) {
      if (!jobQueue.queueJob(std::move(job))) {
        return Result(TRI_ERROR_OUT_OF_MEMORY, "unable to queue restore job");
      }
    }

    // wait for all jobs to finish, then check for errors
    if (options.progress) {
      LOG_TOPIC("6d69f", INFO, Logger::RESTORE)
          << "# Dispatched " << stats.totalCollections << " job(s), using "
          << options.threadCount << " worker(s)";

      double start = TRI_microtime();

      while (true) {
        if (jobQueue.isQueueEmpty() && jobQueue.allWorkersIdle()) {
          // done
          break;
        }

        double now = TRI_microtime();
        if (now - start >= 5.0) {
          // returns #queued jobs, #workers total, #workers busy
          auto queueStats = jobQueue.statistics();
          // periodically report the current status, but do not spam the user
          LOG_TOPIC("75e65", INFO, Logger::RESTORE)
              << "# Current restore progress: restored " << stats.restoredCollections
              << " of " << stats.totalCollections << " collection(s), read "
              << stats.totalRead << " byte(s) from datafiles, "
              << "sent " << stats.totalBatches << " data batch(es) of "
              << stats.totalSent << " byte(s) total size"
              << ", queued jobs: " << std::get<0>(queueStats)
              << ", workers: " << std::get<1>(queueStats);
          start = now;
        }

        // don't sleep for too long, as we want to terminate quickly as soon
        // as the queue gets empty
        std::this_thread::sleep_for(std::chrono::milliseconds(200));
      }
    }

    jobQueue.waitForIdle();
    jobs.clear();

    Result firstError = feature.getFirstError();
    if (firstError.fail()) {
      return firstError;
    }

    if (didModifyFoxxCollection) {
      // if we get here, we need to trigger a Foxx heal
      Result res = ::triggerFoxxHeal(httpClient);
      if (res.fail()) {
        LOG_TOPIC("47cd7", WARN, Logger::RESTORE)
            << "Reloading of Foxx services failed: " << res.errorMessage()
            << ". In a cluster, Foxx services will become available "
               "eventually. On single servers, send a POST to "
               "'/_api/foxx/_local/heal' on the current database, with an "
               "empty body. Note that none of this is necessary if the Foxx "
               "APIs have been turned off on the server using the option "
               "`--foxx.api false`.";
      }
    }

    // Last step: reload data into _users. Note: this can change the
    // credentials of the arangorestore user itself
    if (usersData) {
      TRI_ASSERT(jobs.empty());
      if (!jobQueue.queueJob(std::move(usersData))) {
        return Result(TRI_ERROR_OUT_OF_MEMORY, "unable to queue restore job");
      }
      jobQueue.waitForIdle();
      jobs.clear();

      Result firstError = feature.getFirstError();
      if (firstError.fail()) {
        return firstError;
      }
    }

  } catch (std::exception const& ex) {
    return {TRI_ERROR_INTERNAL,
            std::string(
                "arangorestore terminated because of an unhandled exception: ")
                .append(ex.what())};
  } catch (...) {
    return {TRI_ERROR_OUT_OF_MEMORY, "arangorestore out of memory"};
  }
  return {TRI_ERROR_NO_ERROR};
}

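// Processing pipeline recap (the steps numbered above): collect
// .structure.json and .view.json files, order collections
// (distributeShardsLike prototypes, then by type, then by name), create the
// collections serially, restore _analyzers first, create views, load all
// regular collection data in parallel, and finish with _users so that
// credential changes cannot break jobs that are still running.
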
/// @brief process a single job from the queue
arangodb::Result processJob(arangodb::httpclient::SimpleHttpClient& httpClient,
                            arangodb::RestoreFeature::JobData& jobData) {
  arangodb::Result result;

  VPackSlice const parameters = jobData.collection.get("parameters");
  std::string const cname =
      arangodb::basics::VelocyPackHelper::getStringValue(parameters, "name", "");

  if (cname == "_users") {
    // special case: for the _users collection, always restore the indexes
    // before the data, because restoring the data may change the permissions
    // of the current user, and index creation would then fail
    result = ::restoreIndexes(httpClient, jobData);
    if (result.fail()) {
      return result;
    }
    result = ::restoreData(httpClient, jobData);
    if (result.fail()) {
      return result;
    }
  } else {
    if (jobData.options.indexesFirst && jobData.options.importStructure) {
      // restore indexes first if we are using rocksdb
      result = ::restoreIndexes(httpClient, jobData);
      if (result.fail()) {
        return result;
      }
    }
    if (jobData.options.importData) {
      result = ::restoreData(httpClient, jobData);
      if (result.fail()) {
        return result;
      }
    }
    if (!jobData.options.indexesFirst && jobData.options.importStructure) {
      // restore indexes second if we are using mmfiles
      result = ::restoreIndexes(httpClient, jobData);
      if (result.fail()) {
        return result;
      }
    }
  }

  ++jobData.stats.restoredCollections;

  if (jobData.options.progress) {
    VPackSlice const parameters = jobData.collection.get("parameters");
    std::string const cname =
        arangodb::basics::VelocyPackHelper::getStringValue(parameters, "name",
                                                           "");
    int type = arangodb::basics::VelocyPackHelper::getNumericValue<int>(parameters,
                                                                        "type", 2);
    std::string const collectionType(type == 2 ? "document" : "edge");
    LOG_TOPIC("6ae09", INFO, arangodb::Logger::RESTORE)
        << "# Successfully restored " << collectionType << " collection '"
        << cname << "'";
  }

  return result;
}

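// Note on the ordering above: indexesFirst is set at startup when the server
// reports the rocksdb storage engine (see RestoreFeature::start below), so
// indexes are created before the bulk load on rocksdb and after it on
// mmfiles.
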
/// @brief handle the result of a single job
void handleJobResult(std::unique_ptr<arangodb::RestoreFeature::JobData>&& jobData,
                     arangodb::Result const& result) {
  if (result.fail()) {
    jobData->feature.reportError(result);
  }
}

}  // namespace

namespace arangodb {

RestoreFeature::JobData::JobData(ManagedDirectory& d, RestoreFeature& f,
                                 RestoreFeature::Options const& o,
                                 RestoreFeature::Stats& s, VPackSlice const& c)
    : directory{d}, feature{f}, options{o}, stats{s}, collection{c} {}

RestoreFeature::RestoreFeature(application_features::ApplicationServer& server, int& exitCode)
    : ApplicationFeature(server, RestoreFeature::featureName()),
      _clientManager{Logger::RESTORE},
      _clientTaskQueue{::processJob, ::handleJobResult},
      _exitCode{exitCode} {
  requiresElevatedPrivileges(false);
  setOptional(false);
  startsAfter("BasicsPhase");

  using arangodb::basics::FileUtils::buildFilename;
  using arangodb::basics::FileUtils::currentDirectory;
  _options.inputPath = buildFilename(currentDirectory().result(), "dump");
}

void RestoreFeature::collectOptions(std::shared_ptr<options::ProgramOptions> options) {
  using arangodb::options::BooleanParameter;
  using arangodb::options::StringParameter;
  using arangodb::options::UInt32Parameter;
  using arangodb::options::UInt64Parameter;
  using arangodb::options::VectorParameter;

  options->addOption(
      "--collection",
      "restrict to collection name (can be specified multiple times)",
      new VectorParameter<StringParameter>(&_options.collections));

  options->addOption("--view",
                     "restrict to view name (can be specified multiple times)",
                     new VectorParameter<StringParameter>(&_options.views));

  options->addObsoleteOption("--recycle-ids",
                             "collection ids are now handled automatically", false);

  options->addOption("--batch-size",
                     "maximum size for individual data batches (in bytes)",
                     new UInt64Parameter(&_options.chunkSize));

  options
      ->addOption("--threads",
                  "maximum number of collections to process in parallel",
                  new UInt32Parameter(&_options.threadCount))
      .setIntroducedIn(30400);

  options->addOption("--include-system-collections",
                     "include system collections",
                     new BooleanParameter(&_options.includeSystemCollections));

  options->addOption("--create-database",
                     "create the target database if it does not exist",
                     new BooleanParameter(&_options.createDatabase));

  options->addOption(
      "--force-same-database",
      "force usage of the same database name as in the source dump.json file",
      new BooleanParameter(&_options.forceSameDatabase));

  options
      ->addOption("--all-databases", "restore data to all databases",
                  new BooleanParameter(&_options.allDatabases))
      .setIntroducedIn(30500);

  options->addOption("--input-directory", "input directory",
                     new StringParameter(&_options.inputPath));

  options
      ->addOption(
          "--cleanup-duplicate-attributes",
          "clean up duplicate attributes (use first specified value) in input "
          "documents instead of making the restore operation fail",
          new BooleanParameter(&_options.cleanupDuplicateAttributes),
          arangodb::options::makeFlags(arangodb::options::Flags::Hidden))
      .setIntroducedIn(30322)
      .setIntroducedIn(30402);

  options->addOption("--import-data", "import data into collection",
                     new BooleanParameter(&_options.importData));

  options->addOption("--create-collection", "create collection structure",
                     new BooleanParameter(&_options.importStructure));

  options->addOption("--progress", "show progress",
                     new BooleanParameter(&_options.progress));

  options->addOption("--overwrite", "overwrite collections if they exist",
                     new BooleanParameter(&_options.overwrite));

  options
      ->addOption(
          "--number-of-shards",
          "override value for numberOfShards (can be specified multiple times, "
          "e.g. --number-of-shards 2 --number-of-shards myCollection=3)",
          new VectorParameter<StringParameter>(&_options.numberOfShards))
      .setIntroducedIn(30322)
      .setIntroducedIn(30402);

  options
      ->addOption("--replication-factor",
                  "override value for replicationFactor (can be specified "
                  "multiple times, e.g. --replication-factor 2 "
                  "--replication-factor myCollection=3)",
                  new VectorParameter<StringParameter>(&_options.replicationFactor))
      .setIntroducedIn(30322)
      .setIntroducedIn(30402);

  options->addOption(
      "--ignore-distribute-shards-like-errors",
      "continue restore even if sharding prototype collection is missing",
      new BooleanParameter(&_options.ignoreDistributeShardsLikeErrors));

  options->addOption(
      "--force", "continue restore even in the face of some server-side errors",
      new BooleanParameter(&_options.force));

  // deprecated options
  options
      ->addOption("--default-number-of-shards",
                  "default value for numberOfShards if not specified in dump",
                  new UInt64Parameter(&_options.defaultNumberOfShards),
                  arangodb::options::makeFlags(arangodb::options::Flags::Hidden))
      .setDeprecatedIn(30322)
      .setDeprecatedIn(30402);

  options
      ->addOption(
          "--default-replication-factor",
          "default value for replicationFactor if not specified in dump",
          new UInt64Parameter(&_options.defaultReplicationFactor),
          arangodb::options::makeFlags(arangodb::options::Flags::Hidden))
      .setDeprecatedIn(30322)
      .setDeprecatedIn(30402);
}

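// Example invocation (hypothetical paths and names):
//   arangorestore --input-directory dump --server.database mydb \
//     --create-database true --number-of-shards 3 \
//     --replication-factor myCollection=2 --threads 8
// restores the dump in ./dump into database "mydb", creating it if needed.
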
void RestoreFeature::validateOptions(std::shared_ptr<options::ProgramOptions> options) {
  using arangodb::basics::StringUtils::join;

  auto const& positionals = options->processingResult()._positionals;
  size_t n = positionals.size();

  if (1 == n) {
    _options.inputPath = positionals[0];
  } else if (1 < n) {
    LOG_TOPIC("d249a", FATAL, arangodb::Logger::RESTORE)
        << "expecting at most one directory, got " + join(positionals, ", ");
    FATAL_ERROR_EXIT();
  }

  if (_options.allDatabases) {
    if (options->processingResult().touched("server.database")) {
      LOG_TOPIC("94d22", FATAL, arangodb::Logger::RESTORE)
          << "cannot use --server.database and --all-databases at the same "
             "time";
      FATAL_ERROR_EXIT();
    }

    if (_options.forceSameDatabase) {
      LOG_TOPIC("fd66a", FATAL, arangodb::Logger::RESTORE)
          << "cannot use --force-same-database and --all-databases at the same "
             "time";
      FATAL_ERROR_EXIT();
    }
  }

  // use a minimum value for batches
  if (_options.chunkSize < 1024 * 128) {
    _options.chunkSize = 1024 * 128;
  }

  auto clamped = boost::algorithm::clamp(_options.threadCount, uint32_t(1),
                                         uint32_t(4 * TRI_numberProcessors()));
  if (_options.threadCount != clamped) {
    LOG_TOPIC("53570", WARN, Logger::RESTORE) << "capping --threads value to " << clamped;
    _options.threadCount = clamped;
  }

  // validate shards and replication factor
  if (_options.defaultNumberOfShards == 0) {
    LOG_TOPIC("248ee", FATAL, arangodb::Logger::RESTORE)
        << "invalid value for `--default-number-of-shards`, expecting at least "
           "1";
    FATAL_ERROR_EXIT();
  }

  if (_options.defaultReplicationFactor == 0) {
    LOG_TOPIC("daf22", FATAL, arangodb::Logger::RESTORE)
        << "invalid value for `--default-replication-factor`, expecting at "
           "least 1";
    FATAL_ERROR_EXIT();
  }

  for (auto& it : _options.numberOfShards) {
    auto parts = basics::StringUtils::split(it, '=');
    if (parts.size() == 1 && basics::StringUtils::int64(parts[0]) > 0) {
      // valid
      continue;
    } else if (parts.size() == 2 && basics::StringUtils::int64(parts[1]) > 0) {
      // valid
      continue;
    }
    // invalid!
    LOG_TOPIC("1951e", FATAL, arangodb::Logger::RESTORE)
        << "got invalid value '" << it << "' for `--number-of-shards`";
    FATAL_ERROR_EXIT();
  }

  for (auto& it : _options.replicationFactor) {
    auto parts = basics::StringUtils::split(it, '=');
    if (parts.size() == 1) {
      if (parts[0] == "satellite" || basics::StringUtils::int64(parts[0]) > 0) {
        // valid
        continue;
      }
    } else if (parts.size() == 2) {
      if (parts[1] == "satellite" || basics::StringUtils::int64(parts[1]) > 0) {
        // valid
        continue;
      }
    }
    // invalid!
    LOG_TOPIC("d038e", FATAL, arangodb::Logger::RESTORE)
        << "got invalid value '" << it << "' for `--replication-factor`";
    FATAL_ERROR_EXIT();
  }
}

void RestoreFeature::prepare() {
  if (!_options.inputPath.empty() && _options.inputPath.back() == TRI_DIR_SEPARATOR_CHAR) {
    // trim trailing slash from path because it may cause problems on Windows
    TRI_ASSERT(_options.inputPath.size() > 0);
    _options.inputPath.pop_back();
  }

  if (!_options.importStructure && !_options.importData) {
    LOG_TOPIC("1281f", FATAL, arangodb::Logger::RESTORE)
        << "Error: must specify either --create-collection or --import-data";
    FATAL_ERROR_EXIT();
  }
}

void RestoreFeature::start() {
  using arangodb::httpclient::SimpleHttpClient;

  double const start = TRI_microtime();

  // set up the input directory, not much else
  _directory = std::make_unique<ManagedDirectory>(_options.inputPath, false, false);
  if (_directory->status().fail()) {
    switch (_directory->status().errorNumber()) {
      case TRI_ERROR_FILE_NOT_FOUND:
        LOG_TOPIC("3246c", FATAL, arangodb::Logger::RESTORE)
            << "input directory '" << _options.inputPath << "' does not exist";
        break;
      default:
        LOG_TOPIC("535b3", FATAL, arangodb::Logger::RESTORE)
            << _directory->status().errorMessage();
        break;
    }
    FATAL_ERROR_EXIT();
  }

  ClientFeature* client = application_features::ApplicationServer::getFeature<ClientFeature>(
      "Client");

  _exitCode = EXIT_SUCCESS;

  // enumerate all databases present in the dump directory (in case of
  // --all-databases=true, or use just the flat files in case of
  // --all-databases=false)
  std::vector<std::string> databases;
  if (_options.allDatabases) {
    for (auto const& it : basics::FileUtils::listFiles(_options.inputPath)) {
      std::string path = basics::FileUtils::buildFilename(_options.inputPath, it);
      if (basics::FileUtils::isDirectory(path)) {
        databases.push_back(it);
      }
    }

    // sort databases by name, with _system last. this is necessary because
    // the system database contains the _users collection, and we have to
    // process users last of all. otherwise we risk updating the credentials
    // for the user which uses the current arangorestore connection, and this
    // would make subsequent arangorestore calls to the server fail with
    // "unauthorized"
    std::sort(databases.begin(), databases.end(),
              [](std::string const& lhs, std::string const& rhs) {
                if (lhs == "_system" && rhs != "_system") {
                  return false;
                } else if (rhs == "_system" && lhs != "_system") {
                  return true;
                }
                return lhs < rhs;
              });
    if (databases.empty()) {
      LOG_TOPIC("b41d9", FATAL, Logger::RESTORE)
          << "Unable to find per-database subdirectories in input directory '"
          << _options.inputPath << "'. No data will be restored!";
      FATAL_ERROR_EXIT();
    }
  } else {
    databases.push_back(client->databaseName());
  }

  std::unique_ptr<SimpleHttpClient> httpClient;

  // final result
  Result result;

  result = _clientManager.getConnectedClient(httpClient, _options.force, true,
                                             !_options.createDatabase, false);
  if (result.is(TRI_ERROR_SIMPLE_CLIENT_COULD_NOT_CONNECT)) {
    LOG_TOPIC("c23bf", FATAL, Logger::RESTORE)
        << "cannot create server connection, giving up!";
    FATAL_ERROR_EXIT();
  }
  if (result.is(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND)) {
    std::string dbName = client->databaseName();
    if (_options.createDatabase) {
      // database not found, but database creation requested
      LOG_TOPIC("9b5a6", INFO, Logger::RESTORE) << "Creating database '" << dbName << "'";

      client->setDatabaseName("_system");

      Result res = ::tryCreateDatabase(dbName);
      if (res.fail()) {
        LOG_TOPIC("b19db", FATAL, Logger::RESTORE)
            << "Could not create database '" << dbName
            << "': " << httpClient->getErrorMessage();
        FATAL_ERROR_EXIT();
      }

      // restore old database name
      client->setDatabaseName(dbName);

      // re-check connection and version
      result = _clientManager.getConnectedClient(httpClient, _options.force,
                                                 true, true, false);
    } else {
      LOG_TOPIC("ad95b", WARN, Logger::RESTORE)
          << "Database '" << dbName
          << "' does not exist on target endpoint. In order to create this "
             "database along with the restore, please use the "
             "--create-database option";
    }
  }

  if (result.fail() && !_options.force) {
    LOG_TOPIC("62a31", FATAL, Logger::RESTORE)
        << "cannot create server connection: " << result.errorMessage();
    FATAL_ERROR_EXIT();
  }

  // check if we are in cluster or single-server mode
  std::string role;
  std::tie(result, role) = _clientManager.getArangoIsCluster(*httpClient);
  _options.clusterMode = (role == "COORDINATOR");
  if (result.fail()) {
    LOG_TOPIC("b18ac", FATAL, arangodb::Logger::RESTORE)
        << "Error: could not detect ArangoDB instance type: " << result.errorMessage();
    _exitCode = EXIT_FAILURE;
    return;
  }

  if (role == "DBSERVER" || role == "PRIMARY") {
    LOG_TOPIC("1fc99", WARN, arangodb::Logger::RESTORE)
        << "You connected to a DBServer node, but operations in a cluster "
           "should be carried out via a Coordinator. This is an unsupported "
           "operation!";
  }

  std::tie(result, _options.indexesFirst) =
      _clientManager.getArangoIsUsingEngine(*httpClient, "rocksdb");
  if (result.fail()) {
    LOG_TOPIC("b90ec", FATAL, arangodb::Logger::RESTORE)
        << "Error while trying to determine server storage engine: "
        << result.errorMessage();
    _exitCode = EXIT_FAILURE;
    return;
  }

  if (_options.progress) {
    LOG_TOPIC("05c30", INFO, Logger::RESTORE)
        << "Connected to ArangoDB '" << httpClient->getEndpointSpecification() << "'";
  }

  // set up threads and workers
  _clientTaskQueue.spawnWorkers(_clientManager, _options.threadCount);

  LOG_TOPIC("6bb3c", DEBUG, Logger::RESTORE)
      << "Using " << _options.threadCount << " worker thread(s)";

  if (_options.allDatabases) {
    LOG_TOPIC("7c10a", INFO, Logger::RESTORE)
        << "About to restore databases '"
        << basics::StringUtils::join(databases, "', '")
        << "' from dump directory '" << _options.inputPath << "'...";
  }

  for (auto const& db : databases) {
    result.reset();

    if (_options.allDatabases) {
      // inject current database
      client->setDatabaseName(db);
      LOG_TOPIC("36075", INFO, Logger::RESTORE) << "Restoring database '" << db << "'";
      _directory = std::make_unique<ManagedDirectory>(
          basics::FileUtils::buildFilename(_options.inputPath, db), false, false);

      result = _clientManager.getConnectedClient(httpClient, _options.force, false,
                                                 !_options.createDatabase, false);
      if (result.is(TRI_ERROR_SIMPLE_CLIENT_COULD_NOT_CONNECT)) {
        LOG_TOPIC("3e715", FATAL, Logger::RESTORE)
            << "cannot create server connection, giving up!";
        FATAL_ERROR_EXIT();
      }

      if (result.is(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND)) {
        if (_options.createDatabase) {
          // database not found, but database creation requested
          LOG_TOPIC("080f3", INFO, Logger::RESTORE) << "Creating database '" << db << "'";

          client->setDatabaseName("_system");

          result = ::tryCreateDatabase(db);
          if (result.fail()) {
            LOG_TOPIC("7a35f", ERR, Logger::RESTORE)
                << "Could not create database '" << db
                << "': " << httpClient->getErrorMessage();
            break;
          }

          // restore old database name
          client->setDatabaseName(db);

          // re-check connection and version
          result = _clientManager.getConnectedClient(httpClient, _options.force,
                                                     false, true, false);
        } else {
          LOG_TOPIC("be594", WARN, Logger::RESTORE)
              << "Database '" << db
              << "' does not exist on target endpoint. In order to create "
                 "this database along with the restore, please use the "
                 "--create-database option";
        }
      }

      if (result.fail()) {
        result.reset(result.errorNumber(),
                     std::string("cannot create server connection: ") + result.errorMessage());

        if (!_options.force) {
          break;
        }

        LOG_TOPIC("be86d", ERR, arangodb::Logger::RESTORE) << result.errorMessage();
        // continue with the next database
        continue;
      }
    }

    // read encryption info
    ::checkEncryption(*_directory);

    // read dump info
    result = ::checkDumpDatabase(*_directory, _options.forceSameDatabase);
    if (result.fail()) {
      LOG_TOPIC("0cbdf", FATAL, arangodb::Logger::RESTORE) << result.errorMessage();
      FATAL_ERROR_EXIT();
    }

    // run the actual restore
    try {
      result = ::processInputDirectory(*httpClient, _clientTaskQueue, *this,
                                       _options, *_directory, _stats);
    } catch (basics::Exception const& ex) {
      LOG_TOPIC("52b22", ERR, arangodb::Logger::RESTORE)
          << "caught exception: " << ex.what();
      result = {ex.code(), ex.what()};
    } catch (std::exception const& ex) {
      LOG_TOPIC("8f13f", ERR, arangodb::Logger::RESTORE)
          << "caught exception: " << ex.what();
      result = {TRI_ERROR_INTERNAL, ex.what()};
    } catch (...) {
      LOG_TOPIC("a74e8", ERR, arangodb::Logger::RESTORE)
          << "caught unknown exception";
      result = {TRI_ERROR_INTERNAL};
    }

    if (result.fail()) {
      break;
    }
  }

  if (result.fail()) {
    LOG_TOPIC("cb69f", ERR, arangodb::Logger::RESTORE) << result.errorMessage();
    _exitCode = EXIT_FAILURE;
  }

  if (_options.progress) {
    double totalTime = TRI_microtime() - start;

    if (_options.importData) {
      LOG_TOPIC("a66e1", INFO, Logger::RESTORE)
          << "Processed " << _stats.restoredCollections << " collection(s) in "
          << Logger::FIXED(totalTime, 6) << " s, "
          << "read " << _stats.totalRead << " byte(s) from datafiles, "
          << "sent " << _stats.totalBatches << " data batch(es) of "
          << _stats.totalSent << " byte(s) total size";
    } else if (_options.importStructure) {
      LOG_TOPIC("147ca", INFO, Logger::RESTORE)
          << "Processed " << _stats.restoredCollections << " collection(s) in "
          << Logger::FIXED(totalTime, 6) << " s";
    }
  }
}

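// With --all-databases, the dump directory is expected to contain one
// subdirectory per database, e.g. (hypothetical layout):
//   dump/_system/dump.json, dump/mydb/dump.json, ...
// The loop above re-targets the client at each database in turn, restoring
// _system last so _users changes cannot lock out the running session.
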
std::string RestoreFeature::featureName() { return ::FeatureName; }

void RestoreFeature::reportError(Result const& error) {
  try {
    MUTEX_LOCKER(lock, _workerErrorLock);
    _workerErrors.emplace(error);
    _clientTaskQueue.clearQueue();
  } catch (...) {
  }
}

Result RestoreFeature::getFirstError() const {
  {
    MUTEX_LOCKER(lock, _workerErrorLock);
    if (!_workerErrors.empty()) {
      return _workerErrors.front();
    }
  }
  return {TRI_ERROR_NO_ERROR};
}

}  // namespace arangodb