1
0
Fork 0

arangodump & arangorestore all databases (#8394)

This commit is contained in:
Jan 2019-03-18 17:05:08 +01:00 committed by GitHub
parent c2042c86e9
commit 5d527168d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 352 additions and 74 deletions

View File

@ -1,6 +1,31 @@
devel
-----
* added option `--all-databases` to arangodump and arangorestore
When set to true, this makes arangodump dump all available databases
the current user has access to. The option `--all-databases` cannot be
used in combination with the option `--server.database`.
When `--all-databases` is used, arangodump will create a subdirectory
with the data of each dumped database. Databases will be dumped one
after the other. However, inside each database, the collections of the
database can be dumped in parallel using multiple threads.
For arangorestore, this makes it restore all databases from inside the
subdirectories of the specified dump directory. Using the option for
arangorestore only makes sense for dumps created with arangodump and
the `--all-databases` option. As for arangodump, arangorestore cannot
be invoked with the options `--all-databases` and `--server.database`
at the same time. Additionally, the option `--force-same-database` cannot
be used together with `--all-databases`.
If the to-be-restored databases do not exist on the target server, then
restoring data into them will fail unless the option `--create-database`
is also specified. Please note that in this case a database user must
be used that has access to the `_system` database, in order to create
the databases on restore.
* added "name" property for indices
If a name is not specified on index creation, one will be auto-generated.

View File

@ -192,6 +192,44 @@ The index types "hash", "skiplist" and "persistent" are just aliases of each oth
when using the RocksDB engine, so there is no need to offer all of them in parallel.
Client tools
------------
### arangodump
arangodump got an option `--all-databases` to make it dump all available databases
instead of just a single database specified via the option `--server.database`.
When set to true, this makes arangodump dump all available databases the current
user has access to. The option `--all-databases` cannot be used in combination with
the option `--server.database`.
When `--all-databases` is used, arangodump will create a subdirectory with the data
of each dumped database. Databases will be dumped one after the other. However,
inside each database, the collections of the database can be dumped in parallel
using multiple threads.
When dumping all databases, the consistency guarantees of arangodump are the same
as when dumping multiple single databases individually, so the dump does not provide
cross-database consistency of the data.
### arangorestore
arangorestore got an option `--all-databases` to make it restore all databases from
inside the subdirectories of the specified dump directory, instead of just the
single database specified via the option `--server.database`.
Using the option for arangorestore only makes sense for dumps created with arangodump
and the `--all-databases` option. As for arangodump, arangorestore cannot be invoked
with both the options `--all-databases` and `--server.database` at the same time.
Additionally, the option `--force-same-database` cannot be used together with
`--all-databases`.
If the to-be-restored databases do not exist on the target server, then restoring data
into them will fail unless the option `--create-database` is also specified for
arangorestore. Please note that in this case a database user must be used that has
access to the `_system` database, in order to create the databases on restore.
Miscellaneous
-------------

View File

@ -113,6 +113,55 @@ arangodb::Result fileError(arangodb::ManagedDirectory::File* file, bool isWritab
return file->status();
}
/// @brief fetch the list of databases the current user may access, for dumping.
/// Queries `/_api/database/user` on the server, extracts the database names
/// from the response body, and returns them sorted by name with `_system`
/// (if present) always first.
/// @param client connected HTTP client used for the request
/// @return pair of {status, database names}; the name list is empty on error
std::pair<arangodb::Result, std::vector<std::string>> getDatabases(arangodb::httpclient::SimpleHttpClient& client) {
  std::string const url = "/_api/database/user";

  std::vector<std::string> names;

  std::unique_ptr<arangodb::httpclient::SimpleHttpResult> response(
      client.request(arangodb::rest::RequestType::GET, url, "", 0));

  auto res = ::checkHttpResponse(client, response);
  if (res.fail()) {
    LOG_TOPIC(ERR, arangodb::Logger::DUMP)
        << "An error occurred while trying to determine list of databases: " << res.errorMessage();
    return {res, names};
  }

  // extract vpack body from response
  std::shared_ptr<VPackBuilder> parsedBody;
  try {
    parsedBody = response->getBodyVelocyPack();
  } catch (...) {
    return {::ErrorMalformedJsonResponse, names};
  }

  // the payload is either a bare array or an object wrapping it in "result"
  VPackSlice body = parsedBody->slice();
  if (body.isObject()) {
    body = body.get("result");
  }
  if (!body.isArray()) {
    return {{TRI_ERROR_FAILED, "expecting list of databases to be an array"}, names};
  }

  // collect only the string entries; anything else is silently ignored
  for (VPackSlice entry : arangodb::velocypack::ArrayIterator(body)) {
    if (entry.isString()) {
      names.emplace_back(entry.copyString());
    }
  }

  // sort by name, with _system first
  std::sort(names.begin(), names.end(), [](std::string const& lhs, std::string const& rhs) {
    bool const lhsIsSystem = (lhs == "_system");
    bool const rhsIsSystem = (rhs == "_system");
    if (lhsIsSystem != rhsIsSystem) {
      // exactly one side is _system: that side sorts first
      return lhsIsSystem;
    }
    return lhs < rhs;
  });

  return {{TRI_ERROR_NO_ERROR}, names};
}
/// @brief start a batch via the replication API
std::pair<arangodb::Result, uint64_t> startBatch(arangodb::httpclient::SimpleHttpClient& client,
std::string const& DBserver) {
@ -560,6 +609,11 @@ void DumpFeature::collectOptions(std::shared_ptr<options::ProgramOptions> option
options->addOption("--dump-data", "dump collection data",
new BooleanParameter(&_options.dumpData));
options->addOption(
"--all-databases", "dump data of all databases",
new BooleanParameter(&_options.allDatabases))
.setIntroducedIn(30500);
options->addOption(
"--force", "continue dumping even in the face of some server-side errors",
@ -603,7 +657,7 @@ void DumpFeature::validateOptions(std::shared_ptr<options::ProgramOptions> optio
if (1 == n) {
_options.outputPath = positionals[0];
} else if (1 < n) {
LOG_TOPIC(FATAL, arangodb::Logger::FIXME)
LOG_TOPIC(FATAL, arangodb::Logger::DUMP)
<< "expecting at most one directory, got " +
arangodb::basics::StringUtils::join(positionals, ", ");
FATAL_ERROR_EXIT();
@ -616,10 +670,17 @@ void DumpFeature::validateOptions(std::shared_ptr<options::ProgramOptions> optio
boost::algorithm::clamp(_options.maxChunkSize, _options.initialChunkSize, ::MaxChunkSize);
if (_options.tickEnd < _options.tickStart) {
LOG_TOPIC(FATAL, arangodb::Logger::FIXME)
LOG_TOPIC(FATAL, arangodb::Logger::DUMP)
<< "invalid values for --tick-start or --tick-end";
FATAL_ERROR_EXIT();
}
if (options->processingResult().touched("server.database") &&
_options.allDatabases) {
LOG_TOPIC(FATAL, arangodb::Logger::DUMP)
<< "cannot use --server.database and --all-databases at the same time";
FATAL_ERROR_EXIT();
}
// trim trailing slash from path because it may cause problems on ...
// Windows
@ -632,7 +693,7 @@ void DumpFeature::validateOptions(std::shared_ptr<options::ProgramOptions> optio
boost::algorithm::clamp(_options.threadCount, 1,
4 * static_cast<uint32_t>(TRI_numberProcessors()));
if (_options.threadCount != clamped) {
LOG_TOPIC(WARN, Logger::FIXME) << "capping --threads value to " << clamped;
LOG_TOPIC(WARN, Logger::DUMP) << "capping --threads value to " << clamped;
_options.threadCount = clamped;
}
}
@ -1014,18 +1075,18 @@ void DumpFeature::start() {
if (_directory->status().fail()) {
switch (_directory->status().errorNumber()) {
case TRI_ERROR_FILE_EXISTS:
LOG_TOPIC(FATAL, Logger::FIXME) << "cannot write to output directory '"
<< _options.outputPath << "'";
LOG_TOPIC(FATAL, Logger::DUMP) << "cannot write to output directory '"
<< _options.outputPath << "'";
break;
case TRI_ERROR_CANNOT_OVERWRITE_FILE:
LOG_TOPIC(FATAL, Logger::FIXME)
LOG_TOPIC(FATAL, Logger::DUMP)
<< "output directory '" << _options.outputPath
<< "' already exists. use \"--overwrite true\" to "
"overwrite data in it";
break;
default:
LOG_TOPIC(ERR, Logger::FIXME) << _directory->status().errorMessage();
LOG_TOPIC(ERR, Logger::DUMP) << _directory->status().errorMessage();
break;
}
FATAL_ERROR_EXIT();
@ -1034,7 +1095,6 @@ void DumpFeature::start() {
// get database name to operate on
auto client = application_features::ApplicationServer::getFeature<ClientFeature>(
"Client");
auto dbName = client->databaseName();
// get a client to use in main thread
auto httpClient = _clientManager.getConnectedClient(_options.force, true, true);
@ -1057,7 +1117,7 @@ void DumpFeature::start() {
// special cluster-mode parameter checks
if (_options.clusterMode) {
if (_options.tickStart != 0 || _options.tickEnd != 0) {
LOG_TOPIC(ERR, Logger::FIXME)
LOG_TOPIC(ERR, Logger::DUMP)
<< "Error: cannot use tick-start or tick-end on a cluster";
FATAL_ERROR_EXIT();
}
@ -1069,33 +1129,68 @@ void DumpFeature::start() {
if (_options.progress) {
LOG_TOPIC(INFO, Logger::DUMP)
<< "Connected to ArangoDB '" << client->endpoint() << "', database: '"
<< dbName << "', username: '" << client->username() << "'";
<< client->databaseName() << "', username: '" << client->username() << "'";
LOG_TOPIC(INFO, Logger::DUMP)
<< "Writing dump to output directory '" << _directory->path()
<< "' with " << _options.threadCount << " thread(s)";
}
// final result
Result res;
try {
if (!_options.clusterMode) {
res = runDump(*httpClient, dbName);
} else {
res = runClusterDump(*httpClient, dbName);
}
} catch (basics::Exception const& ex) {
LOG_TOPIC(ERR, Logger::FIXME) << "caught exception: " << ex.what();
res = {ex.code(), ex.what()};
} catch (std::exception const& ex) {
LOG_TOPIC(ERR, Logger::FIXME) << "caught exception: " << ex.what();
res = {TRI_ERROR_INTERNAL, ex.what()};
} catch (...) {
LOG_TOPIC(ERR, Logger::FIXME) << "caught unknown exception";
res = {TRI_ERROR_INTERNAL};
std::vector<std::string> databases;
if (_options.allDatabases) {
// get list of available databases
std::tie(res, databases) = ::getDatabases(*httpClient);
} else {
// use just the single database that was specified
databases.push_back(client->databaseName());
}
if (res.ok()) {
for (auto const& db : databases) {
if (_options.allDatabases) {
// inject current database
LOG_TOPIC(INFO, Logger::DUMP) << "Dumping database '" << db << "'";
client->setDatabaseName(db);
httpClient = _clientManager.getConnectedClient(_options.force, false, true);
_directory = std::make_unique<ManagedDirectory>(arangodb::basics::FileUtils::buildFilename(_options.outputPath, db),
true, true);
if (_directory->status().fail()) {
res = _directory->status();
LOG_TOPIC(ERR, Logger::DUMP) << _directory->status().errorMessage();
break;
}
}
try {
if (!_options.clusterMode) {
res = runDump(*httpClient, db);
} else {
res = runClusterDump(*httpClient, db);
}
} catch (basics::Exception const& ex) {
LOG_TOPIC(ERR, Logger::DUMP) << "caught exception: " << ex.what();
res = {ex.code(), ex.what()};
} catch (std::exception const& ex) {
LOG_TOPIC(ERR, Logger::DUMP) << "caught exception: " << ex.what();
res = {TRI_ERROR_INTERNAL, ex.what()};
} catch (...) {
LOG_TOPIC(ERR, Logger::DUMP) << "caught unknown exception";
res = {TRI_ERROR_INTERNAL};
}
if (res.fail() && !_options.force) {
break;
}
}
}
if (res.fail()) {
LOG_TOPIC(ERR, Logger::FIXME) << "An error occurred: " + res.errorMessage();
LOG_TOPIC(ERR, Logger::DUMP) << "An error occurred: " + res.errorMessage();
_exitCode = EXIT_FAILURE;
}

View File

@ -72,6 +72,7 @@ class DumpFeature : public application_features::ApplicationFeature {
uint32_t threadCount{2};
uint64_t tickStart{0};
uint64_t tickEnd{0};
bool allDatabases{false};
bool clusterMode{false};
bool dumpData{true};
bool force{false};

View File

@ -765,7 +765,7 @@ arangodb::Result processInputDirectory(
restrictViews.insert(options.views.begin(), options.views.end());
try {
std::vector<std::string> const files = listFiles(options.inputPath);
std::vector<std::string> const files = listFiles(directory.path());
std::string const collectionSuffix = std::string(".structure.json");
std::string const viewsSuffix = std::string(".view.json");
std::vector<VPackBuilder> collections, views;
@ -1104,6 +1104,11 @@ void RestoreFeature::collectOptions(std::shared_ptr<options::ProgramOptions> opt
"--force-same-database",
"force usage of the same database name as in the source dump.json file",
new BooleanParameter(&_options.forceSameDatabase));
options->addOption(
"--all-databases", "restore data to all databases",
new BooleanParameter(&_options.allDatabases))
.setIntroducedIn(30500);
options->addOption("--input-directory", "input directory",
new StringParameter(&_options.inputPath));
@ -1189,6 +1194,20 @@ void RestoreFeature::validateOptions(std::shared_ptr<options::ProgramOptions> op
<< "expecting at most one directory, got " + join(positionals, ", ");
FATAL_ERROR_EXIT();
}
if (_options.allDatabases) {
if (options->processingResult().touched("server.database")) {
LOG_TOPIC(FATAL, arangodb::Logger::RESTORE)
<< "cannot use --server.database and --all-databases at the same time";
FATAL_ERROR_EXIT();
}
if (_options.forceSameDatabase) {
LOG_TOPIC(FATAL, arangodb::Logger::RESTORE)
<< "cannot use --force-same-database and --all-databases at the same time";
FATAL_ERROR_EXIT();
}
}
// use a minimum value for batches
if (_options.chunkSize < 1024 * 128) {
@ -1292,51 +1311,78 @@ void RestoreFeature::start() {
"Client");
_exitCode = EXIT_SUCCESS;
// enumerate all databases present in the dump directory (in case of
// --all-databases=true, or use just the flat files in case of --all-databases=false)
std::vector<std::string> databases;
if (_options.allDatabases) {
for (auto const& it : basics::FileUtils::listFiles(_options.inputPath)) {
std::string path = basics::FileUtils::buildFilename(_options.inputPath, it);
if (basics::FileUtils::isDirectory(path)) {
databases.push_back(it);
}
}
// sort by name, with _system first
std::sort(databases.begin(), databases.end(), [](std::string const& lhs, std::string const& rhs) {
if (lhs == "_system" && rhs != "_system") {
return true;
} else if (rhs == "_system" && lhs != "_system") {
return false;
}
return lhs < rhs;
});
if (databases.empty()) {
LOG_TOPIC(FATAL, Logger::RESTORE) << "Unable to find per-database subdirectories in input directory '" << _options.inputPath << "'. No data will be restored!";
FATAL_ERROR_EXIT();
}
} else {
databases.push_back(client->databaseName());
}
std::unique_ptr<SimpleHttpClient> httpClient;
Result result = _clientManager.getConnectedClient(httpClient, _options.force,
true, !_options.createDatabase);
// final result
Result result;
result = _clientManager.getConnectedClient(httpClient, _options.force,
true, !_options.createDatabase);
if (result.is(TRI_SIMPLE_CLIENT_COULD_NOT_CONNECT)) {
LOG_TOPIC(FATAL, Logger::RESTORE)
<< "cannot create server connection, giving up!";
FATAL_ERROR_EXIT();
} else if (result.is(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND) && _options.createDatabase) {
// database not found, but database creation requested
std::string dbName = client->databaseName();
LOG_TOPIC(INFO, Logger::RESTORE) << "Creating database '" << dbName << "'";
client->setDatabaseName("_system");
Result res = ::tryCreateDatabase(dbName);
if (res.fail()) {
LOG_TOPIC(ERR, Logger::RESTORE) << "Could not create database '" << dbName << "'";
LOG_TOPIC(FATAL, Logger::RESTORE) << httpClient->getErrorMessage();
FATAL_ERROR_EXIT();
}
// restore old database name
client->setDatabaseName(dbName);
// re-check connection and version
result = _clientManager.getConnectedClient(httpClient, _options.force, true, true);
}
if (result.is(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND)) {
std::string dbName = client->databaseName();
if (_options.createDatabase) {
// database not found, but database creation requested
LOG_TOPIC(INFO, Logger::RESTORE) << "Creating database '" << dbName << "'";
client->setDatabaseName("_system");
Result res = ::tryCreateDatabase(dbName);
if (res.fail()) {
LOG_TOPIC(FATAL, Logger::RESTORE) << "Could not create database '" << dbName << "': " << httpClient->getErrorMessage();
FATAL_ERROR_EXIT();
}
// restore old database name
client->setDatabaseName(dbName);
// re-check connection and version
result = _clientManager.getConnectedClient(httpClient, _options.force, true, true);
} else {
LOG_TOPIC(WARN, Logger::RESTORE) << "Database '" << dbName << "' does not exist on target endpoint. In order to create this database along with the restore, please use the --create-database option";
}
}
if (result.fail() && !_options.force) {
LOG_TOPIC(FATAL, Logger::RESTORE)
<< "cannot create server connection: " << result.errorMessage();
FATAL_ERROR_EXIT();
}
// read encryption info
::checkEncryption(*_directory);
// read dump info
result = ::checkDumpDatabase(*_directory, _options.forceSameDatabase);
if (result.fail()) {
LOG_TOPIC(FATAL, arangodb::Logger::RESTORE) << result.errorMessage();
FATAL_ERROR_EXIT();
}
// check if we are in cluster or single-server mode
std::string role;
std::tie(result, role) = _clientManager.getArangoIsCluster(*httpClient);
_options.clusterMode = (role == "COORDINATOR");
@ -1360,7 +1406,7 @@ void RestoreFeature::start() {
_exitCode = EXIT_FAILURE;
return;
}
if (_options.progress) {
LOG_TOPIC(INFO, Logger::RESTORE)
<< "Connected to ArangoDB '" << httpClient->getEndpointSpecification() << "'";
@ -1371,19 +1417,91 @@ void RestoreFeature::start() {
LOG_TOPIC(DEBUG, Logger::RESTORE) << "Using " << _options.threadCount << " worker thread(s)";
// run the actual restore
try {
result = ::processInputDirectory(*httpClient, _clientTaskQueue, *this,
_options, *_directory, _stats);
} catch (basics::Exception const& ex) {
LOG_TOPIC(ERR, arangodb::Logger::RESTORE) << "caught exception: " << ex.what();
result = {ex.code(), ex.what()};
} catch (std::exception const& ex) {
LOG_TOPIC(ERR, arangodb::Logger::RESTORE) << "caught exception: " << ex.what();
result = {TRI_ERROR_INTERNAL, ex.what()};
} catch (...) {
LOG_TOPIC(ERR, arangodb::Logger::RESTORE) << "caught unknown exception";
result = {TRI_ERROR_INTERNAL};
if (_options.allDatabases) {
LOG_TOPIC(INFO, Logger::RESTORE) << "About to restore databases '" << basics::StringUtils::join(databases, "', '") << "' from dump directory '" << _options.inputPath << "'...";
}
for (auto const& db : databases) {
result.reset();
if (_options.allDatabases) {
// inject current database
client->setDatabaseName(db);
LOG_TOPIC(INFO, Logger::RESTORE) << "Restoring database '" << db << "'";
_directory = std::make_unique<ManagedDirectory>(basics::FileUtils::buildFilename(_options.inputPath, db), false, false);
result = _clientManager.getConnectedClient(httpClient, _options.force,
false, !_options.createDatabase);
if (result.is(TRI_SIMPLE_CLIENT_COULD_NOT_CONNECT)) {
LOG_TOPIC(FATAL, Logger::RESTORE)
<< "cannot create server connection, giving up!";
FATAL_ERROR_EXIT();
}
if (result.is(TRI_ERROR_ARANGO_DATABASE_NOT_FOUND)) {
if (_options.createDatabase) {
// database not found, but database creation requested
LOG_TOPIC(INFO, Logger::RESTORE) << "Creating database '" << db << "'";
client->setDatabaseName("_system");
result = ::tryCreateDatabase(db);
if (result.fail()) {
LOG_TOPIC(ERR, Logger::RESTORE) << "Could not create database '" << db << "': " << httpClient->getErrorMessage();
break;
}
// restore old database name
client->setDatabaseName(db);
// re-check connection and version
result = _clientManager.getConnectedClient(httpClient, _options.force, false, true);
} else {
LOG_TOPIC(WARN, Logger::RESTORE) << "Database '" << db << "' does not exist on target endpoint. In order to create this database along with the restore, please use the --create-database option";
}
}
if (result.fail()) {
result.reset(result.errorNumber(), std::string("cannot create server connection: ") + result.errorMessage());
if (!_options.force) {
break;
}
LOG_TOPIC(ERR, arangodb::Logger::RESTORE) << result.errorMessage();
// continue with next db
continue;
}
}
// read encryption info
::checkEncryption(*_directory);
// read dump info
result = ::checkDumpDatabase(*_directory, _options.forceSameDatabase);
if (result.fail()) {
LOG_TOPIC(FATAL, arangodb::Logger::RESTORE) << result.errorMessage();
FATAL_ERROR_EXIT();
}
// run the actual restore
try {
result = ::processInputDirectory(*httpClient, _clientTaskQueue, *this,
_options, *_directory, _stats);
} catch (basics::Exception const& ex) {
LOG_TOPIC(ERR, arangodb::Logger::RESTORE) << "caught exception: " << ex.what();
result = {ex.code(), ex.what()};
} catch (std::exception const& ex) {
LOG_TOPIC(ERR, arangodb::Logger::RESTORE) << "caught exception: " << ex.what();
result = {TRI_ERROR_INTERNAL, ex.what()};
} catch (...) {
LOG_TOPIC(ERR, arangodb::Logger::RESTORE) << "caught unknown exception";
result = {TRI_ERROR_INTERNAL};
}
if (result.fail()) {
break;
}
}
if (result.fail()) {

View File

@ -83,6 +83,7 @@ class RestoreFeature final : public application_features::ApplicationFeature {
bool createDatabase{false};
bool force{false};
bool forceSameDatabase{false};
bool allDatabases{false};
bool ignoreDistributeShardsLikeErrors{false};
bool importData{true};
bool importStructure{true};