1
0
Fork 0

add 'create database' to 'import feature' (#4278)

This commit is contained in:
Jan Christoph Uhde 2018-01-15 15:15:31 +01:00 committed by Jan
parent 9c2ebb7bfc
commit bddadda0ee
9 changed files with 196 additions and 46 deletions

View File

@ -1,6 +1,8 @@
devel
-----
* issue #1190: added option `--create-database` for arangoimport
* issue #3504: added option `--force-same-database` for arangorestore
with this option set to true, it is possible to make any arangorestore attempt

View File

@ -90,7 +90,9 @@ be inhomogeneous.
Please note that by default, _arangoimport_ will import data into the specified
collection in the default database (*_system*). To specify a different database,
use the *--server.database* option when invoking _arangoimport_.
use the *--server.database* option when invoking _arangoimport_. If you want to
import into a nonexistent database you need to pass *--create-database true*.
Note *--create-database* defaults to *false*
The tool also supports parallel imports, with multiple threads. Using multiple
threads may provide a speedup, especially when using the RocksDB storage engine.

View File

@ -31,6 +31,7 @@
#include "Shell/ClientFeature.h"
#include "SimpleHttpClient/GeneralClientConnection.h"
#include "SimpleHttpClient/SimpleHttpClient.h"
#include "SimpleHttpClient/SimpleHttpResult.h"
#include <iostream>
#include <regex>
@ -52,6 +53,7 @@ ImportFeature::ImportFeature(application_features::ApplicationServer* server,
_fromCollectionPrefix(""),
_toCollectionPrefix(""),
_createCollection(false),
_createDatabase(false),
_createCollectionType("document"),
_typeImport("json"),
_overwrite(false),
@ -105,6 +107,10 @@ void ImportFeature::collectOptions(
"create collection if it does not yet exist",
new BooleanParameter(&_createCollection));
options->addOption("--create-database",
"create the target database if it does not exist",
new BooleanParameter(&_createDatabase));
options->addOption("--skip-lines",
"number of lines to skip for formats (csv and tsv only)",
new UInt64Parameter(&_rowsToSkip));
@ -269,73 +275,109 @@ void ImportFeature::start() {
}
}
std::unique_ptr<SimpleHttpClient> httpClient;
try {
httpClient = client->createHttpClient();
_httpClient = client->createHttpClient();
} catch (...) {
LOG_TOPIC(FATAL, arangodb::Logger::FIXME)
<< "cannot create server connection, giving up!";
FATAL_ERROR_EXIT();
}
httpClient->params().setLocationRewriter(static_cast<void*>(client),
_httpClient->params().setLocationRewriter(static_cast<void*>(client),
&rewriteLocation);
httpClient->params().setUserNamePassword("/", client->username(),
_httpClient->params().setUserNamePassword("/", client->username(),
client->password());
// must stay here in order to establish the connection
httpClient->getServerVersion();
if (!httpClient->isConnected()) {
int err = TRI_ERROR_NO_ERROR;
auto versionString = _httpClient->getServerVersion(&err);
auto dbName = client->databaseName();
bool createdDatabase = false;
auto successfulConnection = [&](){
std::cout << "Connected to ArangoDB '"
<< _httpClient->getEndpointSpecification() << "', version "
<< versionString << ", database: '"
<< client->databaseName() << "', username: '" << client->username()
<< "'" << std::endl;
std::cout << "----------------------------------------" << std::endl;
std::cout << "database: " << client->databaseName()
<< std::endl;
std::cout << "collection: " << _collectionName << std::endl;
if (!_fromCollectionPrefix.empty()) {
std::cout << "from collection prefix: " << _fromCollectionPrefix
<< std::endl;
}
if (!_toCollectionPrefix.empty()) {
std::cout << "to collection prefix: " << _toCollectionPrefix << std::endl;
}
std::cout << "create: " << (_createCollection ? "yes" : "no")
<< std::endl;
std::cout << "create database: " << (_createDatabase ? "yes" : "no")
<< std::endl;
std::cout << "source filename: " << _filename << std::endl;
std::cout << "file type: " << _typeImport << std::endl;
if (_typeImport == "csv") {
std::cout << "quote: " << _quote << std::endl;
}
if (_typeImport == "csv" || _typeImport == "tsv") {
std::cout << "separator: " << _separator << std::endl;
}
std::cout << "threads: " << _threadCount << std::endl;
std::cout << "connect timeout: " << client->connectionTimeout()
<< std::endl;
std::cout << "request timeout: " << client->requestTimeout()
<< std::endl;
std::cout << "----------------------------------------" << std::endl;
};
if (_createDatabase && err == TRI_ERROR_ARANGO_DATABASE_NOT_FOUND) {
// database not found, but database creation requested
std::cout << "Creating database '" << dbName << "'" << std::endl;
client->setDatabaseName("_system");
int res = tryCreateDatabase(client, dbName);
if (res != TRI_ERROR_NO_ERROR) {
LOG_TOPIC(ERR, arangodb::Logger::FIXME) << "Could not create database '"
<< dbName << "'";
LOG_TOPIC(FATAL, arangodb::Logger::FIXME)
<< _httpClient->getErrorMessage() << "'";
FATAL_ERROR_EXIT();
}
successfulConnection();
// restore old database name
client->setDatabaseName(dbName);
versionString = _httpClient->getServerVersion(nullptr);
createdDatabase = true;
}
if (!_httpClient->isConnected()) {
LOG_TOPIC(ERR, arangodb::Logger::FIXME)
<< "Could not connect to endpoint '" << client->endpoint()
<< "', database: '" << client->databaseName() << "', username: '"
<< client->username() << "'";
LOG_TOPIC(FATAL, arangodb::Logger::FIXME) << httpClient->getErrorMessage()
LOG_TOPIC(FATAL, arangodb::Logger::FIXME) << _httpClient->getErrorMessage()
<< "'";
FATAL_ERROR_EXIT();
}
// successfully connected
std::cout << "Connected to ArangoDB '"
<< httpClient->getEndpointSpecification() << "', version "
<< httpClient->getServerVersion() << ", database: '"
<< client->databaseName() << "', username: '" << client->username()
<< "'" << std::endl;
std::cout << "----------------------------------------" << std::endl;
std::cout << "database: " << client->databaseName()
<< std::endl;
std::cout << "collection: " << _collectionName << std::endl;
if (!_fromCollectionPrefix.empty()) {
std::cout << "from collection prefix: " << _fromCollectionPrefix
<< std::endl;
if(!createdDatabase) {
successfulConnection();
}
if (!_toCollectionPrefix.empty()) {
std::cout << "to collection prefix: " << _toCollectionPrefix << std::endl;
}
std::cout << "create: " << (_createCollection ? "yes" : "no")
<< std::endl;
std::cout << "source filename: " << _filename << std::endl;
std::cout << "file type: " << _typeImport << std::endl;
_httpClient->disconnect(); // we do not reuse this anymore
if (_typeImport == "csv") {
std::cout << "quote: " << _quote << std::endl;
}
if (_typeImport == "csv" || _typeImport == "tsv") {
std::cout << "separator: " << _separator << std::endl;
}
std::cout << "threads: " << _threadCount << std::endl;
std::cout << "connect timeout: " << client->connectionTimeout()
<< std::endl;
std::cout << "request timeout: " << client->requestTimeout()
<< std::endl;
std::cout << "----------------------------------------" << std::endl;
httpClient->disconnect(); // we do not reuse this anymore
SimpleHttpClientParams params = httpClient->params();
SimpleHttpClientParams params = _httpClient->params();
arangodb::import::ImportHelper ih(client, client->endpoint(), params,
_chunkSize, _threadCount);
@ -496,3 +538,46 @@ void ImportFeature::start() {
*_result = ret;
}
int ImportFeature::tryCreateDatabase(ClientFeature* client,
std::string const& name) {
VPackBuilder builder;
builder.openObject();
builder.add("name", VPackValue(name));
builder.add("users", VPackValue(VPackValueType::Array));
builder.openObject();
builder.add("username", VPackValue(client->username()));
builder.add("passwd", VPackValue(client->password()));
builder.close();
builder.close();
builder.close();
std::string const body = builder.slice().toJson();
std::unique_ptr<SimpleHttpResult> response(_httpClient->request(
rest::RequestType::POST, "/_api/database", body.c_str(), body.size()));
if (response == nullptr || !response->isComplete()) {
return TRI_ERROR_INTERNAL;
}
auto returnCode = response->getHttpReturnCode();
if (returnCode == static_cast<int>(rest::ResponseCode::OK) ||
returnCode == static_cast<int>(rest::ResponseCode::CREATED)) {
// all ok
return TRI_ERROR_NO_ERROR;
}
if (returnCode == static_cast<int>(rest::ResponseCode::UNAUTHORIZED) ||
returnCode == static_cast<int>(rest::ResponseCode::FORBIDDEN)) {
// invalid authorization
_httpClient->setErrorMessage(getHttpErrorMessage(response.get(), nullptr),
false);
return TRI_ERROR_FORBIDDEN;
}
// any other error
_httpClient->setErrorMessage(getHttpErrorMessage(response.get(), nullptr),
false);
return TRI_ERROR_INTERNAL;
}

View File

@ -25,6 +25,7 @@
#include "ApplicationFeatures/ApplicationFeature.h"
#include "V8Client/ArangoClientHelper.h"
#include "Shell/ClientFeature.h"
namespace arangodb {
namespace httpclient {
@ -45,6 +46,8 @@ class ImportFeature final : public application_features::ApplicationFeature,
void start() override;
private:
int tryCreateDatabase(ClientFeature*, std::string const& name);
std::string _filename;
bool _useBackslash;
bool _convert;
@ -54,6 +57,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
std::string _fromCollectionPrefix;
std::string _toCollectionPrefix;
bool _createCollection;
bool _createDatabase;
std::string _createCollectionType;
std::string _typeImport;
std::vector<std::string> _translations;
@ -65,7 +69,6 @@ class ImportFeature final : public application_features::ApplicationFeature,
bool _ignoreMissing;
std::string _onDuplicateAction;
uint64_t _rowsToSkip;
int* _result;
};
}

View File

@ -552,6 +552,14 @@ function runArangoImport (options, instanceInfo, what) {
args['create-collection'] = what.create;
}
if (what.createDatabase !== undefined) {
args['create-database'] = what.createDatabase;
}
if (what.database !== undefined) {
args['server.database'] = what.database;
}
if (what.backslash !== undefined) {
args['backslash-escape'] = what.backslash;
}

View File

@ -174,6 +174,14 @@ const impTodos = [{
type: 'csv',
create: 'true',
removeAttribute: 'a'
}, {
id: 'createDB',
data: tu.makePathUnix('js/common/test-data/import/import-1.json'),
coll: 'UnitTestsImportJson1',
type: 'json',
create: 'true',
database: 'UnitTestImportCreateDatabase',
createDatabase: 'true'
}];
function importing (options) {

View File

@ -52,6 +52,10 @@
db._drop("UnitTestsImportIgnore");
db._drop("UnitTestsImportUniqueConstraints");
db._drop("UnitTestsImportRemoveAttribute");
db._drop("UnitTestsImportRemoveAttribute");
try {
db._dropDatabase("UnitTestImportCreateDatabase");
} catch(err) {}
db._create("UnitTestsImportJson1");
db._create("UnitTestsImportJson2");

View File

@ -51,6 +51,9 @@
db._drop("UnitTestsImportEdge");
db._drop("UnitTestsImportIgnore");
db._drop("UnitTestsImportUniqueConstraints");
try {
db._dropDatabase("UnitTestImportCreateDatabase");
} catch(err) {}
})();
return {

View File

@ -441,6 +441,41 @@ function importTestSuite () {
assertEqual(expected, actual);
},
////////////////////////////////////////////////////////////////////////////////
/// @brief test database creation
////////////////////////////////////////////////////////////////////////////////
testCreateDatabase : function () {
var db = require("@arangodb").db;
try {
var testdb = db._useDatabase("UnitTestImportCreateDatabase");
var expected = [ { "id": 1,
"one": 1,
"three": 3,
"two": 2 },
{ "a": 1234,
"b": "the quick fox",
"id": 2,
"jumped":
"over the fox",
"null": null },
{ "id": 3,
"not": "important",
"spacing": "is" },
{ " c ": "h\"'ihi",
"a": true,
"b": false,
"d": "",
"id": 4 },
{ "id": 5 } ];
var actual = getQueryResults("FOR i IN UnitTestsImportJson1 SORT i.id RETURN i");
assertEqual(expected, actual);
} finally {
db._useDatabase("_system");
}
},
// END OF TEST DEFINITIONS /////////////////////////////////////////////////////
};
}