mirror of https://gitee.com/bigwinds/arangodb
Remove attribute option for arangoimp (#3141)
* Adding a remove attribute option to arangoimp (only for csv) * Adding documentation * Fixed log statement * Update CHANGELOG
This commit is contained in:
parent
00650e6a3f
commit
885ea3dd5f
|
@ -1,6 +1,8 @@
|
||||||
devel
|
devel
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
* arangoimp now has a --remove-attribute option
|
||||||
|
|
||||||
* added V8 context lifetime control options
|
* added V8 context lifetime control options
|
||||||
`--javascript.v8-contexts-max-invocations` and `--javascript.v8-contexts-max-age`
|
`--javascript.v8-contexts-max-invocations` and `--javascript.v8-contexts-max-age`
|
||||||
|
|
||||||
|
|
|
@ -272,6 +272,24 @@ Other common cases are to rename columns in the input file to *_from* and *_to*:
|
||||||
The *translate* option can be specified multiple times. The source attribute name
|
The *translate* option can be specified multiple times. The source attribute name
|
||||||
and the target attribute must be separated with a *=*.
|
and the target attribute must be separated with a *=*.
|
||||||
|
|
||||||
|
|
||||||
|
### Ignoring Attributes
|
||||||
|
|
||||||
|
|
||||||
|
For the CSV and TSV input formats, certain attribute names can be ignored on imports.
|
||||||
|
This can come in handy in an ArangoDB cluster, for example
|
||||||
|
when your documents already contain a `_key` attribute
|
||||||
|
and your collection has a sharding attribute other than `_key`: In the cluster this
|
||||||
|
configuration is not supported, because ArangoDB needs to guarantee the uniqueness of the `_key`
|
||||||
|
attribute in *all* shards of the collection.
|
||||||
|
|
||||||
|
> arangoimp --file "data.csv" --type csv --remove-attribute "_key"
|
||||||
|
|
||||||
|
The same thing would apply if your data contains an *_id* attribute:
|
||||||
|
|
||||||
|
> arangoimp --file "data.csv" --type csv --remove-attribute "_id"
|
||||||
|
|
||||||
|
|
||||||
### Importing into an Edge Collection
|
### Importing into an Edge Collection
|
||||||
|
|
||||||
arangoimp can also be used to import data into an existing edge collection.
|
arangoimp can also be used to import data into an existing edge collection.
|
||||||
|
|
|
@ -118,6 +118,11 @@ void ImportFeature::collectOptions(
|
||||||
"translate an attribute name (use as --translate "
|
"translate an attribute name (use as --translate "
|
||||||
"\"from=to\", for csv and tsv only)",
|
"\"from=to\", for csv and tsv only)",
|
||||||
new VectorParameter<StringParameter>(&_translations));
|
new VectorParameter<StringParameter>(&_translations));
|
||||||
|
|
||||||
|
options->addOption("--remove-attribute",
|
||||||
|
"remove an attribute before inserting an attribute"
|
||||||
|
" into a collection (for csv and tsv only)",
|
||||||
|
new VectorParameter<StringParameter>(&_removeAttributes));
|
||||||
|
|
||||||
std::unordered_set<std::string> types = {"document", "edge"};
|
std::unordered_set<std::string> types = {"document", "edge"};
|
||||||
std::vector<std::string> typesVector(types.begin(), types.end());
|
std::vector<std::string> typesVector(types.begin(), types.end());
|
||||||
|
@ -222,6 +227,14 @@ void ImportFeature::validateOptions(
|
||||||
FATAL_ERROR_EXIT();
|
FATAL_ERROR_EXIT();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (std::string& str : _removeAttributes) {
|
||||||
|
StringUtils::trimInPlace(str);
|
||||||
|
if (str.empty()) {
|
||||||
|
LOG_TOPIC(FATAL, arangodb::Logger::FIXME)
|
||||||
|
<< "cannot remove an empty attribute";
|
||||||
|
FATAL_ERROR_EXIT();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ImportFeature::start() {
|
void ImportFeature::start() {
|
||||||
|
@ -347,6 +360,7 @@ void ImportFeature::start() {
|
||||||
}
|
}
|
||||||
|
|
||||||
ih.setTranslations(translations);
|
ih.setTranslations(translations);
|
||||||
|
ih.setRemoveAttributes(_removeAttributes);
|
||||||
|
|
||||||
// quote
|
// quote
|
||||||
if (_quote.length() <= 1) {
|
if (_quote.length() <= 1) {
|
||||||
|
|
|
@ -57,6 +57,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
|
||||||
std::string _createCollectionType;
|
std::string _createCollectionType;
|
||||||
std::string _typeImport;
|
std::string _typeImport;
|
||||||
std::vector<std::string> _translations;
|
std::vector<std::string> _translations;
|
||||||
|
std::vector<std::string> _removeAttributes;
|
||||||
bool _overwrite;
|
bool _overwrite;
|
||||||
std::string _quote;
|
std::string _quote;
|
||||||
std::string _separator;
|
std::string _separator;
|
||||||
|
|
|
@ -162,6 +162,8 @@ ImportHelper::ImportHelper(ClientFeature const* client,
|
||||||
_collectionName(),
|
_collectionName(),
|
||||||
_lineBuffer(TRI_UNKNOWN_MEM_ZONE),
|
_lineBuffer(TRI_UNKNOWN_MEM_ZONE),
|
||||||
_outputBuffer(TRI_UNKNOWN_MEM_ZONE),
|
_outputBuffer(TRI_UNKNOWN_MEM_ZONE),
|
||||||
|
_firstLine(""),
|
||||||
|
_columnNames(),
|
||||||
_hasError(false) {
|
_hasError(false) {
|
||||||
for (uint32_t i = 0; i < threadCount; i++) {
|
for (uint32_t i = 0; i < threadCount; i++) {
|
||||||
auto http = client->createHttpClient(endpoint, params);
|
auto http = client->createHttpClient(endpoint, params);
|
||||||
|
@ -508,29 +510,36 @@ void ImportHelper::ProcessCsvAdd(TRI_csv_parser_t* parser, char const* field,
|
||||||
size_t fieldLength, size_t row, size_t column,
|
size_t fieldLength, size_t row, size_t column,
|
||||||
bool escaped) {
|
bool escaped) {
|
||||||
auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);
|
auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);
|
||||||
|
|
||||||
if (importHelper->getRowsRead() < importHelper->getRowsToSkip()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
importHelper->addField(field, fieldLength, row, column, escaped);
|
importHelper->addField(field, fieldLength, row, column, escaped);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
|
void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
|
||||||
size_t column, bool escaped) {
|
size_t column, bool escaped) {
|
||||||
|
if (_rowsRead < _rowsToSkip) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// we read the first line if we get here
|
||||||
|
if (row == _rowsToSkip) {
|
||||||
|
std::string name = std::string(field, fieldLength);
|
||||||
|
if (fieldLength > 0) { // translate field
|
||||||
|
auto it = _translations.find(name);
|
||||||
|
if (it != _translations.end()) {
|
||||||
|
field = (*it).second.c_str();
|
||||||
|
fieldLength = (*it).second.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_columnNames.push_back(std::move(name));
|
||||||
|
}
|
||||||
|
// skip removable attributes
|
||||||
|
if (!_removeAttributes.empty() &&
|
||||||
|
_removeAttributes.find(_columnNames[column]) != _removeAttributes.end()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (column > 0) {
|
if (column > 0) {
|
||||||
_lineBuffer.appendChar(',');
|
_lineBuffer.appendChar(',');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (row == _rowsToSkip && fieldLength > 0) {
|
|
||||||
// translate field
|
|
||||||
auto it = _translations.find(std::string(field, fieldLength));
|
|
||||||
if (it != _translations.end()) {
|
|
||||||
field = (*it).second.c_str();
|
|
||||||
fieldLength = (*it).second.size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_keyColumn == -1 && row == _rowsToSkip && fieldLength == 4 &&
|
if (_keyColumn == -1 && row == _rowsToSkip && fieldLength == 4 &&
|
||||||
memcmp(field, "_key", 4) == 0) {
|
memcmp(field, "_key", 4) == 0) {
|
||||||
_keyColumn = column;
|
_keyColumn = column;
|
||||||
|
|
|
@ -153,6 +153,12 @@ class ImportHelper {
|
||||||
std::unordered_map<std::string, std::string> const& translations) {
|
std::unordered_map<std::string, std::string> const& translations) {
|
||||||
_translations = translations;
|
_translations = translations;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
/// @brief registers attribute names that are to be removed on import
///
/// Every string in attr is inserted into the _removeAttributes set. Entries
/// already present in the set are kept; the set is not cleared first.
//////////////////////////////////////////////////////////////////////////////
void setRemoveAttributes(std::vector<std::string> const& attr) {
|
||||||
|
for (std::string const& str : attr) {
|
||||||
|
_removeAttributes.insert(str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
/// @brief whether or not to overwrite existing data in the collection
|
/// @brief whether or not to overwrite existing data in the collection
|
||||||
|
@ -289,8 +295,10 @@ class ImportHelper {
|
||||||
arangodb::basics::StringBuffer _lineBuffer;
|
arangodb::basics::StringBuffer _lineBuffer;
|
||||||
arangodb::basics::StringBuffer _outputBuffer;
|
arangodb::basics::StringBuffer _outputBuffer;
|
||||||
std::string _firstLine;
|
std::string _firstLine;
|
||||||
|
std::vector<std::string> _columnNames;
|
||||||
|
|
||||||
std::unordered_map<std::string, std::string> _translations;
|
std::unordered_map<std::string, std::string> _translations;
|
||||||
|
std::unordered_set<std::string> _removeAttributes;
|
||||||
|
|
||||||
bool _hasError;
|
bool _hasError;
|
||||||
std::vector<std::string> _errorMessages;
|
std::vector<std::string> _errorMessages;
|
||||||
|
|
|
@ -523,6 +523,9 @@ function runArangoImp (options, instanceInfo, what) {
|
||||||
if (what.convert !== undefined) {
|
if (what.convert !== undefined) {
|
||||||
args['convert'] = what.convert ? 'true' : 'false';
|
args['convert'] = what.convert ? 'true' : 'false';
|
||||||
}
|
}
|
||||||
|
if (what.removeAttribute !== undefined) {
|
||||||
|
args['remove-attribute'] = what.removeAttribute;
|
||||||
|
}
|
||||||
|
|
||||||
return executeAndWait(ARANGOIMP_BIN, toArgv(args), options, 'arangoimp', instanceInfo.rootDir);
|
return executeAndWait(ARANGOIMP_BIN, toArgv(args), options, 'arangoimp', instanceInfo.rootDir);
|
||||||
}
|
}
|
||||||
|
|
|
@ -159,6 +159,13 @@ const impTodos = [{
|
||||||
type: 'json',
|
type: 'json',
|
||||||
create: 'false',
|
create: 'false',
|
||||||
onDuplicate: 'replace'
|
onDuplicate: 'replace'
|
||||||
|
}, {
|
||||||
|
id: 'removeAttribute',
|
||||||
|
data: tu.makePathUnix('js/common/test-data/import/import-1.csv'),
|
||||||
|
coll: 'UnitTestsImportRemoveAttribute',
|
||||||
|
type: 'csv',
|
||||||
|
create: 'true',
|
||||||
|
removeAttribute: 'a'
|
||||||
}];
|
}];
|
||||||
|
|
||||||
function importing (options) {
|
function importing (options) {
|
||||||
|
|
|
@ -50,6 +50,7 @@
|
||||||
db._drop("UnitTestsImportEdge");
|
db._drop("UnitTestsImportEdge");
|
||||||
db._drop("UnitTestsImportIgnore");
|
db._drop("UnitTestsImportIgnore");
|
||||||
db._drop("UnitTestsImportUniqueConstraints");
|
db._drop("UnitTestsImportUniqueConstraints");
|
||||||
|
db._drop("UnitTestsImportRemoveAttribute");
|
||||||
|
|
||||||
db._create("UnitTestsImportJson1");
|
db._create("UnitTestsImportJson1");
|
||||||
db._create("UnitTestsImportJson2");
|
db._create("UnitTestsImportJson2");
|
||||||
|
|
|
@ -405,7 +405,24 @@ function importTestSuite () {
|
||||||
|
|
||||||
var actual = getQueryResults("FOR i IN UnitTestsImportUniqueConstraints SORT i._key RETURN i", true);
|
var actual = getQueryResults("FOR i IN UnitTestsImportUniqueConstraints SORT i._key RETURN i", true);
|
||||||
assertEqual(expected, actual);
|
assertEqual(expected, actual);
|
||||||
}
|
},
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// @brief test csv import removing attribute
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
testCsvImportRemoveAttribute : function () {
|
||||||
|
// documents expected in the collection; none of them carries an "a" attribute
var expected = [
|
||||||
|
{ "b": 1, "c": "1.3", "e": -5, "id": 1 },
|
||||||
|
{ "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
|
||||||
|
{ "b": "test", "c" : -99999999, "d": true, "e": -888.4434, "id": 5 },
|
||||||
|
{ "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
|
||||||
|
{ "b": 1.05e-2, "c": true, "d": false, "id": 7 }
|
||||||
|
];
|
||||||
|

||||||
|
// read the collection back sorted by the "id" attribute so the order matches the expected list
var actual = getQueryResults("FOR i IN UnitTestsImportRemoveAttribute SORT i.id RETURN i");
|
||||||
|
assertEqual(expected, actual);
|
||||||
|
},
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue