mirror of https://gitee.com/bigwinds/arangodb
Remove attribute option for arangoimp (#3141)
* Adding a remove attribute option to arangoimp (only for csv) * Adding documentation * Fixed log statement * Update CHANGELOG
This commit is contained in:
parent
00650e6a3f
commit
885ea3dd5f
|
@ -1,6 +1,8 @@
|
|||
devel
|
||||
-----
|
||||
|
||||
* arangoimp now has a --remove-attribute option
|
||||
|
||||
* added V8 context lifetime control options
|
||||
`--javascript.v8-contexts-max-invocations` and `--javascript.v8-contexts-max-age`
|
||||
|
||||
|
|
|
@ -272,6 +272,24 @@ Other common cases are to rename columns in the input file to *_from* and *_to*:
|
|||
The *translate* option can be specified multiple times. The source attribute name
|
||||
and the target attribute must be separated with a *=*.
|
||||
|
||||
|
||||
### Ignoring Attributes
|
||||
|
||||
|
||||
For the CSV and TSV input formats, certain attribute names can be ignored on imports.
|
||||
In an ArangoDB cluster there are cases where this can come in handy,
|
||||
when your documents already contain a `_key` attribute
|
||||
and your collection has a sharding attribute other than `_key`. In a cluster, this
|
||||
configuration is not supported, because ArangoDB needs to guarantee the uniqueness of the `_key`
|
||||
attribute in *all* shards of the collection.
|
||||
|
||||
> arangoimp --file "data.csv" --type csv --remove-attribute "_key"
|
||||
|
||||
The same thing would apply if your data contains an *_id* attribute:
|
||||
|
||||
> arangoimp --file "data.csv" --type csv --remove-attribute "_id"
|
||||
|
||||
|
||||
### Importing into an Edge Collection
|
||||
|
||||
arangoimp can also be used to import data into an existing edge collection.
|
||||
|
|
|
@ -118,6 +118,11 @@ void ImportFeature::collectOptions(
|
|||
"translate an attribute name (use as --translate "
|
||||
"\"from=to\", for csv and tsv only)",
|
||||
new VectorParameter<StringParameter>(&_translations));
|
||||
|
||||
options->addOption("--remove-attribute",
|
||||
"remove an attribute before inserting an attribute"
|
||||
" into a collection (for csv and tsv only)",
|
||||
new VectorParameter<StringParameter>(&_removeAttributes));
|
||||
|
||||
std::unordered_set<std::string> types = {"document", "edge"};
|
||||
std::vector<std::string> typesVector(types.begin(), types.end());
|
||||
|
@ -222,6 +227,14 @@ void ImportFeature::validateOptions(
|
|||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
}
|
||||
for (std::string& str : _removeAttributes) {
|
||||
StringUtils::trimInPlace(str);
|
||||
if (str.empty()) {
|
||||
LOG_TOPIC(FATAL, arangodb::Logger::FIXME)
|
||||
<< "cannot remove an empty attribute";
|
||||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ImportFeature::start() {
|
||||
|
@ -347,6 +360,7 @@ void ImportFeature::start() {
|
|||
}
|
||||
|
||||
ih.setTranslations(translations);
|
||||
ih.setRemoveAttributes(_removeAttributes);
|
||||
|
||||
// quote
|
||||
if (_quote.length() <= 1) {
|
||||
|
|
|
@ -57,6 +57,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
|
|||
std::string _createCollectionType;
|
||||
std::string _typeImport;
|
||||
std::vector<std::string> _translations;
|
||||
std::vector<std::string> _removeAttributes;
|
||||
bool _overwrite;
|
||||
std::string _quote;
|
||||
std::string _separator;
|
||||
|
|
|
@ -162,6 +162,8 @@ ImportHelper::ImportHelper(ClientFeature const* client,
|
|||
_collectionName(),
|
||||
_lineBuffer(TRI_UNKNOWN_MEM_ZONE),
|
||||
_outputBuffer(TRI_UNKNOWN_MEM_ZONE),
|
||||
_firstLine(""),
|
||||
_columnNames(),
|
||||
_hasError(false) {
|
||||
for (uint32_t i = 0; i < threadCount; i++) {
|
||||
auto http = client->createHttpClient(endpoint, params);
|
||||
|
@ -508,29 +510,36 @@ void ImportHelper::ProcessCsvAdd(TRI_csv_parser_t* parser, char const* field,
|
|||
size_t fieldLength, size_t row, size_t column,
|
||||
bool escaped) {
|
||||
auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);
|
||||
|
||||
if (importHelper->getRowsRead() < importHelper->getRowsToSkip()) {
|
||||
return;
|
||||
}
|
||||
|
||||
importHelper->addField(field, fieldLength, row, column, escaped);
|
||||
}
|
||||
|
||||
void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
|
||||
size_t column, bool escaped) {
|
||||
if (_rowsRead < _rowsToSkip) {
|
||||
return;
|
||||
}
|
||||
// we read the first line if we get here
|
||||
if (row == _rowsToSkip) {
|
||||
std::string name = std::string(field, fieldLength);
|
||||
if (fieldLength > 0) { // translate field
|
||||
auto it = _translations.find(name);
|
||||
if (it != _translations.end()) {
|
||||
field = (*it).second.c_str();
|
||||
fieldLength = (*it).second.size();
|
||||
}
|
||||
}
|
||||
_columnNames.push_back(std::move(name));
|
||||
}
|
||||
// skip removable attributes
|
||||
if (!_removeAttributes.empty() &&
|
||||
_removeAttributes.find(_columnNames[column]) != _removeAttributes.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (column > 0) {
|
||||
_lineBuffer.appendChar(',');
|
||||
}
|
||||
|
||||
if (row == _rowsToSkip && fieldLength > 0) {
|
||||
// translate field
|
||||
auto it = _translations.find(std::string(field, fieldLength));
|
||||
if (it != _translations.end()) {
|
||||
field = (*it).second.c_str();
|
||||
fieldLength = (*it).second.size();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (_keyColumn == -1 && row == _rowsToSkip && fieldLength == 4 &&
|
||||
memcmp(field, "_key", 4) == 0) {
|
||||
_keyColumn = column;
|
||||
|
|
|
@ -153,6 +153,12 @@ class ImportHelper {
|
|||
std::unordered_map<std::string, std::string> const& translations) {
|
||||
_translations = translations;
|
||||
}
|
||||
|
||||
void setRemoveAttributes(std::vector<std::string> const& attr) {
|
||||
for (std::string const& str : attr) {
|
||||
_removeAttributes.insert(str);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief whether or not to overwrite existing data in the collection
|
||||
|
@ -289,8 +295,10 @@ class ImportHelper {
|
|||
arangodb::basics::StringBuffer _lineBuffer;
|
||||
arangodb::basics::StringBuffer _outputBuffer;
|
||||
std::string _firstLine;
|
||||
std::vector<std::string> _columnNames;
|
||||
|
||||
std::unordered_map<std::string, std::string> _translations;
|
||||
std::unordered_set<std::string> _removeAttributes;
|
||||
|
||||
bool _hasError;
|
||||
std::vector<std::string> _errorMessages;
|
||||
|
|
|
@ -523,6 +523,9 @@ function runArangoImp (options, instanceInfo, what) {
|
|||
if (what.convert !== undefined) {
|
||||
args['convert'] = what.convert ? 'true' : 'false';
|
||||
}
|
||||
if (what.removeAttribute !== undefined) {
|
||||
args['remove-attribute'] = what.removeAttribute;
|
||||
}
|
||||
|
||||
return executeAndWait(ARANGOIMP_BIN, toArgv(args), options, 'arangoimp', instanceInfo.rootDir);
|
||||
}
|
||||
|
|
|
@ -159,6 +159,13 @@ const impTodos = [{
|
|||
type: 'json',
|
||||
create: 'false',
|
||||
onDuplicate: 'replace'
|
||||
}, {
|
||||
id: 'removeAttribute',
|
||||
data: tu.makePathUnix('js/common/test-data/import/import-1.csv'),
|
||||
coll: 'UnitTestsImportRemoveAttribute',
|
||||
type: 'csv',
|
||||
create: 'true',
|
||||
removeAttribute: 'a'
|
||||
}];
|
||||
|
||||
function importing (options) {
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
db._drop("UnitTestsImportEdge");
|
||||
db._drop("UnitTestsImportIgnore");
|
||||
db._drop("UnitTestsImportUniqueConstraints");
|
||||
db._drop("UnitTestsImportRemoveAttribute");
|
||||
|
||||
db._create("UnitTestsImportJson1");
|
||||
db._create("UnitTestsImportJson2");
|
||||
|
|
|
@ -405,7 +405,24 @@ function importTestSuite () {
|
|||
|
||||
var actual = getQueryResults("FOR i IN UnitTestsImportUniqueConstraints SORT i._key RETURN i", true);
|
||||
assertEqual(expected, actual);
|
||||
}
|
||||
},
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// @brief test csv import removing attribute
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
testCsvImportRemoveAttribute : function () {
|
||||
var expected = [
|
||||
{ "b": 1, "c": "1.3", "e": -5, "id": 1 },
|
||||
{ "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 },
|
||||
{ "b": "test", "c" : -99999999, "d": true, "e": -888.4434, "id": 5 },
|
||||
{ "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 },
|
||||
{ "b": 1.05e-2, "c": true, "d": false, "id": 7 }
|
||||
];
|
||||
|
||||
var actual = getQueryResults("FOR i IN UnitTestsImportRemoveAttribute SORT i.id RETURN i");
|
||||
assertEqual(expected, actual);
|
||||
},
|
||||
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue