1
0
Fork 0

added --skip-lines for import

This commit is contained in:
Jan Steemann 2016-07-04 11:18:15 +02:00
parent 325ede0676
commit 7b62d71c63
5 changed files with 70 additions and 11 deletions

View File

@ -182,9 +182,7 @@ int RestImportHandler::handleSingleDocument(
VPackBuilder newBuilder;
if (isEdgeCollection) {
// Validate from and to
// TODO: Check if this is unified in trx.insert
// add prefixes to _from and _to
if (!_fromPrefix.empty() || !_toPrefix.empty()) {
TransactionBuilderLeaser tempBuilder(&trx);
@ -196,6 +194,9 @@ int RestImportHandler::handleSingleDocument(
if (f.find('/') == std::string::npos) {
tempBuilder->add(StaticStrings::FromString, VPackValue(_fromPrefix + f));
}
} else if (from.isInteger()) {
uint64_t f = from.getNumber<uint64_t>();
tempBuilder->add(StaticStrings::FromString, VPackValue(_fromPrefix + std::to_string(f)));
}
}
if (!_toPrefix.empty()) {
@ -205,6 +206,9 @@ int RestImportHandler::handleSingleDocument(
if (t.find('/') == std::string::npos) {
tempBuilder->add(StaticStrings::ToString, VPackValue(_toPrefix + t));
}
} else if (to.isInteger()) {
uint64_t t = to.getNumber<uint64_t>();
tempBuilder->add(StaticStrings::ToString, VPackValue(_toPrefix + std::to_string(t)));
}
}
tempBuilder->close();

View File

@ -42,6 +42,7 @@ ImportFeature::ImportFeature(application_features::ApplicationServer* server,
: ApplicationFeature(server, "Import"),
_filename(""),
_useBackslash(false),
_convert(true),
_chunkSize(1024 * 1024 * 16),
_collectionName(""),
_fromCollectionPrefix(""),
@ -54,6 +55,7 @@ ImportFeature::ImportFeature(application_features::ApplicationServer* server,
_separator(""),
_progress(true),
_onDuplicateAction("error"),
_rowsToSkip(0),
_result(result) {
requiresElevatedPrivileges(false);
setOptional(false);
@ -88,6 +90,14 @@ void ImportFeature::collectOptions(
options->addOption("--create-collection",
"create collection if it does not yet exist",
new BooleanParameter(&_createCollection));
options->addOption("--skip-lines",
"number of lines to skip for formats (csv and tsv only)",
new UInt64Parameter(&_rowsToSkip));
options->addOption("--convert",
"convert the strings 'null', 'false', 'true' and strings containing numbers into non-string types (csv and tsv only)",
new BooleanParameter(&_convert));
std::unordered_set<std::string> types = {"document", "edge"};
std::vector<std::string> typesVector(types.begin(), types.end());
@ -225,6 +235,8 @@ void ImportFeature::start() {
ih.setCreateCollectionType(_createCollectionType);
}
ih.setConversion(_convert);
ih.setRowsToSkip(static_cast<size_t>(_rowsToSkip));
ih.setOverwrite(_overwrite);
ih.useBackslash(_useBackslash);

View File

@ -48,6 +48,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
private:
std::string _filename;
bool _useBackslash;
bool _convert;
uint64_t _chunkSize;
std::string _collectionName;
std::string _fromCollectionPrefix;
@ -60,6 +61,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
std::string _separator;
bool _progress;
std::string _onDuplicateAction;
uint64_t _rowsToSkip;
private:
int* _result;

View File

@ -143,6 +143,7 @@ ImportHelper::ImportHelper(httpclient::SimpleHttpClient* client,
_quote("\""),
_createCollectionType("document"),
_useBackslash(false),
_convert(true),
_createCollection(false),
_overwrite(false),
_progress(false),
@ -154,6 +155,7 @@ ImportHelper::ImportHelper(httpclient::SimpleHttpClient* client,
_numberIgnored(0),
_rowsRead(0),
_rowOffset(0),
_rowsToSkip(0),
_onDuplicateAction("error"),
_collectionName(),
_lineBuffer(TRI_UNKNOWN_MEM_ZONE),
@ -464,8 +466,13 @@ void ImportHelper::beginLine(size_t row) {
////////////////////////////////////////////////////////////////////////////////
/// @brief forward one parsed CSV field to the ImportHelper stored in the parser
///
/// The helper is recovered from parser->_dataAdd. While the helper has read
/// fewer rows than it was asked to skip (getRowsRead() < getRowsToSkip()), the
/// field is dropped; otherwise it is handed to addField() exactly once.
///
/// @param parser       CSV parser whose _dataAdd points at the ImportHelper
/// @param field        pointer to the field's characters
/// @param fieldLength  length of the field
/// @param row          row index reported by the parser
/// @param column       column index reported by the parser
/// @param escaped      escape flag reported by the parser, passed through
////////////////////////////////////////////////////////////////////////////////
void ImportHelper::ProcessCsvAdd(TRI_csv_parser_t* parser, char const* field,
                                 size_t fieldLength, size_t row, size_t column,
                                 bool escaped) {
  auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);

  if (importHelper->getRowsRead() < importHelper->getRowsToSkip()) {
    // still inside the leading rows that were requested to be skipped
    return;
  }

  importHelper->addField(field, fieldLength, row, column, escaped);
}
void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
@ -479,6 +486,11 @@ void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
_lineBuffer.appendJsonEncoded(field, fieldLength);
return;
}
if (!_convert) {
_lineBuffer.appendText(field, fieldLength);
return;
}
if (*field == '\0') {
// do nothing
@ -550,12 +562,14 @@ void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
////////////////////////////////////////////////////////////////////////////////
/// @brief handle the field that ends a CSV row and advance the row counter
///
/// The helper is recovered from parser->_dataAdd. While the helper has read
/// fewer rows than it was asked to skip (getRowsRead() < getRowsToSkip()), the
/// field is dropped, but the row is still counted. Otherwise the field is
/// handed to addLastField() and the row is counted afterwards.
///
/// @param parser       CSV parser whose _dataAdd points at the ImportHelper
/// @param field        pointer to the field's characters
/// @param fieldLength  length of the field
/// @param row          row index reported by the parser
/// @param column       column index reported by the parser
/// @param escaped      escape flag reported by the parser, passed through
////////////////////////////////////////////////////////////////////////////////
void ImportHelper::ProcessCsvEnd(TRI_csv_parser_t* parser, char const* field,
                                 size_t fieldLength, size_t row, size_t column,
                                 bool escaped) {
  auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);

  if (importHelper->getRowsRead() < importHelper->getRowsToSkip()) {
    // A skipped row must still be counted. Without this increment the
    // comparison above stays true and every following row is skipped too.
    // NOTE(review): row-begin handling is outside this block; confirm it
    // honors the skip range as well.
    importHelper->incRowsRead();
    return;
  }

  importHelper->addLastField(field, fieldLength, row, column, escaped);
  importHelper->incRowsRead();
}
void ImportHelper::addLastField(char const* field, size_t fieldLength,

View File

@ -143,6 +143,25 @@ class ImportHelper {
//////////////////////////////////////////////////////////////////////////////
void setOverwrite(bool value) { _overwrite = value; }
//////////////////////////////////////////////////////////////////////////////
/// @brief set the number of rows to skip
//////////////////////////////////////////////////////////////////////////////
void setRowsToSkip(size_t value) { _rowsToSkip = value; }
//////////////////////////////////////////////////////////////////////////////
/// @brief get the number of rows to skip
//////////////////////////////////////////////////////////////////////////////
size_t getRowsToSkip() const { return _rowsToSkip; }
//////////////////////////////////////////////////////////////////////////////
/// @brief whether or not to convert strings that contain "null", "false",
/// "true" or that look like numbers into those types
//////////////////////////////////////////////////////////////////////////////
void setConversion(bool value) { _convert = value; }
//////////////////////////////////////////////////////////////////////////////
/// @brief set the progress indicator
@ -178,13 +197,19 @@ class ImportHelper {
/// @brief get the number of ignored documents
//////////////////////////////////////////////////////////////////////////////
// Single const-qualified accessor: it is callable on const and non-const
// objects alike, so no separate non-const overload is needed.
size_t getNumberIgnored() const { return _numberIgnored; }
//////////////////////////////////////////////////////////////////////////////
/// @brief increase the row counter
//////////////////////////////////////////////////////////////////////////////
void incRowsRead() { ++_rowsRead; }
//////////////////////////////////////////////////////////////////////////////
/// @brief get the number of rows read
//////////////////////////////////////////////////////////////////////////////
size_t getRowsRead() const { return _rowsRead; }
//////////////////////////////////////////////////////////////////////////////
/// @brief get the error message
@ -222,6 +247,7 @@ class ImportHelper {
std::string _quote;
std::string _createCollectionType;
bool _useBackslash;
bool _convert;
bool _createCollection;
bool _overwrite;
bool _progress;
@ -235,6 +261,7 @@ class ImportHelper {
size_t _rowsRead;
size_t _rowOffset;
size_t _rowsToSkip;
std::string _onDuplicateAction;
std::string _collectionName;