1
0
Fork 0

added --skip-lines for import

This commit is contained in:
Jan Steemann 2016-07-04 11:18:15 +02:00
parent 325ede0676
commit 7b62d71c63
5 changed files with 70 additions and 11 deletions

View File

@ -182,9 +182,7 @@ int RestImportHandler::handleSingleDocument(
VPackBuilder newBuilder; VPackBuilder newBuilder;
if (isEdgeCollection) { if (isEdgeCollection) {
// Validate from and to // add prefixes to _from and _to
// TODO: Check if this is unified in trx.insert
if (!_fromPrefix.empty() || !_toPrefix.empty()) { if (!_fromPrefix.empty() || !_toPrefix.empty()) {
TransactionBuilderLeaser tempBuilder(&trx); TransactionBuilderLeaser tempBuilder(&trx);
@ -196,6 +194,9 @@ int RestImportHandler::handleSingleDocument(
if (f.find('/') == std::string::npos) { if (f.find('/') == std::string::npos) {
tempBuilder->add(StaticStrings::FromString, VPackValue(_fromPrefix + f)); tempBuilder->add(StaticStrings::FromString, VPackValue(_fromPrefix + f));
} }
} else if (from.isInteger()) {
uint64_t f = from.getNumber<uint64_t>();
tempBuilder->add(StaticStrings::FromString, VPackValue(_fromPrefix + std::to_string(f)));
} }
} }
if (!_toPrefix.empty()) { if (!_toPrefix.empty()) {
@ -205,6 +206,9 @@ int RestImportHandler::handleSingleDocument(
if (t.find('/') == std::string::npos) { if (t.find('/') == std::string::npos) {
tempBuilder->add(StaticStrings::ToString, VPackValue(_toPrefix + t)); tempBuilder->add(StaticStrings::ToString, VPackValue(_toPrefix + t));
} }
} else if (to.isInteger()) {
uint64_t t = to.getNumber<uint64_t>();
tempBuilder->add(StaticStrings::ToString, VPackValue(_toPrefix + std::to_string(t)));
} }
} }
tempBuilder->close(); tempBuilder->close();

View File

@ -42,6 +42,7 @@ ImportFeature::ImportFeature(application_features::ApplicationServer* server,
: ApplicationFeature(server, "Import"), : ApplicationFeature(server, "Import"),
_filename(""), _filename(""),
_useBackslash(false), _useBackslash(false),
_convert(true),
_chunkSize(1024 * 1024 * 16), _chunkSize(1024 * 1024 * 16),
_collectionName(""), _collectionName(""),
_fromCollectionPrefix(""), _fromCollectionPrefix(""),
@ -54,6 +55,7 @@ ImportFeature::ImportFeature(application_features::ApplicationServer* server,
_separator(""), _separator(""),
_progress(true), _progress(true),
_onDuplicateAction("error"), _onDuplicateAction("error"),
_rowsToSkip(0),
_result(result) { _result(result) {
requiresElevatedPrivileges(false); requiresElevatedPrivileges(false);
setOptional(false); setOptional(false);
@ -88,6 +90,14 @@ void ImportFeature::collectOptions(
options->addOption("--create-collection", options->addOption("--create-collection",
"create collection if it does not yet exist", "create collection if it does not yet exist",
new BooleanParameter(&_createCollection)); new BooleanParameter(&_createCollection));
options->addOption("--skip-lines",
"number of lines to skip for formats (csv and tsv only)",
new UInt64Parameter(&_rowsToSkip));
options->addOption("--convert",
"convert the strings 'null', 'false', 'true' and strings containing numbers into non-string types (csv and tsv only)",
new BooleanParameter(&_convert));
std::unordered_set<std::string> types = {"document", "edge"}; std::unordered_set<std::string> types = {"document", "edge"};
std::vector<std::string> typesVector(types.begin(), types.end()); std::vector<std::string> typesVector(types.begin(), types.end());
@ -225,6 +235,8 @@ void ImportFeature::start() {
ih.setCreateCollectionType(_createCollectionType); ih.setCreateCollectionType(_createCollectionType);
} }
ih.setConversion(_convert);
ih.setRowsToSkip(static_cast<size_t>(_rowsToSkip));
ih.setOverwrite(_overwrite); ih.setOverwrite(_overwrite);
ih.useBackslash(_useBackslash); ih.useBackslash(_useBackslash);

View File

@ -48,6 +48,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
private: private:
std::string _filename; std::string _filename;
bool _useBackslash; bool _useBackslash;
bool _convert;
uint64_t _chunkSize; uint64_t _chunkSize;
std::string _collectionName; std::string _collectionName;
std::string _fromCollectionPrefix; std::string _fromCollectionPrefix;
@ -60,6 +61,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
std::string _separator; std::string _separator;
bool _progress; bool _progress;
std::string _onDuplicateAction; std::string _onDuplicateAction;
uint64_t _rowsToSkip;
private: private:
int* _result; int* _result;

View File

@ -143,6 +143,7 @@ ImportHelper::ImportHelper(httpclient::SimpleHttpClient* client,
_quote("\""), _quote("\""),
_createCollectionType("document"), _createCollectionType("document"),
_useBackslash(false), _useBackslash(false),
_convert(true),
_createCollection(false), _createCollection(false),
_overwrite(false), _overwrite(false),
_progress(false), _progress(false),
@ -154,6 +155,7 @@ ImportHelper::ImportHelper(httpclient::SimpleHttpClient* client,
_numberIgnored(0), _numberIgnored(0),
_rowsRead(0), _rowsRead(0),
_rowOffset(0), _rowOffset(0),
_rowsToSkip(0),
_onDuplicateAction("error"), _onDuplicateAction("error"),
_collectionName(), _collectionName(),
_lineBuffer(TRI_UNKNOWN_MEM_ZONE), _lineBuffer(TRI_UNKNOWN_MEM_ZONE),
@ -464,8 +466,13 @@ void ImportHelper::beginLine(size_t row) {
void ImportHelper::ProcessCsvAdd(TRI_csv_parser_t* parser, char const* field, void ImportHelper::ProcessCsvAdd(TRI_csv_parser_t* parser, char const* field,
size_t fieldLength, size_t row, size_t column, size_t fieldLength, size_t row, size_t column,
bool escaped) { bool escaped) {
static_cast<ImportHelper*>(parser->_dataAdd) auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);
->addField(field, fieldLength, row, column, escaped);
if (importHelper->getRowsRead() < importHelper->getRowsToSkip()) {
return;
}
importHelper->addField(field, fieldLength, row, column, escaped);
} }
void ImportHelper::addField(char const* field, size_t fieldLength, size_t row, void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
@ -479,6 +486,11 @@ void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
_lineBuffer.appendJsonEncoded(field, fieldLength); _lineBuffer.appendJsonEncoded(field, fieldLength);
return; return;
} }
if (!_convert) {
_lineBuffer.appendText(field, fieldLength);
return;
}
if (*field == '\0') { if (*field == '\0') {
// do nothing // do nothing
@ -550,12 +562,14 @@ void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
void ImportHelper::ProcessCsvEnd(TRI_csv_parser_t* parser, char const* field, void ImportHelper::ProcessCsvEnd(TRI_csv_parser_t* parser, char const* field,
size_t fieldLength, size_t row, size_t column, size_t fieldLength, size_t row, size_t column,
bool escaped) { bool escaped) {
ImportHelper* ih = static_cast<ImportHelper*>(parser->_dataAdd); auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);
if (ih) { if (importHelper->getRowsRead() < importHelper->getRowsToSkip()) {
ih->addLastField(field, fieldLength, row, column, escaped); return;
ih->incRowsRead();
} }
importHelper->addLastField(field, fieldLength, row, column, escaped);
importHelper->incRowsRead();
} }
void ImportHelper::addLastField(char const* field, size_t fieldLength, void ImportHelper::addLastField(char const* field, size_t fieldLength,

View File

@ -143,6 +143,25 @@ class ImportHelper {
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
void setOverwrite(bool value) { _overwrite = value; } void setOverwrite(bool value) { _overwrite = value; }
//////////////////////////////////////////////////////////////////////////////
/// @brief set the number of rows to skip
///
/// The CSV callbacks compare this value against the number of rows read
/// (getRowsRead()) and drop fields while the latter is smaller.
//////////////////////////////////////////////////////////////////////////////
void setRowsToSkip(size_t value) { _rowsToSkip = value; }
//////////////////////////////////////////////////////////////////////////////
/// @brief get the number of rows to skip, as set via setRowsToSkip()
/// (0 unless changed)
//////////////////////////////////////////////////////////////////////////////
size_t getRowsToSkip() const { return _rowsToSkip; }
//////////////////////////////////////////////////////////////////////////////
/// @brief whether or not to convert strings that contain "null", "false",
/// "true" or that look like numbers into those types
///
/// Conversion is enabled by default. When it is disabled, addField()
/// appends the field text as it is instead of inspecting its contents.
//////////////////////////////////////////////////////////////////////////////
void setConversion(bool value) { _convert = value; }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
/// @brief set the progress indicator /// @brief set the progress indicator
@ -178,13 +197,19 @@ class ImportHelper {
/// @brief get the number of ignored documents /// @brief get the number of ignored documents
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
size_t getNumberIgnored() { return _numberIgnored; } size_t getNumberIgnored() const { return _numberIgnored; }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
/// @brief increase the row counter /// @brief increase the row counter
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
void incRowsRead() { ++_rowsRead; } void incRowsRead() { ++_rowsRead; }
//////////////////////////////////////////////////////////////////////////////
/// @brief get the number of rows read, i.e. the value of the counter that
/// is increased by incRowsRead()
//////////////////////////////////////////////////////////////////////////////
size_t getRowsRead() const { return _rowsRead; }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
/// @brief get the error message /// @brief get the error message
@ -222,6 +247,7 @@ class ImportHelper {
std::string _quote; std::string _quote;
std::string _createCollectionType; std::string _createCollectionType;
bool _useBackslash; bool _useBackslash;
bool _convert;
bool _createCollection; bool _createCollection;
bool _overwrite; bool _overwrite;
bool _progress; bool _progress;
@ -235,6 +261,7 @@ class ImportHelper {
size_t _rowsRead; size_t _rowsRead;
size_t _rowOffset; size_t _rowOffset;
size_t _rowsToSkip;
std::string _onDuplicateAction; std::string _onDuplicateAction;
std::string _collectionName; std::string _collectionName;