diff --git a/arangod/RestHandler/RestImportHandler.cpp b/arangod/RestHandler/RestImportHandler.cpp
index adaa4c7027..f339b54931 100644
--- a/arangod/RestHandler/RestImportHandler.cpp
+++ b/arangod/RestHandler/RestImportHandler.cpp
@@ -182,9 +182,7 @@ int RestImportHandler::handleSingleDocument(
   VPackBuilder newBuilder;
 
   if (isEdgeCollection) {
-    // Validate from and to
-    // TODO: Check if this is unified in trx.insert
-
+    // add prefixes to _from and _to
     if (!_fromPrefix.empty() || !_toPrefix.empty()) {
       TransactionBuilderLeaser tempBuilder(&trx);
 
@@ -196,6 +194,10 @@ int RestImportHandler::handleSingleDocument(
           if (f.find('/') == std::string::npos) {
             tempBuilder->add(StaticStrings::FromString, VPackValue(_fromPrefix + f));
           }
+        } else if (from.isInteger()) {
+          // numeric _from: prepend the prefix to its decimal representation
+          uint64_t f = from.getNumber<uint64_t>();
+          tempBuilder->add(StaticStrings::FromString, VPackValue(_fromPrefix + std::to_string(f)));
         }
       }
       if (!_toPrefix.empty()) {
@@ -205,6 +207,10 @@ int RestImportHandler::handleSingleDocument(
           if (t.find('/') == std::string::npos) {
             tempBuilder->add(StaticStrings::ToString, VPackValue(_toPrefix + t));
           }
+        } else if (to.isInteger()) {
+          // numeric _to: prepend the prefix to its decimal representation
+          uint64_t t = to.getNumber<uint64_t>();
+          tempBuilder->add(StaticStrings::ToString, VPackValue(_toPrefix + std::to_string(t)));
         }
       }
       tempBuilder->close();
diff --git a/arangosh/Import/ImportFeature.cpp b/arangosh/Import/ImportFeature.cpp
index 13c90b638a..7cf05db198 100644
--- a/arangosh/Import/ImportFeature.cpp
+++ b/arangosh/Import/ImportFeature.cpp
@@ -42,6 +42,7 @@ ImportFeature::ImportFeature(application_features::ApplicationServer* server,
     : ApplicationFeature(server, "Import"),
       _filename(""),
       _useBackslash(false),
+      _convert(true),
       _chunkSize(1024 * 1024 * 16),
       _collectionName(""),
       _fromCollectionPrefix(""),
@@ -54,6 +55,7 @@ ImportFeature::ImportFeature(application_features::ApplicationServer* server,
       _separator(""),
       _progress(true),
       _onDuplicateAction("error"),
+      _rowsToSkip(0),
       _result(result) {
   requiresElevatedPrivileges(false);
   setOptional(false);
@@ -88,6
+90,14 @@ void ImportFeature::collectOptions(
   options->addOption("--create-collection",
                      "create collection if it does not yet exist",
                      new BooleanParameter(&_createCollection));
+
+  options->addOption("--skip-lines",
+                     "number of lines to skip for formats (csv and tsv only)",
+                     new UInt64Parameter(&_rowsToSkip));
+
+  options->addOption("--convert",
+                     "convert the strings 'null', 'false', 'true' and strings containing numbers into non-string types (csv and tsv only)",
+                     new BooleanParameter(&_convert));
 
   std::unordered_set<std::string> types = {"document", "edge"};
   std::vector<std::string> typesVector(types.begin(), types.end());
@@ -225,6 +235,9 @@ void ImportFeature::start() {
       ih.setCreateCollectionType(_createCollectionType);
     }
 
+    // hand the values of --convert and --skip-lines over to the helper
+    ih.setConversion(_convert);
+    ih.setRowsToSkip(static_cast<size_t>(_rowsToSkip));
     ih.setOverwrite(_overwrite);
     ih.useBackslash(_useBackslash);
 
diff --git a/arangosh/Import/ImportFeature.h b/arangosh/Import/ImportFeature.h
index 31816b318f..aafbc8c452 100644
--- a/arangosh/Import/ImportFeature.h
+++ b/arangosh/Import/ImportFeature.h
@@ -48,6 +48,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
  private:
   std::string _filename;
   bool _useBackslash;
+  bool _convert;
   uint64_t _chunkSize;
   std::string _collectionName;
   std::string _fromCollectionPrefix;
@@ -60,6 +61,7 @@ class ImportFeature final : public application_features::ApplicationFeature,
   std::string _separator;
   bool _progress;
   std::string _onDuplicateAction;
+  uint64_t _rowsToSkip;
 
  private:
   int* _result;
diff --git a/arangosh/Import/ImportHelper.cpp b/arangosh/Import/ImportHelper.cpp
index 716e2576a4..8ba39da1a2 100644
--- a/arangosh/Import/ImportHelper.cpp
+++ b/arangosh/Import/ImportHelper.cpp
@@ -143,6 +143,7 @@ ImportHelper::ImportHelper(httpclient::SimpleHttpClient* client,
       _quote("\""),
       _createCollectionType("document"),
       _useBackslash(false),
+      _convert(true),
       _createCollection(false),
       _overwrite(false),
       _progress(false),
@@ -154,6 +155,7 @@
ImportHelper::ImportHelper(httpclient::SimpleHttpClient* client,
       _numberIgnored(0),
       _rowsRead(0),
       _rowOffset(0),
+      _rowsToSkip(0),
       _onDuplicateAction("error"),
       _collectionName(),
       _lineBuffer(TRI_UNKNOWN_MEM_ZONE),
@@ -464,8 +466,14 @@ void ImportHelper::beginLine(size_t row) {
 void ImportHelper::ProcessCsvAdd(TRI_csv_parser_t* parser, char const* field,
                                  size_t fieldLength, size_t row, size_t column,
                                  bool escaped) {
-  static_cast<ImportHelper*>(parser->_dataAdd)
-      ->addField(field, fieldLength, row, column, escaped);
+  auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);
+
+  // ignore all fields of the leading rows that are to be skipped
+  if (importHelper->getRowsRead() < importHelper->getRowsToSkip()) {
+    return;
+  }
+
+  importHelper->addField(field, fieldLength, row, column, escaped);
 }
 
 void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
@@ -479,6 +487,12 @@ void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
     _lineBuffer.appendJsonEncoded(field, fieldLength);
     return;
   }
+
+  if (!_convert) {
+    // conversion is off: emit the value JSON-encoded, like escaped fields
+    _lineBuffer.appendJsonEncoded(field, fieldLength);
+    return;
+  }
 
   if (*field == '\0') {
     // do nothing
@@ -550,12 +564,16 @@ void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
 void ImportHelper::ProcessCsvEnd(TRI_csv_parser_t* parser, char const* field,
                                  size_t fieldLength, size_t row, size_t column,
                                  bool escaped) {
-  ImportHelper* ih = static_cast<ImportHelper*>(parser->_dataAdd);
-
-  if (ih) {
-    ih->addLastField(field, fieldLength, row, column, escaped);
-    ih->incRowsRead();
+  auto importHelper = static_cast<ImportHelper*>(parser->_dataAdd);
+
+  if (importHelper->getRowsRead() < importHelper->getRowsToSkip()) {
+    // count the skipped row, otherwise this condition stays true forever
+    importHelper->incRowsRead();
+    return;
   }
+
+  importHelper->addLastField(field, fieldLength, row, column, escaped);
+  importHelper->incRowsRead();
 }
 
 void ImportHelper::addLastField(char const* field, size_t fieldLength,
diff --git a/arangosh/Import/ImportHelper.h b/arangosh/Import/ImportHelper.h
index 564d1aa31f..0fd46b81f4 100644
--- a/arangosh/Import/ImportHelper.h
+++ b/arangosh/Import/ImportHelper.h
@@ -143,6 +143,25 @@ class
ImportHelper {
   //////////////////////////////////////////////////////////////////////////////
 
   void setOverwrite(bool value) { _overwrite = value; }
+
+  //////////////////////////////////////////////////////////////////////////////
+  /// @brief set the number of rows to skip
+  //////////////////////////////////////////////////////////////////////////////
+
+  void setRowsToSkip(size_t value) { _rowsToSkip = value; }
+
+  //////////////////////////////////////////////////////////////////////////////
+  /// @brief get the number of rows to skip
+  //////////////////////////////////////////////////////////////////////////////
+
+  size_t getRowsToSkip() const { return _rowsToSkip; }
+
+  //////////////////////////////////////////////////////////////////////////////
+  /// @brief whether or not to convert strings that contain "null", "false",
+  /// "true" or that look like numbers into those types
+  //////////////////////////////////////////////////////////////////////////////
+
+  void setConversion(bool value) { _convert = value; }
 
   //////////////////////////////////////////////////////////////////////////////
   /// @brief set the progress indicator
@@ -178,13 +197,19 @@ class ImportHelper {
   /// @brief get the number of ignored documents
   //////////////////////////////////////////////////////////////////////////////
 
-  size_t getNumberIgnored() { return _numberIgnored; }
+  size_t getNumberIgnored() const { return _numberIgnored; }
 
   //////////////////////////////////////////////////////////////////////////////
   /// @brief increase the row counter
   //////////////////////////////////////////////////////////////////////////////
 
   void incRowsRead() { ++_rowsRead; }
+
+  //////////////////////////////////////////////////////////////////////////////
+  /// @brief get the number of rows read
+  //////////////////////////////////////////////////////////////////////////////
+
+  size_t getRowsRead() const { return _rowsRead; }
 
   //////////////////////////////////////////////////////////////////////////////
   /// @brief get the error message
@@ -222,6 +247,7 @@ class ImportHelper {
   std::string _quote;
   std::string _createCollectionType;
   bool _useBackslash;
+  bool _convert;
   bool _createCollection;
   bool _overwrite;
   bool _progress;
@@ -235,6 +261,7 @@ class ImportHelper {
 
   size_t _rowsRead;
   size_t _rowOffset;
+  size_t _rowsToSkip;
 
   std::string _onDuplicateAction;
   std::string _collectionName;