//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany /// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Dr. Frank Celler /// @author Achim Brandt //////////////////////////////////////////////////////////////////////////////// #ifndef ARANGODB_IMPORT_IMPORT_HELPER_H #define ARANGODB_IMPORT_IMPORT_HELPER_H 1 #include "Basics/Common.h" #include "Basics/csv.h" #include "Basics/StringBuffer.h" #ifdef _WIN32 #include "Basics/win-utils.h" #endif namespace arangodb { namespace httpclient { class SimpleHttpClient; class SimpleHttpResult; } } //////////////////////////////////////////////////////////////////////////////// /// @brief class for http requests //////////////////////////////////////////////////////////////////////////////// namespace arangodb { namespace import { class ImportHelper { public: ////////////////////////////////////////////////////////////////////////////// /// @brief type of delimited import ////////////////////////////////////////////////////////////////////////////// enum DelimitedImportType { CSV = 0, TSV }; private: ImportHelper(ImportHelper const&) = delete; ImportHelper& operator=(ImportHelper const&) = delete; public: ImportHelper(httpclient::SimpleHttpClient* client, uint64_t maxUploadSize); ~ImportHelper(); ////////////////////////////////////////////////////////////////////////////// /// @brief imports a delimited file ////////////////////////////////////////////////////////////////////////////// bool importDelimited(std::string const& collectionName, std::string const& fileName, DelimitedImportType typeImport); ////////////////////////////////////////////////////////////////////////////// /// @brief imports a file with JSON objects /// each line must contain a complete JSON object ////////////////////////////////////////////////////////////////////////////// bool importJson(std::string const& collectionName, std::string const& fileName); ////////////////////////////////////////////////////////////////////////////// /// @brief sets the action to carry out on duplicate _key ////////////////////////////////////////////////////////////////////////////// void setOnDuplicateAction(std::string const& action) { _onDuplicateAction = action; } ////////////////////////////////////////////////////////////////////////////// /// @brief sets the quote character /// /// this is a string because the quote might also be empty if not used ////////////////////////////////////////////////////////////////////////////// void setQuote(std::string const& quote) { _quote = quote; } ////////////////////////////////////////////////////////////////////////////// /// @brief set collection name prefix for _from ////////////////////////////////////////////////////////////////////////////// void setFrom (std::string const& from) { _fromCollectionPrefix = from; } ////////////////////////////////////////////////////////////////////////////// /// @brief set collection name prefix for _to ////////////////////////////////////////////////////////////////////////////// void setTo (std::string const& to) { _toCollectionPrefix = to; } ////////////////////////////////////////////////////////////////////////////// /// @brief whether or not backslashes can be used for escaping quotes ////////////////////////////////////////////////////////////////////////////// void useBackslash(bool value) { _useBackslash = value; } ////////////////////////////////////////////////////////////////////////////// /// @brief sets the separator ////////////////////////////////////////////////////////////////////////////// void setSeparator(std::string const& separator) { _separator = separator; } ////////////////////////////////////////////////////////////////////////////// /// @brief sets the createCollection flag /// /// @param bool value create the collection if it does not /// exist ////////////////////////////////////////////////////////////////////////////// void setCreateCollection(bool value) { _createCollection = value; } void setCreateCollectionType(std::string const& value) { _createCollectionType = value; } ////////////////////////////////////////////////////////////////////////////// /// @brief whether or not to overwrite existing data in the collection ////////////////////////////////////////////////////////////////////////////// void setOverwrite(bool value) { _overwrite = value; } ////////////////////////////////////////////////////////////////////////////// /// @brief set the progress indicator ////////////////////////////////////////////////////////////////////////////// void setProgress(bool value) { _progress = value; } ////////////////////////////////////////////////////////////////////////////// /// @brief get the number of lines read (meaningful for CSV only) ////////////////////////////////////////////////////////////////////////////// size_t getReadLines() { return _numberLines; } ////////////////////////////////////////////////////////////////////////////// /// @brief get the number of documents imported ////////////////////////////////////////////////////////////////////////////// size_t getNumberCreated() { return _numberCreated; } ////////////////////////////////////////////////////////////////////////////// /// @brief get the number of errors ////////////////////////////////////////////////////////////////////////////// size_t getNumberErrors() { return _numberErrors; } ////////////////////////////////////////////////////////////////////////////// /// @brief get the number of updated documents ////////////////////////////////////////////////////////////////////////////// size_t getNumberUpdated() { return _numberUpdated; } ////////////////////////////////////////////////////////////////////////////// /// @brief get the number of ignored documents ////////////////////////////////////////////////////////////////////////////// size_t getNumberIgnored() { return _numberIgnored; } ////////////////////////////////////////////////////////////////////////////// /// @brief increase the row counter ////////////////////////////////////////////////////////////////////////////// void incRowsRead() { ++_rowsRead; } ////////////////////////////////////////////////////////////////////////////// /// @brief get the error message /// /// @return string get the error message ////////////////////////////////////////////////////////////////////////////// std::string getErrorMessage() { return _errorMessage; } private: static void ProcessCsvBegin(TRI_csv_parser_t*, size_t); static void ProcessCsvAdd(TRI_csv_parser_t*, char const*, size_t, size_t, size_t, bool); static void ProcessCsvEnd(TRI_csv_parser_t*, char const*, size_t, size_t, size_t, bool); void reportProgress(int64_t, int64_t, double&); std::string getCollectionUrlPart(); void beginLine(size_t row); void addField(char const*, size_t, size_t row, size_t column, bool escaped); void addLastField(char const*, size_t, size_t row, size_t column, bool escaped); void sendCsvBuffer(); void sendJsonBuffer(char const* str, size_t len, bool isObject); void handleResult(httpclient::SimpleHttpResult* result); private: httpclient::SimpleHttpClient* _client; uint64_t _maxUploadSize; std::string _separator; std::string _quote; std::string _createCollectionType; bool _useBackslash; bool _createCollection; bool _overwrite; bool _progress; bool _firstChunk; size_t _numberLines; size_t _numberCreated; size_t _numberErrors; size_t _numberUpdated; size_t _numberIgnored; size_t _rowsRead; size_t _rowOffset; std::string _onDuplicateAction; std::string _collectionName; std::string _fromCollectionPrefix; std::string _toCollectionPrefix; arangodb::basics::StringBuffer _lineBuffer; arangodb::basics::StringBuffer _outputBuffer; std::string _firstLine; bool _hasError; std::string _errorMessage; static double const ProgressStep; }; } } #endif