1
0
Fork 0
arangodb/arangosh/Import/ImportHelper.h

254 lines
8.8 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2014-2016 ArangoDB GmbH, Cologne, Germany
/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Dr. Frank Celler
/// @author Achim Brandt
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGODB_IMPORT_IMPORT_HELPER_H
#define ARANGODB_IMPORT_IMPORT_HELPER_H 1
#include "Basics/Common.h"
#include "Basics/csv.h"
#include "Basics/StringBuffer.h"
#ifdef _WIN32
#include "Basics/win-utils.h"
#endif
// Forward declarations of the HTTP client types. This header only refers to
// them through pointers, so their full definitions are not needed here.
namespace arangodb {
namespace httpclient {
class SimpleHttpResult;
class SimpleHttpClient;
}  // namespace httpclient
}  // namespace arangodb
////////////////////////////////////////////////////////////////////////////////
/// @brief helper class for importing delimited (CSV/TSV) and JSON files
////////////////////////////////////////////////////////////////////////////////
namespace arangodb {
namespace import {
class ImportHelper {
 public:
  //////////////////////////////////////////////////////////////////////////////
  /// @brief type of delimited import
  //////////////////////////////////////////////////////////////////////////////

  enum DelimitedImportType { CSV = 0, TSV };

  //////////////////////////////////////////////////////////////////////////////
  /// @brief instances are neither copy-constructible nor copy-assignable
  //////////////////////////////////////////////////////////////////////////////

  ImportHelper(ImportHelper const&) = delete;
  ImportHelper& operator=(ImportHelper const&) = delete;

  //////////////////////////////////////////////////////////////////////////////
  /// @brief creates an import helper
  ///
  /// @param client         HTTP client handed to the helper
  ///                       NOTE(review): ownership of the pointer is not
  ///                       visible from this header - confirm in the .cpp
  /// @param maxUploadSize  upload size limit (presumably the maximum size of a
  ///                       single request body in bytes - confirm in the .cpp)
  //////////////////////////////////////////////////////////////////////////////

  ImportHelper(httpclient::SimpleHttpClient* client, uint64_t maxUploadSize);

  ~ImportHelper();

  //////////////////////////////////////////////////////////////////////////////
  /// @brief imports a delimited file
  ///
  /// @param collectionName  name of the target collection
  /// @param fileName        name of the file to import
  /// @param typeImport      flavor of the delimited file (CSV or TSV)
  /// @return bool           result flag; see getErrorMessage() for the message
  ///                        (presumably true on success - confirm in the .cpp)
  //////////////////////////////////////////////////////////////////////////////

  bool importDelimited(std::string const& collectionName,
                       std::string const& fileName,
                       DelimitedImportType typeImport);

  //////////////////////////////////////////////////////////////////////////////
  /// @brief imports a file with JSON objects
  /// each line must contain a complete JSON object
  ///
  /// @param collectionName  name of the target collection
  /// @param fileName        name of the file to import
  /// @return bool           result flag; see getErrorMessage() for the message
  ///                        (presumably true on success - confirm in the .cpp)
  //////////////////////////////////////////////////////////////////////////////

  bool importJson(std::string const& collectionName,
                  std::string const& fileName);

  //////////////////////////////////////////////////////////////////////////////
  /// @brief sets the action to carry out on duplicate _key
  //////////////////////////////////////////////////////////////////////////////

  void setOnDuplicateAction(std::string const& action) {
    _onDuplicateAction = action;
  }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief sets the quote character
  ///
  /// this is a string because the quote might also be empty if not used
  //////////////////////////////////////////////////////////////////////////////

  void setQuote(std::string const& quote) { _quote = quote; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief set collection name prefix for _from
  //////////////////////////////////////////////////////////////////////////////

  void setFrom(std::string const& from) { _fromCollectionPrefix = from; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief set collection name prefix for _to
  //////////////////////////////////////////////////////////////////////////////

  void setTo(std::string const& to) { _toCollectionPrefix = to; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief whether or not backslashes can be used for escaping quotes
  //////////////////////////////////////////////////////////////////////////////

  void useBackslash(bool value) { _useBackslash = value; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief sets the separator
  //////////////////////////////////////////////////////////////////////////////

  void setSeparator(std::string const& separator) { _separator = separator; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief sets the createCollection flag
  ///
  /// @param bool value create the collection if it does not
  ///                   exist
  //////////////////////////////////////////////////////////////////////////////

  void setCreateCollection(bool value) { _createCollection = value; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief sets the type used when a collection is created
  ///
  /// the accepted values are not visible from this header
  //////////////////////////////////////////////////////////////////////////////

  void setCreateCollectionType(std::string const& value) {
    _createCollectionType = value;
  }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief whether or not to overwrite existing data in the collection
  //////////////////////////////////////////////////////////////////////////////

  void setOverwrite(bool value) { _overwrite = value; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief set the progress indicator
  //////////////////////////////////////////////////////////////////////////////

  void setProgress(bool value) { _progress = value; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief get the number of lines read (meaningful for CSV only)
  //////////////////////////////////////////////////////////////////////////////

  size_t getReadLines() const { return _numberLines; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief get the number of documents imported
  //////////////////////////////////////////////////////////////////////////////

  size_t getNumberCreated() const { return _numberCreated; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief get the number of errors
  //////////////////////////////////////////////////////////////////////////////

  size_t getNumberErrors() const { return _numberErrors; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief get the number of updated documents
  //////////////////////////////////////////////////////////////////////////////

  size_t getNumberUpdated() const { return _numberUpdated; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief get the number of ignored documents
  //////////////////////////////////////////////////////////////////////////////

  size_t getNumberIgnored() const { return _numberIgnored; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief increase the row counter
  //////////////////////////////////////////////////////////////////////////////

  void incRowsRead() { ++_rowsRead; }

  //////////////////////////////////////////////////////////////////////////////
  /// @brief get the error message
  ///
  /// @return string       get the error message
  //////////////////////////////////////////////////////////////////////////////

  std::string getErrorMessage() const { return _errorMessage; }

 private:
  //////////////////////////////////////////////////////////////////////////////
  /// @brief static functions taking the CSV parser as first argument
  ///
  /// NOTE(review): presumably registered as begin/add/end callbacks with
  /// TRI_csv_parser_t and forwarding to beginLine/addField/addLastField -
  /// confirm in the .cpp
  //////////////////////////////////////////////////////////////////////////////

  static void ProcessCsvBegin(TRI_csv_parser_t*, size_t);
  static void ProcessCsvAdd(TRI_csv_parser_t*, char const*, size_t, size_t,
                            size_t, bool);
  static void ProcessCsvEnd(TRI_csv_parser_t*, char const*, size_t, size_t,
                            size_t, bool);

  //////////////////////////////////////////////////////////////////////////////
  /// @brief internal helpers, implemented out of line
  //////////////////////////////////////////////////////////////////////////////

  void reportProgress(int64_t, int64_t, double&);
  std::string getCollectionUrlPart();
  void beginLine(size_t row);
  void addField(char const*, size_t, size_t row, size_t column, bool escaped);
  void addLastField(char const*, size_t, size_t row, size_t column,
                    bool escaped);
  void sendCsvBuffer();
  void sendJsonBuffer(char const* str, size_t len, bool isObject);
  void handleResult(httpclient::SimpleHttpResult* result);

 private:
  // NOTE: the declaration order below determines the member initialization
  // order; keep it in sync with the constructor's initializer list

  httpclient::SimpleHttpClient* _client;  // client passed to the constructor
  uint64_t _maxUploadSize;                // limit passed to the constructor

  std::string _separator;             // set via setSeparator()
  std::string _quote;                 // set via setQuote(), may be empty
  std::string _createCollectionType;  // set via setCreateCollectionType()
  bool _useBackslash;                 // set via useBackslash()
  bool _createCollection;             // set via setCreateCollection()
  bool _overwrite;                    // set via setOverwrite()
  bool _progress;                     // set via setProgress()
  bool _firstChunk;

  size_t _numberLines;    // returned by getReadLines()
  size_t _numberCreated;  // returned by getNumberCreated()
  size_t _numberErrors;   // returned by getNumberErrors()
  size_t _numberUpdated;  // returned by getNumberUpdated()
  size_t _numberIgnored;  // returned by getNumberIgnored()

  size_t _rowsRead;  // incremented by incRowsRead()
  size_t _rowOffset;

  std::string _onDuplicateAction;     // set via setOnDuplicateAction()
  std::string _collectionName;
  std::string _fromCollectionPrefix;  // set via setFrom()
  std::string _toCollectionPrefix;    // set via setTo()
  arangodb::basics::StringBuffer _lineBuffer;
  arangodb::basics::StringBuffer _outputBuffer;
  std::string _firstLine;

  bool _hasError;
  std::string _errorMessage;  // returned by getErrorMessage()

  static double const ProgressStep;  // defined out of line
};
}
}
#endif