//////////////////////////////////////////////////////////////////////////////// /// @brief import helper /// /// @file /// /// DISCLAIMER /// /// Copyright by triAGENS GmbH - All rights reserved. /// /// The Programs (which include both the software and documentation) /// contain proprietary information of triAGENS GmbH; they are /// provided under a license agreement containing restrictions on use and /// disclosure and are also protected by copyright, patent and other /// intellectual and industrial property laws. Reverse engineering, /// disassembly or decompilation of the Programs, except to the extent /// required to obtain interoperability with other independently created /// software or as specified by law, is prohibited. /// /// The Programs are not intended for use in any nuclear, aviation, mass /// transit, medical, or other inherently dangerous applications. It shall /// be the licensee's responsibility to take all appropriate fail-safe, /// backup, redundancy, and other measures to ensure the safe use of such /// applications if the Programs are used for such purposes, and triAGENS /// GmbH disclaims liability for any damages caused by such use of /// the Programs. /// /// This software is the confidential and proprietary information of /// triAGENS GmbH. You shall not disclose such confidential and /// proprietary information and shall use it only in accordance with the /// terms of the license agreement you entered into with triAGENS GmbH. /// /// Copyright holder is triAGENS GmbH, Cologne, Germany /// /// @author Dr. Frank Celler /// @author Achim Brandt /// @author Copyright 2008-2011, triagens GmbH, Cologne, Germany //////////////////////////////////////////////////////////////////////////////// #include "ImportHelper.h" #include #include "BasicsC/csv.h" #include #include "Variant/VariantArray.h" #include "Variant/VariantBoolean.h" #include "Variant/VariantInt64.h" #include "Variant/VariantString.h" #include "SimpleHttpClient/SimpleHttpClient.h" #include "SimpleHttpClient/SimpleHttpResult.h" #include "json.h" using namespace triagens::basics; using namespace triagens::httpclient; using namespace std; namespace triagens { namespace v8client { //////////////////////////////////////////////////////////////////////////////// /// constructor and destructor //////////////////////////////////////////////////////////////////////////////// ImportHelper::ImportHelper (httpclient::SimpleHttpClient* _client, size_t maxUploadSize) : _client(_client), _maxUploadSize(maxUploadSize), _lineBuffer(TRI_UNKNOWN_MEM_ZONE), _outputBuffer(TRI_UNKNOWN_MEM_ZONE) { _quote = '"'; _separator = ','; regcomp(&_doubleRegex, "^[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)([eE][-+]?[0-8]+)?$", REG_ICASE | REG_EXTENDED); regcomp(&_intRegex, "^[-+]?([0-9]+)$", REG_ICASE | REG_EXTENDED); _hasError = false; } ImportHelper::~ImportHelper () { } //////////////////////////////////////////////////////////////////////////////// /// public functions //////////////////////////////////////////////////////////////////////////////// bool ImportHelper::importCsv (const string& collectionName, const string& fileName) { _collectionName = collectionName; _firstLine = ""; _numberLines = 0; _numberOk = 0; _numberError = 0; _outputBuffer.clear(); _lineBuffer.clear(); _errorMessage = ""; _hasError = false; // read and convert int fd; if (fileName == "-") { fd = STDIN_FILENO; } else { fd = open(fileName.c_str(), O_RDONLY); } if (fd < 0) { _errorMessage = TRI_LAST_ERROR_STR; return false; } TRI_csv_parser_t parser; TRI_InitCsvParser(&parser, TRI_UNKNOWN_MEM_ZONE, ProcessCsvBegin, ProcessCsvAdd, ProcessCsvEnd); parser._separator = _separator; parser._quote = _quote; parser._dataAdd = this; char buffer[10240]; while (!_hasError) { v8::HandleScope scope; ssize_t n = read(fd, buffer, sizeof (buffer)); if (n < 0) { TRI_DestroyCsvParser(&parser); _errorMessage = TRI_LAST_ERROR_STR; return false; } else if (n == 0) { break; } TRI_ParseCsvString2(&parser, buffer, n); } if (_outputBuffer.length() > 0) { sendCsvBuffer(); } TRI_DestroyCsvParser(&parser); if (fileName != "-") { close(fd); } _outputBuffer.clear(); return !_hasError; } bool ImportHelper::importJson (const string& collectionName, const string& fileName) { _collectionName = collectionName; _firstLine = ""; _numberLines = 0; _numberOk = 0; _numberError = 0; _outputBuffer.clear(); _errorMessage = ""; _hasError = false; // read and convert int fd; if (fileName == "-") { fd = STDIN_FILENO; } else { fd = open(fileName.c_str(), O_RDONLY); } if (fd < 0) { _errorMessage = TRI_LAST_ERROR_STR; return false; } char buffer[10240]; while (!_hasError) { ssize_t n = read(fd, buffer, sizeof(buffer)); if (n < 0) { _errorMessage = TRI_LAST_ERROR_STR; return false; } else if (n == 0) { break; } _outputBuffer.appendText(buffer, n); if (_outputBuffer.length() > _maxUploadSize) { // send all data before last '\n' const char* first = _outputBuffer.c_str(); char* pos = (char*) memrchr(first, '\n', _outputBuffer.length()); if (pos != 0) { size_t len = pos - first + 1; sendJsonBuffer(first, len); _outputBuffer.erase_front(len); } } } if (_outputBuffer.length() > 0) { sendJsonBuffer(_outputBuffer.c_str(), _outputBuffer.length()); } _numberLines = _numberError + _numberOk; if (fileName != "-") { close(fd); } _outputBuffer.clear(); return !_hasError; } //////////////////////////////////////////////////////////////////////////////// /// private functions //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// /// @brief start a new csv line //////////////////////////////////////////////////////////////////////////////// void ImportHelper::ProcessCsvBegin (TRI_csv_parser_t* parser, size_t row) { ImportHelper* ih = reinterpret_cast (parser->_dataAdd); if (ih) ih->beginLine(row); } void ImportHelper::beginLine(size_t row) { if (_lineBuffer.length() > 0) { // error ++_numberError; _lineBuffer.clear(); } ++_numberLines; if (row > 0) { _lineBuffer.appendChar('\n'); } _lineBuffer.appendChar('['); } //////////////////////////////////////////////////////////////////////////////// /// @brief adds a new CSV field //////////////////////////////////////////////////////////////////////////////// void ImportHelper::ProcessCsvAdd (TRI_csv_parser_t* parser, char const* field, size_t row, size_t column, bool escaped) { ImportHelper* ih = reinterpret_cast (parser->_dataAdd); if (ih) ih->addField(field, row, column, escaped); } void ImportHelper::addField (char const* field, size_t row, size_t column, bool escaped) { if (column > 0) { _lineBuffer.appendChar(','); } if (row == 0) { // head line _lineBuffer.appendChar('"'); _lineBuffer.appendText(StringUtils::escapeUnicode(field)); _lineBuffer.appendChar('"'); } else { if (escaped) { _lineBuffer.appendChar('"'); _lineBuffer.appendText(StringUtils::escapeUnicode(field)); _lineBuffer.appendChar('"'); } else { string s(field); if (s.length() == 0) { // do nothing _lineBuffer.appendText("null"); } else if ("true" == s || "false" == s) { _lineBuffer.appendText(field); } else { if (regexec(&_intRegex, s.c_str(), 0, 0, 0) == 0) { int64_t num = StringUtils::int64(s); _lineBuffer.appendInteger(num); } else if (regexec(&_doubleRegex, s.c_str(), 0, 0, 0) == 0) { double num = StringUtils::doubleDecimal(s); _lineBuffer.appendDecimal(num); } else { _lineBuffer.appendChar('"'); _lineBuffer.appendText(StringUtils::escapeUnicode(field)); _lineBuffer.appendChar('"'); } } } } } //////////////////////////////////////////////////////////////////////////////// /// @brief ends a CSV line //////////////////////////////////////////////////////////////////////////////// void ImportHelper::ProcessCsvEnd (TRI_csv_parser_t* parser, char const* field, size_t row, size_t column, bool escaped) { ImportHelper* ih = reinterpret_cast (parser->_dataAdd); if (ih) ih->addLastField(field, row, column, escaped); } void ImportHelper::addLastField (char const* field, size_t row, size_t column, bool escaped) { if (column == 0 && StringUtils::trim(field) == "") { // ignore empty line _lineBuffer.clear(); return; } addField(field, row, column, escaped); _lineBuffer.appendChar(']'); if (row == 0) { // save the first line _firstLine = _lineBuffer.c_str(); } else if (row > 0 && _firstLine == "") { // error ++_numberError; _lineBuffer.clear(); return; } // read a complete line if (_lineBuffer.length() > 0) { _outputBuffer.appendText(_lineBuffer); _lineBuffer.clear(); } else { ++_numberError; } if (_outputBuffer.length() > _maxUploadSize) { sendCsvBuffer(); _outputBuffer.appendText(_firstLine); } } void ImportHelper::sendCsvBuffer () { if (_hasError) { return; } map headerFields; SimpleHttpResult* result = _client->request(SimpleHttpClient::POST, "/_api/import?collection=" + StringUtils::urlEncode(_collectionName), _outputBuffer.c_str(), _outputBuffer.length(), headerFields); handleResult(result); _outputBuffer.clear(); } void ImportHelper::sendJsonBuffer (char const* str, size_t len) { if (_hasError) { return; } map headerFields; SimpleHttpResult* result = _client->request(SimpleHttpClient::POST, "/_api/import?type=documents&collection=" + StringUtils::urlEncode(_collectionName), str, len, headerFields); handleResult(result); } void ImportHelper::handleResult (SimpleHttpResult* result) { if (!result) { return; } VariantArray* va = result->getBodyAsVariantArray(); if (va) { VariantBoolean* vb = va->lookupBoolean("error"); if (vb && vb->getValue()) { // is error _hasError = true; VariantString* vs = va->lookupString("errorMessage"); if (vs) { _errorMessage = vs->getValue(); } } VariantInt64* vi = va->lookupInt64("created"); if (vi && vi->getValue() > 0) { _numberOk += (size_t) vi->getValue(); } vi = va->lookupInt64("errors"); if (vi && vi->getValue() > 0) { _numberError += (size_t) vi->getValue(); } delete va; } delete result; } } }