1
0
Fork 0

Bug fix 3.5/issue 10193 (#10194)

* fixed issue #10193: Arangoexport does not handle line feeds when exporting as csv

* escape \r too
This commit is contained in:
Jan 2019-10-09 15:01:51 +02:00 committed by KVS85
parent 345a06ade4
commit bf187f4b50
7 changed files with 115 additions and 70 deletions

View File

@ -1,6 +1,9 @@
v3.5.2 (XXXX-XX-XX)
-------------------
* Fixed issue #10193: Arangoexport does not handle line feeds when exporting as
csv.
* Removed debug log messages "found comm task ..." that could be logged
on server shutdown.

View File

@ -34,8 +34,12 @@
#include "SimpleHttpClient/SimpleHttpClient.h"
#include "SimpleHttpClient/SimpleHttpResult.h"
#include <boost/algorithm/string.hpp>
#include <boost/property_tree/detail/xml_parser_utils.hpp>
#include <velocypack/Builder.h>
#include <velocypack/Dumper.h>
#include <velocypack/Slice.h>
#include <velocypack/Sink.h>
#include <velocypack/velocypack-aliases.h>
#include <iostream>
#include <regex>
#include <sys/types.h>
@ -63,8 +67,6 @@ ExportFeature::ExportFeature(application_features::ApplicationServer& server, in
_graphName(),
_xgmmlLabelAttribute("label"),
_typeExport("json"),
_csvFieldOptions(),
_csvFields(),
_xgmmlLabelOnly(false),
_outputDirectory(),
_overwrite(false),
@ -111,7 +113,7 @@ void ExportFeature::collectOptions(std::shared_ptr<options::ProgramOptions> opti
options->addOption("--progress", "show progress", new BooleanParameter(&_progress));
options->addOption("--fields",
"comma separated list of fileds to export into a csv file",
"comma separated list of fields to export into a csv file",
new StringParameter(&_csvFieldOptions));
std::unordered_set<std::string> exports = {"csv", "json", "jsonl", "xgmml",
@ -178,7 +180,7 @@ void ExportFeature::validateOptions(std::shared_ptr<options::ProgramOptions> opt
FATAL_ERROR_EXIT();
}
boost::split(_csvFields, _csvFieldOptions, boost::is_any_of(","));
_csvFields = StringUtils::split(_csvFieldOptions, ',');
}
}
@ -341,10 +343,10 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) {
if (_typeExport == "json") {
std::string closingBracket = "\n]";
writeToFile(*fd, closingBracket, fileName);
writeToFile(*fd, closingBracket);
} else if (_typeExport == "xml") {
std::string xmlFooter = "</collection>";
writeToFile(*fd, xmlFooter, fileName);
writeToFile(*fd, xmlFooter);
}
}
}
@ -394,10 +396,10 @@ void ExportFeature::queryExport(SimpleHttpClient* httpClient) {
if (_typeExport == "json") {
std::string closingBracket = "\n]";
writeToFile(*fd, closingBracket, fileName);
writeToFile(*fd, closingBracket);
} else if (_typeExport == "xml") {
std::string xmlFooter = "</collection>";
writeToFile(*fd, xmlFooter, fileName);
writeToFile(*fd, xmlFooter);
}
}
@ -406,7 +408,7 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
_firstLine = true;
if (_typeExport == "json") {
std::string openingBracket = "[";
writeToFile(fd, openingBracket, fileName);
writeToFile(fd, openingBracket);
} else if (_typeExport == "xml") {
std::string xmlHeader =
@ -414,10 +416,10 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
"<collection name=\"";
xmlHeader.append(encode_char_entities(collection));
xmlHeader.append("\">\n");
writeToFile(fd, xmlHeader, fileName);
writeToFile(fd, xmlHeader);
} else if (_typeExport == "csv") {
std::string firstLine = "";
std::string firstLine;
bool isFirstValue = true;
for (auto const& str : _csvFields) {
if (isFirstValue) {
@ -428,22 +430,28 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
}
}
firstLine += "\n";
writeToFile(fd, firstLine, fileName);
writeToFile(fd, firstLine);
}
}
void ExportFeature::writeBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName) {
void ExportFeature::writeBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName) {
std::string line;
line.reserve(1024);
if (_typeExport == "jsonl") {
VPackStringSink sink(&line);
VPackDumper dumper(&sink);
for (auto const& doc : it) {
line.clear();
line += doc.toJson();
dumper.dump(doc);
line.push_back('\n');
writeToFile(fd, line, fileName);
writeToFile(fd, line);
}
} else if (_typeExport == "json") {
VPackStringSink sink(&line);
VPackDumper dumper(&sink);
for (auto const& doc : it) {
line.clear();
if (!_firstLine) {
@ -452,8 +460,8 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
line.append("\n ", 3);
_firstLine = false;
}
line += doc.toJson();
writeToFile(fd, line, fileName);
dumper.dump(doc);
writeToFile(fd, line);
}
} else if (_typeExport == "csv") {
for (auto const& doc : it) {
@ -461,39 +469,50 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
bool isFirstValue = true;
for (auto const& key : _csvFields) {
std::string value = "";
if (isFirstValue) {
isFirstValue = false;
} else {
line.append(",");
line.push_back(',');
}
if (doc.hasKey(key)) {
VPackSlice val = doc.get(key);
VPackSlice val = doc.get(key);
if (!val.isNone()) {
std::string value;
bool escape = false;
if (val.isArray() || val.isObject()) {
value = val.toJson();
escape = true;
} else {
if (val.isString()) {
value = val.copyString();
escape = true;
} else {
value = val.toString();
}
}
value = std::regex_replace(value, std::regex("\""), "\"\"");
if (escape) {
value = std::regex_replace(value, std::regex("\""), "\"\"");
if (value.find(",") != std::string::npos ||
value.find("\"\"") != std::string::npos) {
value = "\"" + value;
value.append("\"");
if (value.find(',') != std::string::npos ||
value.find('\"') != std::string::npos ||
value.find('\r') != std::string::npos ||
value.find('\n') != std::string::npos) {
// escape value and put it in quotes
line.push_back('\"');
line.append(value);
line.push_back('\"');
continue;
}
}
// write unescaped
line.append(value);
}
line.append(value);
}
line.append("\n");
writeToFile(fd, line, fileName);
line.push_back('\n');
writeToFile(fd, line);
}
} else if (_typeExport == "xml") {
for (auto const& doc : it) {
@ -501,18 +520,18 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
line.append("<doc key=\"");
line.append(encode_char_entities(doc.get("_key").copyString()));
line.append("\">\n");
writeToFile(fd, line, fileName);
writeToFile(fd, line);
for (auto const& att : VPackObjectIterator(doc)) {
xgmmlWriteOneAtt(fd, fileName, att.value, att.key.copyString(), 2);
xgmmlWriteOneAtt(fd, att.value, att.key.copyString(), 2);
}
line.clear();
line.append("</doc>\n");
writeToFile(fd, line, fileName);
writeToFile(fd, line);
}
}
}
void ExportFeature::writeToFile(ManagedDirectory::File & fd, std::string const& line, std::string const& fileName) {
void ExportFeature::writeToFile(ManagedDirectory::File & fd, std::string const& line) {
fd.write(line.c_str(), line.size());
}
@ -613,14 +632,14 @@ void ExportFeature::graphExport(SimpleHttpClient* httpClient) {
std::string xmlHeader =
R"(<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<graph label=")";
writeToFile(*fd, xmlHeader, fileName);
writeToFile(*fd, _graphName, fileName);
writeToFile(*fd, xmlHeader);
writeToFile(*fd, _graphName);
xmlHeader = R"("
xmlns="http://www.cs.rpi.edu/XGMML"
directed="1">
)";
writeToFile(*fd, xmlHeader, fileName);
writeToFile(*fd, xmlHeader);
for (auto const& collection : _collections) {
if (_progress) {
@ -656,7 +675,7 @@ directed="1">
}
}
std::string closingGraphTag = "</graph>\n";
writeToFile(*fd, closingGraphTag, fileName);
writeToFile(*fd, closingGraphTag);
if (_skippedDeepNested) {
std::cout << "skipped " << _skippedDeepNested
@ -677,21 +696,21 @@ void ExportFeature::writeGraphBatch(ManagedDirectory::File & fd, VPackArrayItera
"\" source=\"" + encode_char_entities(doc.get("_from").copyString()) +
"\" target=\"" +
encode_char_entities(doc.get("_to").copyString()) + "\"";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
if (!_xgmmlLabelOnly) {
xmlTag = ">\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
for (auto const& it : VPackObjectIterator(doc)) {
xgmmlWriteOneAtt(fd, fileName, it.value, it.key.copyString());
xgmmlWriteOneAtt(fd, it.value, it.key.copyString());
}
xmlTag = "</edge>\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
} else {
xmlTag = " />\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
}
} else {
@ -702,27 +721,27 @@ void ExportFeature::writeGraphBatch(ManagedDirectory::File & fd, VPackArrayItera
? doc.get(_xgmmlLabelAttribute).copyString()
: "Default-Label") +
"\" id=\"" + encode_char_entities(doc.get("_id").copyString()) + "\"";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
if (!_xgmmlLabelOnly) {
xmlTag = ">\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
for (auto const& it : VPackObjectIterator(doc)) {
xgmmlWriteOneAtt(fd, fileName, it.value, it.key.copyString());
xgmmlWriteOneAtt(fd, it.value, it.key.copyString());
}
xmlTag = "</node>\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
} else {
xmlTag = " />\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
}
}
}
}
void ExportFeature::xgmmlWriteOneAtt(ManagedDirectory::File & fd, std::string const& fileName,
void ExportFeature::xgmmlWriteOneAtt(ManagedDirectory::File & fd,
VPackSlice const& slice,
std::string const& name, int deep) {
std::string value, type, xmlTag;
@ -761,38 +780,38 @@ void ExportFeature::xgmmlWriteOneAtt(ManagedDirectory::File & fd, std::string co
xmlTag = " <att name=\"" + encode_char_entities(name) +
"\" type=\"string\" value=\"" +
encode_char_entities(slice.toString()) + "\"/>\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
return;
}
if (!type.empty()) {
xmlTag = " <att name=\"" + encode_char_entities(name) + "\" type=\"" +
type + "\" value=\"" + encode_char_entities(value) + "\"/>\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
} else if (slice.isArray()) {
xmlTag =
" <att name=\"" + encode_char_entities(name) + "\" type=\"list\">\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
for (VPackSlice val : VPackArrayIterator(slice)) {
xgmmlWriteOneAtt(fd, fileName, val, name, deep + 1);
xgmmlWriteOneAtt(fd, val, name, deep + 1);
}
xmlTag = " </att>\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
} else if (slice.isObject()) {
xmlTag =
" <att name=\"" + encode_char_entities(name) + "\" type=\"list\">\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
for (auto const& it : VPackObjectIterator(slice)) {
xgmmlWriteOneAtt(fd, fileName, it.value, it.key.copyString(), deep + 1);
xgmmlWriteOneAtt(fd, it.value, it.key.copyString(), deep + 1);
}
xmlTag = " </att>\n";
writeToFile(fd, xmlTag, fileName);
writeToFile(fd, xmlTag);
}
}

View File

@ -53,14 +53,14 @@ class ExportFeature final : public application_features::ApplicationFeature,
private:
void collectionExport(httpclient::SimpleHttpClient* httpClient);
void queryExport(httpclient::SimpleHttpClient* httpClient);
void writeFirstLine(ManagedDirectory::File & fd, std::string const& fileName, std::string const& collection);
void writeBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName);
void writeFirstLine(ManagedDirectory::File& fd, std::string const& fileName, std::string const& collection);
void writeBatch(ManagedDirectory::File& fd, VPackArrayIterator it, std::string const& fileName);
void graphExport(httpclient::SimpleHttpClient* httpClient);
void writeGraphBatch(ManagedDirectory::File &fd, VPackArrayIterator it, std::string const& fileName);
void xgmmlWriteOneAtt(ManagedDirectory::File & fd, std::string const& fileName, VPackSlice const& slice,
void writeGraphBatch(ManagedDirectory::File& fd, VPackArrayIterator it, std::string const& fileName);
void xgmmlWriteOneAtt(ManagedDirectory::File& fd, VPackSlice const& slice,
std::string const& name, int deep = 0);
void writeToFile(ManagedDirectory::File & fd, std::string const& string, std::string const& fileName);
void writeToFile(ManagedDirectory::File& fd, std::string const& string);
std::shared_ptr<VPackBuilder> httpCall(httpclient::SimpleHttpClient* httpClient,
std::string const& url, arangodb::rest::RequestType,
std::string postBody = "");

View File

@ -28,8 +28,7 @@
const functionsDocumentation = {
'export': 'export formats tests'
};
const optionsDocumentation = [
];
const optionsDocumentation = [];
const fs = require('fs');
const pu = require('@arangodb/process-utils');
@ -100,7 +99,7 @@ function exportTest (options) {
'overwrite': true,
'output-directory': tmpPath
};
const results = {failed: 0};
let results = {failed: 0};
function shutdown () {
print(CYAN + 'Shutting down...' + RESET);
@ -333,7 +332,7 @@ function exportTest (options) {
results.exportQueryGz = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck);
results.exportQueryGz.failed = results.exportQuery.status ? 0 : 1;
try {
fs.readGzip(fs.join(tmpPath, 'query.jsonl')).split('\n')
fs.readGzip(fs.join(tmpPath, 'query.jsonl.gz')).split('\n')
.filter(line => line.trim() !== '')
.forEach(line => JSON.parse(line));
results.parseQueryResultGz = {
@ -341,7 +340,6 @@ function exportTest (options) {
status: true
};
} catch (e) {
print(e);
results.failed += 1;
results.parseQueryResultGz = {
failed: 1,
@ -350,6 +348,29 @@ function exportTest (options) {
};
}
args['compress-output'] = 'false';
print(CYAN + Date() + ': Export data (csv)' + RESET);
args['type'] = 'csv';
args['query'] = 'FOR doc IN UnitTestsExport RETURN doc';
args['fields'] = '_key,value1,value2,value3,value4';
results.exportCsv = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck);
results.exportCsv.failed = results.exportJsonl.status ? 0 : 1;
try {
fs.read(fs.join(tmpPath, 'query.csv'));
results.parseCsv = {
failed: 0,
status: true
};
} catch (e) {
results.failed += 1;
results.parseCsv = {
failed: 1,
status: false,
message: e
};
}
delete args['fields'];
return shutdown();
}

View File

@ -1109,7 +1109,7 @@ char* TRI_SlurpGzipFile(char const* filename, size_t* length) {
TRI_set_errno(TRI_ERROR_NO_ERROR);
gzFile gzFd(gzopen(filename,"rb"));
auto fdGuard = arangodb::scopeGuard([&gzFd](){ if (nullptr != gzFd) gzclose(gzFd); });
char * retPtr = nullptr;
char* retPtr = nullptr;
if (nullptr != gzFd) {
TRI_string_buffer_t result;

View File

@ -44,6 +44,7 @@
for (let i = 0; i < 100; ++i) {
col.save({ _key: "export" + i, value1: i, value2: "this is export", value3: "export" + i, value4: "%<>\"'" });
}
col.save({ _key: "special", value1: "abc \"def\" ghi", value2: [1, 2], value3: { foo: "bar" }, value4: "abc\r\ncd" });
}
return {

View File

@ -44,6 +44,7 @@
for (let i = 0; i < 100; ++i) {
col.save({ _key: "export" + i, value1: i, value2: "this is export", value3: "export" + i, value4: "%<>\"'" });
}
col.save({ _key: "special", value1: "abc \"def\" ghi", value2: [1, 2], value3: { foo: "bar" }, value4: "abc\r\ncd" });
}
return {