mirror of https://gitee.com/bigwinds/arangodb
Bug fix 3.5/issue 10193 (#10194)
* fixed issue #10193: Arangoexport does not handle line feeds when exporting as csv; * escape \r too
This commit is contained in:
parent
345a06ade4
commit
bf187f4b50
|
@ -1,6 +1,9 @@
|
|||
v3.5.2 (XXXX-XX-XX)
|
||||
-------------------
|
||||
|
||||
* Fixed issue #10193: Arangoexport does not handle line feeds when exporting as
|
||||
csv.
|
||||
|
||||
* Removed debug log messages "found comm task ..." that could be logged
|
||||
on server shutdown.
|
||||
|
||||
|
|
|
@ -34,8 +34,12 @@
|
|||
#include "SimpleHttpClient/SimpleHttpClient.h"
|
||||
#include "SimpleHttpClient/SimpleHttpResult.h"
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/property_tree/detail/xml_parser_utils.hpp>
|
||||
#include <velocypack/Builder.h>
|
||||
#include <velocypack/Dumper.h>
|
||||
#include <velocypack/Slice.h>
|
||||
#include <velocypack/Sink.h>
|
||||
#include <velocypack/velocypack-aliases.h>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
#include <sys/types.h>
|
||||
|
@ -63,8 +67,6 @@ ExportFeature::ExportFeature(application_features::ApplicationServer& server, in
|
|||
_graphName(),
|
||||
_xgmmlLabelAttribute("label"),
|
||||
_typeExport("json"),
|
||||
_csvFieldOptions(),
|
||||
_csvFields(),
|
||||
_xgmmlLabelOnly(false),
|
||||
_outputDirectory(),
|
||||
_overwrite(false),
|
||||
|
@ -111,7 +113,7 @@ void ExportFeature::collectOptions(std::shared_ptr<options::ProgramOptions> opti
|
|||
options->addOption("--progress", "show progress", new BooleanParameter(&_progress));
|
||||
|
||||
options->addOption("--fields",
|
||||
"comma separated list of fileds to export into a csv file",
|
||||
"comma separated list of fields to export into a csv file",
|
||||
new StringParameter(&_csvFieldOptions));
|
||||
|
||||
std::unordered_set<std::string> exports = {"csv", "json", "jsonl", "xgmml",
|
||||
|
@ -178,7 +180,7 @@ void ExportFeature::validateOptions(std::shared_ptr<options::ProgramOptions> opt
|
|||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
|
||||
boost::split(_csvFields, _csvFieldOptions, boost::is_any_of(","));
|
||||
_csvFields = StringUtils::split(_csvFieldOptions, ',');
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -341,10 +343,10 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) {
|
|||
|
||||
if (_typeExport == "json") {
|
||||
std::string closingBracket = "\n]";
|
||||
writeToFile(*fd, closingBracket, fileName);
|
||||
writeToFile(*fd, closingBracket);
|
||||
} else if (_typeExport == "xml") {
|
||||
std::string xmlFooter = "</collection>";
|
||||
writeToFile(*fd, xmlFooter, fileName);
|
||||
writeToFile(*fd, xmlFooter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -394,10 +396,10 @@ void ExportFeature::queryExport(SimpleHttpClient* httpClient) {
|
|||
|
||||
if (_typeExport == "json") {
|
||||
std::string closingBracket = "\n]";
|
||||
writeToFile(*fd, closingBracket, fileName);
|
||||
writeToFile(*fd, closingBracket);
|
||||
} else if (_typeExport == "xml") {
|
||||
std::string xmlFooter = "</collection>";
|
||||
writeToFile(*fd, xmlFooter, fileName);
|
||||
writeToFile(*fd, xmlFooter);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -406,7 +408,7 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
|
|||
_firstLine = true;
|
||||
if (_typeExport == "json") {
|
||||
std::string openingBracket = "[";
|
||||
writeToFile(fd, openingBracket, fileName);
|
||||
writeToFile(fd, openingBracket);
|
||||
|
||||
} else if (_typeExport == "xml") {
|
||||
std::string xmlHeader =
|
||||
|
@ -414,10 +416,10 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
|
|||
"<collection name=\"";
|
||||
xmlHeader.append(encode_char_entities(collection));
|
||||
xmlHeader.append("\">\n");
|
||||
writeToFile(fd, xmlHeader, fileName);
|
||||
writeToFile(fd, xmlHeader);
|
||||
|
||||
} else if (_typeExport == "csv") {
|
||||
std::string firstLine = "";
|
||||
std::string firstLine;
|
||||
bool isFirstValue = true;
|
||||
for (auto const& str : _csvFields) {
|
||||
if (isFirstValue) {
|
||||
|
@ -428,22 +430,28 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
|
|||
}
|
||||
}
|
||||
firstLine += "\n";
|
||||
writeToFile(fd, firstLine, fileName);
|
||||
writeToFile(fd, firstLine);
|
||||
}
|
||||
}
|
||||
|
||||
void ExportFeature::writeBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName) {
|
||||
void ExportFeature::writeBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName) {
|
||||
std::string line;
|
||||
line.reserve(1024);
|
||||
|
||||
if (_typeExport == "jsonl") {
|
||||
VPackStringSink sink(&line);
|
||||
VPackDumper dumper(&sink);
|
||||
|
||||
for (auto const& doc : it) {
|
||||
line.clear();
|
||||
line += doc.toJson();
|
||||
dumper.dump(doc);
|
||||
line.push_back('\n');
|
||||
writeToFile(fd, line, fileName);
|
||||
writeToFile(fd, line);
|
||||
}
|
||||
} else if (_typeExport == "json") {
|
||||
VPackStringSink sink(&line);
|
||||
VPackDumper dumper(&sink);
|
||||
|
||||
for (auto const& doc : it) {
|
||||
line.clear();
|
||||
if (!_firstLine) {
|
||||
|
@ -452,8 +460,8 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
|
|||
line.append("\n ", 3);
|
||||
_firstLine = false;
|
||||
}
|
||||
line += doc.toJson();
|
||||
writeToFile(fd, line, fileName);
|
||||
dumper.dump(doc);
|
||||
writeToFile(fd, line);
|
||||
}
|
||||
} else if (_typeExport == "csv") {
|
||||
for (auto const& doc : it) {
|
||||
|
@ -461,39 +469,50 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
|
|||
bool isFirstValue = true;
|
||||
|
||||
for (auto const& key : _csvFields) {
|
||||
std::string value = "";
|
||||
|
||||
if (isFirstValue) {
|
||||
isFirstValue = false;
|
||||
} else {
|
||||
line.append(",");
|
||||
line.push_back(',');
|
||||
}
|
||||
|
||||
if (doc.hasKey(key)) {
|
||||
VPackSlice val = doc.get(key);
|
||||
|
||||
VPackSlice val = doc.get(key);
|
||||
if (!val.isNone()) {
|
||||
std::string value;
|
||||
bool escape = false;
|
||||
if (val.isArray() || val.isObject()) {
|
||||
value = val.toJson();
|
||||
escape = true;
|
||||
} else {
|
||||
if (val.isString()) {
|
||||
value = val.copyString();
|
||||
escape = true;
|
||||
} else {
|
||||
value = val.toString();
|
||||
}
|
||||
}
|
||||
|
||||
value = std::regex_replace(value, std::regex("\""), "\"\"");
|
||||
if (escape) {
|
||||
value = std::regex_replace(value, std::regex("\""), "\"\"");
|
||||
|
||||
if (value.find(",") != std::string::npos ||
|
||||
value.find("\"\"") != std::string::npos) {
|
||||
value = "\"" + value;
|
||||
value.append("\"");
|
||||
if (value.find(',') != std::string::npos ||
|
||||
value.find('\"') != std::string::npos ||
|
||||
value.find('\r') != std::string::npos ||
|
||||
value.find('\n') != std::string::npos) {
|
||||
// escape value and put it in quotes
|
||||
line.push_back('\"');
|
||||
line.append(value);
|
||||
line.push_back('\"');
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// write unescaped
|
||||
line.append(value);
|
||||
}
|
||||
line.append(value);
|
||||
}
|
||||
line.append("\n");
|
||||
writeToFile(fd, line, fileName);
|
||||
line.push_back('\n');
|
||||
writeToFile(fd, line);
|
||||
}
|
||||
} else if (_typeExport == "xml") {
|
||||
for (auto const& doc : it) {
|
||||
|
@ -501,18 +520,18 @@ void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string cons
|
|||
line.append("<doc key=\"");
|
||||
line.append(encode_char_entities(doc.get("_key").copyString()));
|
||||
line.append("\">\n");
|
||||
writeToFile(fd, line, fileName);
|
||||
writeToFile(fd, line);
|
||||
for (auto const& att : VPackObjectIterator(doc)) {
|
||||
xgmmlWriteOneAtt(fd, fileName, att.value, att.key.copyString(), 2);
|
||||
xgmmlWriteOneAtt(fd, att.value, att.key.copyString(), 2);
|
||||
}
|
||||
line.clear();
|
||||
line.append("</doc>\n");
|
||||
writeToFile(fd, line, fileName);
|
||||
writeToFile(fd, line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ExportFeature::writeToFile(ManagedDirectory::File & fd, std::string const& line, std::string const& fileName) {
|
||||
void ExportFeature::writeToFile(ManagedDirectory::File & fd, std::string const& line) {
|
||||
fd.write(line.c_str(), line.size());
|
||||
}
|
||||
|
||||
|
@ -613,14 +632,14 @@ void ExportFeature::graphExport(SimpleHttpClient* httpClient) {
|
|||
std::string xmlHeader =
|
||||
R"(<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<graph label=")";
|
||||
writeToFile(*fd, xmlHeader, fileName);
|
||||
writeToFile(*fd, _graphName, fileName);
|
||||
writeToFile(*fd, xmlHeader);
|
||||
writeToFile(*fd, _graphName);
|
||||
|
||||
xmlHeader = R"("
|
||||
xmlns="http://www.cs.rpi.edu/XGMML"
|
||||
directed="1">
|
||||
)";
|
||||
writeToFile(*fd, xmlHeader, fileName);
|
||||
writeToFile(*fd, xmlHeader);
|
||||
|
||||
for (auto const& collection : _collections) {
|
||||
if (_progress) {
|
||||
|
@ -656,7 +675,7 @@ directed="1">
|
|||
}
|
||||
}
|
||||
std::string closingGraphTag = "</graph>\n";
|
||||
writeToFile(*fd, closingGraphTag, fileName);
|
||||
writeToFile(*fd, closingGraphTag);
|
||||
|
||||
if (_skippedDeepNested) {
|
||||
std::cout << "skipped " << _skippedDeepNested
|
||||
|
@ -677,21 +696,21 @@ void ExportFeature::writeGraphBatch(ManagedDirectory::File & fd, VPackArrayItera
|
|||
"\" source=\"" + encode_char_entities(doc.get("_from").copyString()) +
|
||||
"\" target=\"" +
|
||||
encode_char_entities(doc.get("_to").copyString()) + "\"";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
if (!_xgmmlLabelOnly) {
|
||||
xmlTag = ">\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
|
||||
for (auto const& it : VPackObjectIterator(doc)) {
|
||||
xgmmlWriteOneAtt(fd, fileName, it.value, it.key.copyString());
|
||||
xgmmlWriteOneAtt(fd, it.value, it.key.copyString());
|
||||
}
|
||||
|
||||
xmlTag = "</edge>\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
|
||||
} else {
|
||||
xmlTag = " />\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -702,27 +721,27 @@ void ExportFeature::writeGraphBatch(ManagedDirectory::File & fd, VPackArrayItera
|
|||
? doc.get(_xgmmlLabelAttribute).copyString()
|
||||
: "Default-Label") +
|
||||
"\" id=\"" + encode_char_entities(doc.get("_id").copyString()) + "\"";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
if (!_xgmmlLabelOnly) {
|
||||
xmlTag = ">\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
|
||||
for (auto const& it : VPackObjectIterator(doc)) {
|
||||
xgmmlWriteOneAtt(fd, fileName, it.value, it.key.copyString());
|
||||
xgmmlWriteOneAtt(fd, it.value, it.key.copyString());
|
||||
}
|
||||
|
||||
xmlTag = "</node>\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
|
||||
} else {
|
||||
xmlTag = " />\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ExportFeature::xgmmlWriteOneAtt(ManagedDirectory::File & fd, std::string const& fileName,
|
||||
void ExportFeature::xgmmlWriteOneAtt(ManagedDirectory::File & fd,
|
||||
VPackSlice const& slice,
|
||||
std::string const& name, int deep) {
|
||||
std::string value, type, xmlTag;
|
||||
|
@ -761,38 +780,38 @@ void ExportFeature::xgmmlWriteOneAtt(ManagedDirectory::File & fd, std::string co
|
|||
xmlTag = " <att name=\"" + encode_char_entities(name) +
|
||||
"\" type=\"string\" value=\"" +
|
||||
encode_char_entities(slice.toString()) + "\"/>\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!type.empty()) {
|
||||
xmlTag = " <att name=\"" + encode_char_entities(name) + "\" type=\"" +
|
||||
type + "\" value=\"" + encode_char_entities(value) + "\"/>\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
|
||||
} else if (slice.isArray()) {
|
||||
xmlTag =
|
||||
" <att name=\"" + encode_char_entities(name) + "\" type=\"list\">\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
|
||||
for (VPackSlice val : VPackArrayIterator(slice)) {
|
||||
xgmmlWriteOneAtt(fd, fileName, val, name, deep + 1);
|
||||
xgmmlWriteOneAtt(fd, val, name, deep + 1);
|
||||
}
|
||||
|
||||
xmlTag = " </att>\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
|
||||
} else if (slice.isObject()) {
|
||||
xmlTag =
|
||||
" <att name=\"" + encode_char_entities(name) + "\" type=\"list\">\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
|
||||
for (auto const& it : VPackObjectIterator(slice)) {
|
||||
xgmmlWriteOneAtt(fd, fileName, it.value, it.key.copyString(), deep + 1);
|
||||
xgmmlWriteOneAtt(fd, it.value, it.key.copyString(), deep + 1);
|
||||
}
|
||||
|
||||
xmlTag = " </att>\n";
|
||||
writeToFile(fd, xmlTag, fileName);
|
||||
writeToFile(fd, xmlTag);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -53,14 +53,14 @@ class ExportFeature final : public application_features::ApplicationFeature,
|
|||
private:
|
||||
void collectionExport(httpclient::SimpleHttpClient* httpClient);
|
||||
void queryExport(httpclient::SimpleHttpClient* httpClient);
|
||||
void writeFirstLine(ManagedDirectory::File & fd, std::string const& fileName, std::string const& collection);
|
||||
void writeBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName);
|
||||
void writeFirstLine(ManagedDirectory::File& fd, std::string const& fileName, std::string const& collection);
|
||||
void writeBatch(ManagedDirectory::File& fd, VPackArrayIterator it, std::string const& fileName);
|
||||
void graphExport(httpclient::SimpleHttpClient* httpClient);
|
||||
void writeGraphBatch(ManagedDirectory::File &fd, VPackArrayIterator it, std::string const& fileName);
|
||||
void xgmmlWriteOneAtt(ManagedDirectory::File & fd, std::string const& fileName, VPackSlice const& slice,
|
||||
void writeGraphBatch(ManagedDirectory::File& fd, VPackArrayIterator it, std::string const& fileName);
|
||||
void xgmmlWriteOneAtt(ManagedDirectory::File& fd, VPackSlice const& slice,
|
||||
std::string const& name, int deep = 0);
|
||||
|
||||
void writeToFile(ManagedDirectory::File & fd, std::string const& string, std::string const& fileName);
|
||||
void writeToFile(ManagedDirectory::File& fd, std::string const& string);
|
||||
std::shared_ptr<VPackBuilder> httpCall(httpclient::SimpleHttpClient* httpClient,
|
||||
std::string const& url, arangodb::rest::RequestType,
|
||||
std::string postBody = "");
|
||||
|
|
|
@ -28,8 +28,7 @@
|
|||
const functionsDocumentation = {
|
||||
'export': 'export formats tests'
|
||||
};
|
||||
const optionsDocumentation = [
|
||||
];
|
||||
const optionsDocumentation = [];
|
||||
|
||||
const fs = require('fs');
|
||||
const pu = require('@arangodb/process-utils');
|
||||
|
@ -100,7 +99,7 @@ function exportTest (options) {
|
|||
'overwrite': true,
|
||||
'output-directory': tmpPath
|
||||
};
|
||||
const results = {failed: 0};
|
||||
let results = {failed: 0};
|
||||
|
||||
function shutdown () {
|
||||
print(CYAN + 'Shutting down...' + RESET);
|
||||
|
@ -333,7 +332,7 @@ function exportTest (options) {
|
|||
results.exportQueryGz = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck);
|
||||
results.exportQueryGz.failed = results.exportQuery.status ? 0 : 1;
|
||||
try {
|
||||
fs.readGzip(fs.join(tmpPath, 'query.jsonl')).split('\n')
|
||||
fs.readGzip(fs.join(tmpPath, 'query.jsonl.gz')).split('\n')
|
||||
.filter(line => line.trim() !== '')
|
||||
.forEach(line => JSON.parse(line));
|
||||
results.parseQueryResultGz = {
|
||||
|
@ -341,7 +340,6 @@ function exportTest (options) {
|
|||
status: true
|
||||
};
|
||||
} catch (e) {
|
||||
print(e);
|
||||
results.failed += 1;
|
||||
results.parseQueryResultGz = {
|
||||
failed: 1,
|
||||
|
@ -350,6 +348,29 @@ function exportTest (options) {
|
|||
};
|
||||
}
|
||||
args['compress-output'] = 'false';
|
||||
|
||||
print(CYAN + Date() + ': Export data (csv)' + RESET);
|
||||
args['type'] = 'csv';
|
||||
args['query'] = 'FOR doc IN UnitTestsExport RETURN doc';
|
||||
args['fields'] = '_key,value1,value2,value3,value4';
|
||||
results.exportCsv = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck);
|
||||
results.exportCsv.failed = results.exportJsonl.status ? 0 : 1;
|
||||
try {
|
||||
fs.read(fs.join(tmpPath, 'query.csv'));
|
||||
|
||||
results.parseCsv = {
|
||||
failed: 0,
|
||||
status: true
|
||||
};
|
||||
} catch (e) {
|
||||
results.failed += 1;
|
||||
results.parseCsv = {
|
||||
failed: 1,
|
||||
status: false,
|
||||
message: e
|
||||
};
|
||||
}
|
||||
delete args['fields'];
|
||||
|
||||
return shutdown();
|
||||
}
|
||||
|
|
|
@ -1109,7 +1109,7 @@ char* TRI_SlurpGzipFile(char const* filename, size_t* length) {
|
|||
TRI_set_errno(TRI_ERROR_NO_ERROR);
|
||||
gzFile gzFd(gzopen(filename,"rb"));
|
||||
auto fdGuard = arangodb::scopeGuard([&gzFd](){ if (nullptr != gzFd) gzclose(gzFd); });
|
||||
char * retPtr = nullptr;
|
||||
char* retPtr = nullptr;
|
||||
|
||||
if (nullptr != gzFd) {
|
||||
TRI_string_buffer_t result;
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
for (let i = 0; i < 100; ++i) {
|
||||
col.save({ _key: "export" + i, value1: i, value2: "this is export", value3: "export" + i, value4: "%<>\"'" });
|
||||
}
|
||||
col.save({ _key: "special", value1: "abc \"def\" ghi", value2: [1, 2], value3: { foo: "bar" }, value4: "abc\r\ncd" });
|
||||
}
|
||||
|
||||
return {
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
for (let i = 0; i < 100; ++i) {
|
||||
col.save({ _key: "export" + i, value1: i, value2: "this is export", value3: "export" + i, value4: "%<>\"'" });
|
||||
}
|
||||
col.save({ _key: "special", value1: "abc \"def\" ghi", value2: [1, 2], value3: { foo: "bar" }, value4: "abc\r\ncd" });
|
||||
}
|
||||
|
||||
return {
|
||||
|
|
Loading…
Reference in New Issue