1
0
Fork 0

arangoexport: csv

This commit is contained in:
baslr 2017-02-27 11:52:18 +01:00
parent f9b9dbde8d
commit d3010b1934
2 changed files with 78 additions and 4 deletions

View File

@ -32,7 +32,9 @@
#include "SimpleHttpClient/SimpleHttpClient.h"
#include "SimpleHttpClient/SimpleHttpResult.h"
#include <regex>
#include <boost/property_tree/detail/xml_parser_utils.hpp>
#include <boost/algorithm/string.hpp>
using namespace arangodb;
using namespace arangodb::basics;
@ -47,6 +49,8 @@ ExportFeature::ExportFeature(application_features::ApplicationServer* server,
_graphName(),
_xgmmlLabelAttribute("label"),
_typeExport("json"),
_csvFieldOptions(),
_csvFields(),
_xgmmlLabelOnly(false),
_outputDirectory(),
_overwrite(false),
@ -92,7 +96,10 @@ void ExportFeature::collectOptions(
options->addOption("--progress", "show progress",
new BooleanParameter(&_progress));
std::unordered_set<std::string> exports = {"json", "jsonl", "xgmml", "xml"};
options->addOption("--fields", "comma separated list of fileds to export into a csv file",
new StringParameter(&_csvFieldOptions));
std::unordered_set<std::string> exports = {"csv", "json", "jsonl", "xgmml", "xml"};
options->addOption(
"--type", "type of export", new DiscreteValuesParameter<StringParameter>(&_typeExport, exports));
}
@ -123,10 +130,25 @@ void ExportFeature::validateOptions(
FATAL_ERROR_EXIT();
}
if (_typeExport == "xgmml" && _graphName.empty() ) {
if (_typeExport == "xgmml" && _graphName.empty()) {
LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting a graph name to dump a graph";
FATAL_ERROR_EXIT();
}
if ( (_typeExport == "json" || _typeExport == "jsonl" || _typeExport == "csv") &&
_collections.empty()) {
LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting at least one collection";
FATAL_ERROR_EXIT();
}
if (_typeExport == "csv") {
if (_csvFieldOptions.empty()) {
LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting at least one field definition";
FATAL_ERROR_EXIT();
}
boost::split(_csvFields, _csvFieldOptions, boost::is_any_of(","));
}
}
void ExportFeature::prepare() {
@ -209,7 +231,7 @@ void ExportFeature::start() {
uint64_t exportedSize = 0;
if (_typeExport == "json" || _typeExport == "jsonl" || _typeExport == "xml") {
if (_typeExport == "json" || _typeExport == "jsonl" || _typeExport == "xml" || _typeExport == "csv") {
if (_collections.size()) {
collectionExport(httpClient.get());
@ -282,12 +304,27 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) {
if (_typeExport == "json") {
std::string openingBracket = "[";
writeToFile(fd, openingBracket, fileName);
} else if (_typeExport == "xml") {
std::string xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
"<collection name=\"";
xmlHeader.append(encode_char_entities(collection));
xmlHeader.append("\">\n");
writeToFile(fd, xmlHeader, fileName);
} else if (_typeExport == "csv") {
std::string firstLine = "";
bool isFirstValue = true;
for(auto const& str : _csvFields) {
if (isFirstValue) {
firstLine += str;
isFirstValue = false;
} else {
firstLine += "," + str;
}
}
firstLine += "\n";
writeToFile(fd, firstLine, fileName);
}
writeCollectionBatch(fd, VPackArrayIterator(body.get("result")), fileName);
@ -333,6 +370,41 @@ void ExportFeature::writeCollectionBatch(int fd, VPackArrayIterator it, std::str
line += doc.toJson();
writeToFile(fd, line, fileName);
}
} else if (_typeExport == "csv") {
for (auto const& doc : it) {
line.clear();
bool isFirstValue = true;
for(auto const& key : _csvFields) {
std::string value = "";
if (isFirstValue) {
isFirstValue = false;
} else {
line.append(",");
}
if (doc.hasKey(key)) {
VPackSlice val = doc.get(key);
if (val.isArray() || val.isObject()) {
value = val.toJson();
} else {
if (val.isString()) {
value = val.copyString();
} else {
value = val.toString();
}
}
value = std::regex_replace(value, std::regex("\""), "\"\"");
value = std::regex_replace(value, std::regex(","), "\",");
}
line.append(value);
}
line.append("\n");
writeToFile(fd, line, fileName);
}
} else if (_typeExport == "xml") {
for (auto const& doc : it) {
line.clear();
@ -378,7 +450,7 @@ std::shared_ptr<VPackBuilder> ExportFeature::httpCall(SimpleHttpClient* httpClie
if (_currentGraph.size()) {
LOG_TOPIC(FATAL, Logger::CONFIG) << "Graph '" << _currentGraph << "' not found.";
} else if (_currentCollection.size()) {
LOG_TOPIC(FATAL, Logger::CONFIG) << "Collection " << _currentCollection << "not found.";
LOG_TOPIC(FATAL, Logger::CONFIG) << "Collection " << _currentCollection << " not found.";
}
FATAL_ERROR_EXIT();

View File

@ -64,6 +64,8 @@ class ExportFeature final : public application_features::ApplicationFeature,
std::string _graphName;
std::string _xgmmlLabelAttribute;
std::string _typeExport;
std::string _csvFieldOptions;
std::vector<std::string> _csvFields;
bool _xgmmlLabelOnly;
std::string _outputDirectory;