
Feature/planning 580 arangoexport with query (#2882)

* added query export capability

* test for export --query

* docu

* document export with query

* Update CHANGELOG
This commit is contained in:
Manuel B 2017-07-28 08:41:53 +02:00 committed by Frank Celler
parent 6fefe5ab31
commit fe8b9b70c4
5 changed files with 153 additions and 42 deletions

View File

@ -1,6 +1,8 @@
devel
-----
* arangoexport: added --query; pass an AQL query and export its result
* fixed issue #2879: No result when querying for the last record of a query
* ui: now allows editing the default access level for collections in a database

View File

@ -131,3 +131,11 @@ This exports the named graph mygraph into the xgmml file *mygraph.xgmml* without
unix> arangoexport --type xgmml --graph-name mygraph --xgmml-label-attribute name
This exports the named graph mygraph into the xgmml file *mygraph.xgmml*, with a label taken from the document attribute *name* instead of the default attribute *label*.
Export via AQL query
--------------------
unix> arangoexport --type jsonl --query "FOR book IN books FILTER book.sells > 100 RETURN book"
Exporting via an AQL query allows you to export the query result in the format specified with *--type*.
The example above exports, as JSONL, all books that have sold more than 100 copies.
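The query result is written to a file named *query.jsonl* in the output directory (the extension follows the value of *--type*). A single line of that file could look like the following; the document shown is purely illustrative and assumes a *books* collection with *title* and *sells* attributes:
{"_key":"9915","title":"ArangoDB Cookbook","sells":521}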

View File

@ -79,6 +79,9 @@ void ExportFeature::collectOptions(
"--collection",
"restrict to collection name (can be specified multiple times)",
new VectorParameter<StringParameter>(&_collections));
options->addOption("--query", "AQL query to run",
new StringParameter(&_query));
options->addOption("--graph-name", "name of a graph to export",
new StringParameter(&_graphName));
@ -132,9 +135,15 @@ void ExportFeature::validateOptions(
_outputDirectory.pop_back();
}
if (_graphName.empty() && _collections.empty()) {
if (_graphName.empty() && _collections.empty() && _query.empty()) {
LOG_TOPIC(FATAL, Logger::CONFIG)
<< "expecting at least one collection or one graph name";
<< "expecting at least one collection, a graph name or an AQL query";
FATAL_ERROR_EXIT();
}
if (!_query.empty() && (!_collections.empty() || !_graphName.empty())) {
LOG_TOPIC(FATAL, Logger::CONFIG)
<< "expecting either a list of collections or an AQL query";
FATAL_ERROR_EXIT();
}
@ -146,8 +155,8 @@ void ExportFeature::validateOptions(
if ((_typeExport == "json" || _typeExport == "jsonl" ||
_typeExport == "csv") &&
_collections.empty()) {
LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting at least one collection";
_collections.empty() && _query.empty()) {
LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting at least one collection or an AQL query";
FATAL_ERROR_EXIT();
}
@ -267,6 +276,11 @@ void ExportFeature::start() {
exportedSize += fileSize;
}
}
} else if (!_query.empty()) {
queryExport(httpClient.get());
std::string filePath = _outputDirectory + TRI_DIR_SEPARATOR_STR + "query." + _typeExport;
exportedSize += TRI_SizeFile(filePath.c_str());
}
} else if (_typeExport == "xgmml" && _graphName.size()) {
graphExport(httpClient.get());
@ -330,43 +344,16 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) {
TRI_DEFER(TRI_TRACKED_CLOSE_FILE(fd));
_firstLine = true;
if (_typeExport == "json") {
std::string openingBracket = "[";
writeToFile(fd, openingBracket, fileName);
writeFirstLine(fd, fileName, collection);
} else if (_typeExport == "xml") {
std::string xmlHeader =
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
"<collection name=\"";
xmlHeader.append(encode_char_entities(collection));
xmlHeader.append("\">\n");
writeToFile(fd, xmlHeader, fileName);
} else if (_typeExport == "csv") {
std::string firstLine = "";
bool isFirstValue = true;
for (auto const& str : _csvFields) {
if (isFirstValue) {
firstLine += str;
isFirstValue = false;
} else {
firstLine += "," + str;
}
}
firstLine += "\n";
writeToFile(fd, firstLine, fileName);
}
writeCollectionBatch(fd, VPackArrayIterator(body.get("result")), fileName);
writeBatch(fd, VPackArrayIterator(body.get("result")), fileName);
while (body.hasKey("id")) {
std::string const url = "/_api/cursor/" + body.get("id").copyString();
parsedBody = httpCall(httpClient, url, rest::RequestType::PUT);
body = parsedBody->slice();
writeCollectionBatch(fd, VPackArrayIterator(body.get("result")),
fileName);
writeBatch(fd, VPackArrayIterator(body.get("result")), fileName);
}
if (_typeExport == "json") {
@ -379,8 +366,94 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) {
}
}
void ExportFeature::writeCollectionBatch(int fd, VPackArrayIterator it,
std::string const& fileName) {
void ExportFeature::queryExport(SimpleHttpClient* httpClient) {
std::string errorMsg;
if (_progress) {
std::cout << "# Running AQL query '" << _query << "'..." << std::endl;
}
std::string fileName = _outputDirectory + TRI_DIR_SEPARATOR_STR + "query." + _typeExport;
// remove an existing file first
if (TRI_ExistsFile(fileName.c_str())) {
TRI_UnlinkFile(fileName.c_str());
}
std::string const url = "_api/cursor";
VPackBuilder post;
post.openObject();
post.add("query", VPackValue(_query));
post.close();
std::shared_ptr<VPackBuilder> parsedBody =
httpCall(httpClient, url, rest::RequestType::POST, post.toJson());
VPackSlice body = parsedBody->slice();
int fd =
TRI_TRACKED_CREATE_FILE(fileName.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC,
S_IRUSR | S_IWUSR);
if (fd < 0) {
errorMsg = "cannot write to file '" + fileName + "'";
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg);
}
TRI_DEFER(TRI_TRACKED_CLOSE_FILE(fd));
writeFirstLine(fd, fileName, "");
writeBatch(fd, VPackArrayIterator(body.get("result")), fileName);
while (body.hasKey("id")) {
std::string const url = "/_api/cursor/" + body.get("id").copyString();
parsedBody = httpCall(httpClient, url, rest::RequestType::PUT);
body = parsedBody->slice();
writeBatch(fd, VPackArrayIterator(body.get("result")), fileName);
}
if (_typeExport == "json") {
std::string closingBracket = "\n]";
writeToFile(fd, closingBracket, fileName);
} else if (_typeExport == "xml") {
std::string xmlFooter = "</collection>";
writeToFile(fd, xmlFooter, fileName);
}
}
void ExportFeature::writeFirstLine(int fd, std::string const& fileName, std::string const& collection) {
_firstLine = true;
if (_typeExport == "json") {
std::string openingBracket = "[";
writeToFile(fd, openingBracket, fileName);
} else if (_typeExport == "xml") {
std::string xmlHeader =
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
"<collection name=\"";
xmlHeader.append(encode_char_entities(collection));
xmlHeader.append("\">\n");
writeToFile(fd, xmlHeader, fileName);
} else if (_typeExport == "csv") {
std::string firstLine = "";
bool isFirstValue = true;
for (auto const& str : _csvFields) {
if (isFirstValue) {
firstLine += str;
isFirstValue = false;
} else {
firstLine += "," + str;
}
}
firstLine += "\n";
writeToFile(fd, firstLine, fileName);
}
}
void ExportFeature::writeBatch(int fd, VPackArrayIterator it, std::string const& fileName) {
std::string line;
line.reserve(1024);

View File

@ -51,7 +51,9 @@ class ExportFeature final : public application_features::ApplicationFeature,
private:
void collectionExport(httpclient::SimpleHttpClient* httpClient);
void writeCollectionBatch(int fd, VPackArrayIterator it, std::string const& fileName);
void queryExport(httpclient::SimpleHttpClient* httpClient);
void writeFirstLine(int fd, std::string const& fileName, std::string const& collection);
void writeBatch(int fd, VPackArrayIterator it, std::string const& fileName);
void graphExport(httpclient::SimpleHttpClient* httpClient);
void writeGraphBatch(int fd, VPackArrayIterator it, std::string const& fileName);
void xgmmlWriteOneAtt(int fd, std::string const& fileName, VPackSlice const& slice, std::string const& name, int deep = 0);
@ -61,6 +63,7 @@ class ExportFeature final : public application_features::ApplicationFeature,
private:
std::vector<std::string> _collections;
std::string _query;
std::string _graphName;
std::string _xgmmlLabelAttribute;
std::string _typeExport;

View File

@ -118,6 +118,7 @@ function exportTest (options) {
results.exportJson.failed = results.exportJson.status ? 0 : 1;
try {
JSON.parse(fs.read(fs.join(tmpPath, 'UnitTestsExport.json')));
results.parseJson = {
failed: 0,
status: true
@ -136,11 +137,10 @@ function exportTest (options) {
results.exportJsonl = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath);
results.exportJsonl.failed = results.exportJsonl.status ? 0 : 1;
try {
const filesContent = fs.read(fs.join(tmpPath, 'UnitTestsExport.jsonl')).split('\n');
for (const line of filesContent) {
if (line.trim() === '') continue;
JSON.parse(line);
}
fs.read(fs.join(tmpPath, 'UnitTestsExport.jsonl')).split('\n')
.filter(line => line.trim() !== '')
.forEach(line => JSON.parse(line));
results.parseJsonl = {
failed: 0,
status: true
@ -183,6 +183,31 @@ function exportTest (options) {
};
}
print(CYAN + Date() + ': Export query (jsonl)' + RESET);
args['type'] = 'jsonl';
args['query'] = 'FOR doc IN UnitTestsExport RETURN doc';
delete args['graph-name'];
delete args['collection'];
results.exportQuery = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath);
results.exportQuery.failed = results.exportQuery.status ? 0 : 1;
try {
fs.read(fs.join(tmpPath, 'query.jsonl')).split('\n')
.filter(line => line.trim() !== '')
.forEach(line => JSON.parse(line));
results.parseQueryResult = {
failed: 0,
status: true
};
} catch (e) {
print(e);
results.failed += 1;
results.parseQueryResult = {
failed: 1,
status: false,
message: e
};
}
return shutdown();
}