mirror of https://gitee.com/bigwinds/arangodb
Feature/planning 580 arangoexport with query (#2882)
* added query export capability * test for export --query * docu * document export with query * Update CHANGELOG
This commit is contained in:
parent
6fefe5ab31
commit
fe8b9b70c4
|
@ -1,6 +1,8 @@
|
|||
devel
|
||||
-----
|
||||
|
||||
* arangoexport: added --query; pass an AQL query to export its result
|
||||
|
||||
* fixed issue #2879: No result when querying for the last record of a query
|
||||
|
||||
* ui: allows now to edit default access level for collections in database
|
||||
|
|
|
@ -131,3 +131,11 @@ This exports the named graph mygraph into the xgmml file *mygraph.xgmml* without
|
|||
unix> arangoexport --type xgmml --graph-name mygraph --xgmml-label-attribute name
|
||||
|
||||
This exports the named graph mygraph into the xgmml file *mygraph.xgmml*, using the documents' attribute *name* as the label instead of the default attribute *label*.
|
||||
|
||||
Export via AQL query
|
||||
--------------------
|
||||
|
||||
unix> arangoexport --type jsonl --query "for book in books filter book.sells > 100 return book"
|
||||
|
||||
Exporting via an AQL query lets you export the data returned by the query in the format specified with *--type*.
|
||||
The example exports all books that were sold more than 100 times, in jsonl format.
|
|
@ -79,6 +79,9 @@ void ExportFeature::collectOptions(
|
|||
"--collection",
|
||||
"restrict to collection name (can be specified multiple times)",
|
||||
new VectorParameter<StringParameter>(&_collections));
|
||||
|
||||
options->addOption("--query", "AQL query to run",
|
||||
new StringParameter(&_query));
|
||||
|
||||
options->addOption("--graph-name", "name of a graph to export",
|
||||
new StringParameter(&_graphName));
|
||||
|
@ -132,9 +135,15 @@ void ExportFeature::validateOptions(
|
|||
_outputDirectory.pop_back();
|
||||
}
|
||||
|
||||
if (_graphName.empty() && _collections.empty()) {
|
||||
if (_graphName.empty() && _collections.empty() && _query.empty()) {
|
||||
LOG_TOPIC(FATAL, Logger::CONFIG)
|
||||
<< "expecting at least one collection or one graph name";
|
||||
<< "expecting at least one collection, a graph name or an AQL query";
|
||||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
|
||||
if (!_query.empty() && (!_collections.empty() || !_graphName.empty())) {
|
||||
LOG_TOPIC(FATAL, Logger::CONFIG)
|
||||
<< "expecting either a list of collections or an AQL query";
|
||||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
|
||||
|
@ -146,8 +155,8 @@ void ExportFeature::validateOptions(
|
|||
|
||||
if ((_typeExport == "json" || _typeExport == "jsonl" ||
|
||||
_typeExport == "csv") &&
|
||||
_collections.empty()) {
|
||||
LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting at least one collection";
|
||||
_collections.empty() && _query.empty()) {
|
||||
LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting at least one collection or an AQL query";
|
||||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
|
||||
|
@ -267,6 +276,11 @@ void ExportFeature::start() {
|
|||
exportedSize += fileSize;
|
||||
}
|
||||
}
|
||||
} else if (!_query.empty()) {
|
||||
queryExport(httpClient.get());
|
||||
|
||||
std::string filePath = _outputDirectory + TRI_DIR_SEPARATOR_STR + "query." + _typeExport;
|
||||
exportedSize += TRI_SizeFile(filePath.c_str());
|
||||
}
|
||||
} else if (_typeExport == "xgmml" && _graphName.size()) {
|
||||
graphExport(httpClient.get());
|
||||
|
@ -330,43 +344,16 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) {
|
|||
|
||||
TRI_DEFER(TRI_TRACKED_CLOSE_FILE(fd));
|
||||
|
||||
_firstLine = true;
|
||||
if (_typeExport == "json") {
|
||||
std::string openingBracket = "[";
|
||||
writeToFile(fd, openingBracket, fileName);
|
||||
writeFirstLine(fd, fileName, collection);
|
||||
|
||||
} else if (_typeExport == "xml") {
|
||||
std::string xmlHeader =
|
||||
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
|
||||
"<collection name=\"";
|
||||
xmlHeader.append(encode_char_entities(collection));
|
||||
xmlHeader.append("\">\n");
|
||||
writeToFile(fd, xmlHeader, fileName);
|
||||
|
||||
} else if (_typeExport == "csv") {
|
||||
std::string firstLine = "";
|
||||
bool isFirstValue = true;
|
||||
for (auto const& str : _csvFields) {
|
||||
if (isFirstValue) {
|
||||
firstLine += str;
|
||||
isFirstValue = false;
|
||||
} else {
|
||||
firstLine += "," + str;
|
||||
}
|
||||
}
|
||||
firstLine += "\n";
|
||||
writeToFile(fd, firstLine, fileName);
|
||||
}
|
||||
|
||||
writeCollectionBatch(fd, VPackArrayIterator(body.get("result")), fileName);
|
||||
writeBatch(fd, VPackArrayIterator(body.get("result")), fileName);
|
||||
|
||||
while (body.hasKey("id")) {
|
||||
std::string const url = "/_api/cursor/" + body.get("id").copyString();
|
||||
parsedBody = httpCall(httpClient, url, rest::RequestType::PUT);
|
||||
body = parsedBody->slice();
|
||||
|
||||
writeCollectionBatch(fd, VPackArrayIterator(body.get("result")),
|
||||
fileName);
|
||||
writeBatch(fd, VPackArrayIterator(body.get("result")), fileName);
|
||||
}
|
||||
|
||||
if (_typeExport == "json") {
|
||||
|
@ -379,8 +366,94 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) {
|
|||
}
|
||||
}
|
||||
|
||||
void ExportFeature::writeCollectionBatch(int fd, VPackArrayIterator it,
|
||||
std::string const& fileName) {
|
||||
/// @brief runs the AQL query stored in _query and writes the returned
/// documents to "<_outputDirectory>/query.<_typeExport>".
///
/// A file of that name left over from an earlier run is removed first. The
/// query is posted to the cursor API; as long as the response carries an "id"
/// key, further batches are fetched via PUT on that cursor and appended.
/// Throws TRI_ERROR_CANNOT_WRITE_FILE if the output file cannot be created.
void ExportFeature::queryExport(SimpleHttpClient* httpClient) {
  if (_progress) {
    std::cout << "# Running AQL query '" << _query << "'..." << std::endl;
  }

  std::string const fileName =
      _outputDirectory + TRI_DIR_SEPARATOR_STR + "query." + _typeExport;

  // the file is created with O_EXCL below, so a stale file has to go first
  if (TRI_ExistsFile(fileName.c_str())) {
    TRI_UnlinkFile(fileName.c_str());
  }

  // request body: { "query": <_query> }
  VPackBuilder request;
  request.openObject();
  request.add("query", VPackValue(_query));
  request.close();

  std::string const cursorUrl = "_api/cursor";
  std::shared_ptr<VPackBuilder> parsedBody = httpCall(
      httpClient, cursorUrl, rest::RequestType::POST, request.toJson());
  VPackSlice body = parsedBody->slice();

  int fd = TRI_TRACKED_CREATE_FILE(
      fileName.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC,
      S_IRUSR | S_IWUSR);

  if (fd < 0) {
    std::string const errorMsg = "cannot write to file '" + fileName + "'";
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg);
  }

  // close the file on every exit path
  TRI_DEFER(TRI_TRACKED_CLOSE_FILE(fd));

  // type-specific header; a query has no collection name, hence ""
  writeFirstLine(fd, fileName, "");

  // write the current batch, then fetch the next one while the response
  // still contains a cursor id
  for (;;) {
    writeBatch(fd, VPackArrayIterator(body.get("result")), fileName);

    if (!body.hasKey("id")) {
      break;
    }

    std::string const nextBatchUrl =
        "/_api/cursor/" + body.get("id").copyString();
    parsedBody = httpCall(httpClient, nextBatchUrl, rest::RequestType::PUT);
    body = parsedBody->slice();
  }

  // type-specific footer matching the header written by writeFirstLine
  if (_typeExport == "json") {
    std::string arrayEnd = "\n]";
    writeToFile(fd, arrayEnd, fileName);
  } else if (_typeExport == "xml") {
    std::string rootEnd = "</collection>";
    writeToFile(fd, rootEnd, fileName);
  }
}
|
||||
|
||||
void ExportFeature::writeFirstLine(int fd, std::string const& fileName, std::string const& collection) {
|
||||
_firstLine = true;
|
||||
if (_typeExport == "json") {
|
||||
std::string openingBracket = "[";
|
||||
writeToFile(fd, openingBracket, fileName);
|
||||
|
||||
} else if (_typeExport == "xml") {
|
||||
std::string xmlHeader =
|
||||
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
|
||||
"<collection name=\"";
|
||||
xmlHeader.append(encode_char_entities(collection));
|
||||
xmlHeader.append("\">\n");
|
||||
writeToFile(fd, xmlHeader, fileName);
|
||||
|
||||
} else if (_typeExport == "csv") {
|
||||
std::string firstLine = "";
|
||||
bool isFirstValue = true;
|
||||
for (auto const& str : _csvFields) {
|
||||
if (isFirstValue) {
|
||||
firstLine += str;
|
||||
isFirstValue = false;
|
||||
} else {
|
||||
firstLine += "," + str;
|
||||
}
|
||||
}
|
||||
firstLine += "\n";
|
||||
writeToFile(fd, firstLine, fileName);
|
||||
}
|
||||
}
|
||||
|
||||
void ExportFeature::writeBatch(int fd, VPackArrayIterator it, std::string const& fileName) {
|
||||
std::string line;
|
||||
line.reserve(1024);
|
||||
|
||||
|
|
|
@ -51,7 +51,9 @@ class ExportFeature final : public application_features::ApplicationFeature,
|
|||
|
||||
private:
|
||||
void collectionExport(httpclient::SimpleHttpClient* httpClient);
|
||||
void writeCollectionBatch(int fd, VPackArrayIterator it, std::string const& fileName);
|
||||
void queryExport(httpclient::SimpleHttpClient* httpClient);
|
||||
void writeFirstLine(int fd, std::string const& fileName, std::string const& collection);
|
||||
void writeBatch(int fd, VPackArrayIterator it, std::string const& fileName);
|
||||
void graphExport(httpclient::SimpleHttpClient* httpClient);
|
||||
void writeGraphBatch(int fd, VPackArrayIterator it, std::string const& fileName);
|
||||
void xgmmlWriteOneAtt(int fd, std::string const& fileName, VPackSlice const& slice, std::string const& name, int deep = 0);
|
||||
|
@ -61,6 +63,7 @@ class ExportFeature final : public application_features::ApplicationFeature,
|
|||
|
||||
private:
|
||||
std::vector<std::string> _collections;
|
||||
std::string _query;
|
||||
std::string _graphName;
|
||||
std::string _xgmmlLabelAttribute;
|
||||
std::string _typeExport;
|
||||
|
|
|
@ -118,6 +118,7 @@ function exportTest (options) {
|
|||
results.exportJson.failed = results.exportJson.status ? 0 : 1;
|
||||
|
||||
try {
|
||||
JSON.parse(fs.read(fs.join(tmpPath, 'UnitTestsExport.json')));
|
||||
results.parseJson = {
|
||||
failed: 0,
|
||||
status: true
|
||||
|
@ -136,11 +137,10 @@ function exportTest (options) {
|
|||
results.exportJsonl = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath);
|
||||
results.exportJsonl.failed = results.exportJsonl.status ? 0 : 1;
|
||||
try {
|
||||
const filesContent = fs.read(fs.join(tmpPath, 'UnitTestsExport.jsonl')).split('\n');
|
||||
for (const line of filesContent) {
|
||||
if (line.trim() === '') continue;
|
||||
JSON.parse(line);
|
||||
}
|
||||
fs.read(fs.join(tmpPath, 'UnitTestsExport.jsonl')).split('\n')
|
||||
.filter(line => line.trim() !== '')
|
||||
.forEach(line => JSON.parse(line));
|
||||
|
||||
results.parseJsonl = {
|
||||
failed: 0,
|
||||
status: true
|
||||
|
@ -183,6 +183,31 @@ function exportTest (options) {
|
|||
};
|
||||
}
|
||||
|
||||
print(CYAN + Date() + ': Export query (xgmml)' + RESET);
|
||||
args['type'] = 'jsonl';
|
||||
args['query'] = 'FOR doc IN UnitTestsExport RETURN doc';
|
||||
delete args['graph-name'];
|
||||
delete args['collection'];
|
||||
results.exportQuery = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath);
|
||||
results.exportQuery.failed = results.exportQuery.status ? 0 : 1;
|
||||
try {
|
||||
fs.read(fs.join(tmpPath, 'query.jsonl')).split('\n')
|
||||
.filter(line => line.trim() !== '')
|
||||
.forEach(line => JSON.parse(line));
|
||||
results.parseQueryResult = {
|
||||
failed: 0,
|
||||
status: true
|
||||
};
|
||||
} catch (e) {
|
||||
print(e);
|
||||
results.failed += 1;
|
||||
results.parseQueryResult = {
|
||||
failed: 1,
|
||||
status: false,
|
||||
message: e
|
||||
};
|
||||
}
|
||||
|
||||
return shutdown();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue