diff --git a/CHANGELOG b/CHANGELOG index 1ec2451210..6b87fc1c9a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ devel ----- +* arangoexport: added --query; pass an AQL query to export its result + * fixed issue #2879: No result when querying for the last record of a query * ui: allows now to edit default access level for collections in database diff --git a/Documentation/Books/Manual/Administration/Arangoexport.md b/Documentation/Books/Manual/Administration/Arangoexport.md index c629e61163..f0cb0925b0 100644 --- a/Documentation/Books/Manual/Administration/Arangoexport.md +++ b/Documentation/Books/Manual/Administration/Arangoexport.md @@ -131,3 +131,11 @@ This exports the named graph mygraph into the xgmml file *mygraph.xgmml* without unix> arangoexport --type xgmml --graph-name mygraph --xgmml-label-attribute name This exports the named graph mygraph into the xgmml file *mygraph.xgmml* with a label from documents attribute *name* instead of the default attribute *label*. + +Export via AQL query +-------------------- + + unix> arangoexport --type jsonl --query "for book in books filter book.sells > 100 return book" + +Exporting via an AQL query allows you to export the returned data in the format specified with *--type*. +The example exports all books that have been sold more than 100 times as jsonl.
\ No newline at end of file diff --git a/arangosh/Export/ExportFeature.cpp b/arangosh/Export/ExportFeature.cpp index 1e1386cfcf..be6c26ebe9 100644 --- a/arangosh/Export/ExportFeature.cpp +++ b/arangosh/Export/ExportFeature.cpp @@ -79,6 +79,9 @@ void ExportFeature::collectOptions( "--collection", "restrict to collection name (can be specified multiple times)", new VectorParameter<StringParameter>(&_collections)); + + options->addOption("--query", "AQL query to run", + new StringParameter(&_query)); options->addOption("--graph-name", "name of a graph to export", new StringParameter(&_graphName)); @@ -132,9 +135,15 @@ void ExportFeature::validateOptions( _outputDirectory.pop_back(); } - if (_graphName.empty() && _collections.empty()) { + if (_graphName.empty() && _collections.empty() && _query.empty()) { LOG_TOPIC(FATAL, Logger::CONFIG) - << "expecting at least one collection or one graph name"; + << "expecting at least one collection, a graph name or an AQL query"; + FATAL_ERROR_EXIT(); + } + + if (!_query.empty() && (!_collections.empty() || !_graphName.empty())) { + LOG_TOPIC(FATAL, Logger::CONFIG) + << "--query cannot be combined with --collection or --graph-name"; FATAL_ERROR_EXIT(); } @@ -146,8 +155,8 @@ void ExportFeature::validateOptions( if ((_typeExport == "json" || _typeExport == "jsonl" || _typeExport == "csv") && - _collections.empty()) { - LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting at least one collection"; + _collections.empty() && _query.empty()) { + LOG_TOPIC(FATAL, Logger::CONFIG) << "expecting at least one collection or an AQL query"; FATAL_ERROR_EXIT(); } @@ -267,6 +276,11 @@ void ExportFeature::start() { exportedSize += fileSize; } } + } else if (!_query.empty()) { + queryExport(httpClient.get()); + + std::string filePath = _outputDirectory + TRI_DIR_SEPARATOR_STR + "query."
+ _typeExport; + exportedSize += TRI_SizeFile(filePath.c_str()); } } else if (_typeExport == "xgmml" && _graphName.size()) { graphExport(httpClient.get()); @@ -330,43 +344,16 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) { TRI_DEFER(TRI_TRACKED_CLOSE_FILE(fd)); - _firstLine = true; - if (_typeExport == "json") { - std::string openingBracket = "["; - writeToFile(fd, openingBracket, fileName); + writeFirstLine(fd, fileName, collection); - } else if (_typeExport == "xml") { - std::string xmlHeader = - "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" - "<collection name=\""; - xmlHeader.append(encode(collection)); - xmlHeader.append("\">\n"); - writeToFile(fd, xmlHeader, fileName); - - } else if (_typeExport == "csv") { - std::string firstLine = ""; - bool isFirstValue = true; - for (auto const& str : _csvFields) { - if (isFirstValue) { - firstLine += str; - isFirstValue = false; - } else { - firstLine += "," + str; - } - } - firstLine += "\n"; - writeToFile(fd, firstLine, fileName); - } - - writeCollectionBatch(fd, VPackArrayIterator(body.get("result")), fileName); + writeBatch(fd, VPackArrayIterator(body.get("result")), fileName); while (body.hasKey("id")) { std::string const url = "/_api/cursor/" + body.get("id").copyString(); parsedBody = httpCall(httpClient, url, rest::RequestType::PUT); body = parsedBody->slice(); - writeCollectionBatch(fd, VPackArrayIterator(body.get("result")), - fileName); + writeBatch(fd, VPackArrayIterator(body.get("result")), fileName); } if (_typeExport == "json") { @@ -379,8 +366,96 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) { } } -void ExportFeature::writeCollectionBatch(int fd, VPackArrayIterator it, - std::string const& fileName) { +/// @brief runs the AQL query given via --query through /_api/cursor and writes all result batches to the file "query.TYPE" (TYPE = export type) in the output directory +void ExportFeature::queryExport(SimpleHttpClient* httpClient) { + std::string errorMsg; + + if (_progress) { + std::cout << "# Running AQL query '" << _query << "'..." << std::endl; + } + + std::string fileName = _outputDirectory + TRI_DIR_SEPARATOR_STR + "query."
+ _typeExport; + + // remove an existing file first + if (TRI_ExistsFile(fileName.c_str())) { + TRI_UnlinkFile(fileName.c_str()); + } + + std::string const url = "/_api/cursor"; + + VPackBuilder post; + post.openObject(); + post.add("query", VPackValue(_query)); + post.close(); + + std::shared_ptr<VPackBuilder> parsedBody = + httpCall(httpClient, url, rest::RequestType::POST, post.toJson()); + VPackSlice body = parsedBody->slice(); + + int fd = + TRI_TRACKED_CREATE_FILE(fileName.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC, + S_IRUSR | S_IWUSR); + + if (fd < 0) { + errorMsg = "cannot write to file '" + fileName + "'"; + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg); + } + + TRI_DEFER(TRI_TRACKED_CLOSE_FILE(fd)); + + writeFirstLine(fd, fileName, ""); + + writeBatch(fd, VPackArrayIterator(body.get("result")), fileName); + + while (body.hasKey("id")) { + std::string const url = "/_api/cursor/" + body.get("id").copyString(); + parsedBody = httpCall(httpClient, url, rest::RequestType::PUT); + body = parsedBody->slice(); + + writeBatch(fd, VPackArrayIterator(body.get("result")), fileName); + } + + if (_typeExport == "json") { + std::string closingBracket = "\n]"; + writeToFile(fd, closingBracket, fileName); + } else if (_typeExport == "xml") { + std::string xmlFooter = "</collection>"; + writeToFile(fd, xmlFooter, fileName); + } +} + +/// @brief resets the first-line flag and writes the type-specific file prologue: "[" for json, the XML header naming the collection for xml, the comma-separated field names for csv; other types get no prologue +void ExportFeature::writeFirstLine(int fd, std::string const& fileName, std::string const& collection) { + _firstLine = true; + if (_typeExport == "json") { + std::string openingBracket = "["; + writeToFile(fd, openingBracket, fileName); + + } else if (_typeExport == "xml") { + std::string xmlHeader = + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n" + "<collection name=\""; + xmlHeader.append(encode(collection)); + xmlHeader.append("\">\n"); + writeToFile(fd, xmlHeader, fileName); + + } else if (_typeExport == "csv") { + std::string firstLine = ""; + bool isFirstValue = true; + for (auto const& str : _csvFields) { + if (isFirstValue) { + firstLine += str; + isFirstValue = false; + } else { + firstLine += "," + str; + } + } + firstLine += "\n"; + writeToFile(fd,
firstLine, fileName); + } +} + +void ExportFeature::writeBatch(int fd, VPackArrayIterator it, std::string const& fileName) { std::string line; line.reserve(1024); diff --git a/arangosh/Export/ExportFeature.h b/arangosh/Export/ExportFeature.h index ef89e8f5ae..2e97c9e349 100644 --- a/arangosh/Export/ExportFeature.h +++ b/arangosh/Export/ExportFeature.h @@ -51,7 +51,9 @@ class ExportFeature final : public application_features::ApplicationFeature, private: void collectionExport(httpclient::SimpleHttpClient* httpClient); - void writeCollectionBatch(int fd, VPackArrayIterator it, std::string const& fileName); + void queryExport(httpclient::SimpleHttpClient* httpClient); + void writeFirstLine(int fd, std::string const& fileName, std::string const& collection); + void writeBatch(int fd, VPackArrayIterator it, std::string const& fileName); void graphExport(httpclient::SimpleHttpClient* httpClient); void writeGraphBatch(int fd, VPackArrayIterator it, std::string const& fileName); void xgmmlWriteOneAtt(int fd, std::string const& fileName, VPackSlice const& slice, std::string const& name, int deep = 0); @@ -61,6 +63,7 @@ class ExportFeature final : public application_features::ApplicationFeature, private: std::vector<std::string> _collections; + std::string _query; std::string _graphName; std::string _xgmmlLabelAttribute; std::string _typeExport; diff --git a/js/client/modules/@arangodb/testsuites/export.js b/js/client/modules/@arangodb/testsuites/export.js index 85b5781591..38680ef7b9 100644 --- a/js/client/modules/@arangodb/testsuites/export.js +++ b/js/client/modules/@arangodb/testsuites/export.js @@ -118,6 +118,7 @@ function exportTest (options) { results.exportJson.failed = results.exportJson.status ?
0 : 1; try { + JSON.parse(fs.read(fs.join(tmpPath, 'UnitTestsExport.json'))); results.parseJson = { failed: 0, status: true @@ -136,11 +137,10 @@ function exportTest (options) { results.exportJsonl = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath); results.exportJsonl.failed = results.exportJsonl.status ? 0 : 1; try { - const filesContent = fs.read(fs.join(tmpPath, 'UnitTestsExport.jsonl')).split('\n'); - for (const line of filesContent) { - if (line.trim() === '') continue; - JSON.parse(line); - } + fs.read(fs.join(tmpPath, 'UnitTestsExport.jsonl')).split('\n') + .filter(line => line.trim() !== '') + .forEach(line => JSON.parse(line)); + results.parseJsonl = { failed: 0, status: true @@ -183,6 +183,31 @@ function exportTest (options) { }; } + print(CYAN + Date() + ': Export query (jsonl)' + RESET); + args['type'] = 'jsonl'; + args['query'] = 'FOR doc IN UnitTestsExport RETURN doc'; + delete args['graph-name']; + delete args['collection']; + results.exportQuery = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath); + results.exportQuery.failed = results.exportQuery.status ? 0 : 1; + try { + fs.read(fs.join(tmpPath, 'query.jsonl')).split('\n') + .filter(line => line.trim() !== '') + .forEach(line => JSON.parse(line)); + results.parseQueryResult = { + failed: 0, + status: true + }; + } catch (e) { + print(e); + results.failed += 1; + results.parseQueryResult = { + failed: 1, + status: false, + message: e + }; + } + return shutdown(); }