//////////////////////////////////////////////////////////////////////////////// /// DISCLAIMER /// /// Copyright 2016 ArangoDB GmbH, Cologne, Germany /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// /// Copyright holder is ArangoDB GmbH, Cologne, Germany /// /// @author Manuel Baesler //////////////////////////////////////////////////////////////////////////////// #include "ExportFeature.h" #include "ApplicationFeatures/ApplicationServer.h" #include "Basics/FileUtils.h" #include "Basics/StringUtils.h" #include "Logger/Logger.h" #include "ProgramOptions/ProgramOptions.h" #include "Shell/ClientFeature.h" #include "SimpleHttpClient/GeneralClientConnection.h" #include "SimpleHttpClient/SimpleHttpClient.h" #include "SimpleHttpClient/SimpleHttpResult.h" using namespace arangodb; using namespace arangodb::basics; using namespace arangodb::httpclient; using namespace arangodb::options; ExportFeature::ExportFeature(application_features::ApplicationServer* server, int* result) : ApplicationFeature(server, "Export"), _collections(), _graphName(), _typeExport("json"), _outputDirectory(), _overwrite(false), _progress(true), _firstLine(true), _result(result) { requiresElevatedPrivileges(false); setOptional(false); startsAfter("Client"); startsAfter("Config"); startsAfter("Logger"); _outputDirectory = FileUtils::buildFilename(FileUtils::currentDirectory(), "export"); } void ExportFeature::collectOptions( std::shared_ptr options) { options->addOption( "--collection", "restrict to collection name (can be specified multiple times)", new VectorParameter(&_collections)); options->addOption("--graph-name", "name of a graph to export", new StringParameter(&_graphName)); options->addOption("--output-directory", "output directory", new StringParameter(&_outputDirectory)); options->addOption("--overwrite", "overwrite data in output directory", new BooleanParameter(&_overwrite)); options->addOption("--progress", "show progress", new BooleanParameter(&_progress)); std::unordered_set exportsWithUpperCase = {"csv", "json", "jsonl", "xgmml", "CSV", "JSON", "JSONL", "XGMML"}; std::unordered_set exports = {"csv", "json", "jsonl", "xgmml"}; std::vector exportsVector(exports.begin(), exports.end()); std::string exportsJoined = StringUtils::join(exportsVector, ", "); options->addOption( "--type", "type of export (" + exportsJoined + ")", new DiscreteValuesParameter(&_typeExport, exportsWithUpperCase)); } void ExportFeature::validateOptions( std::shared_ptr options) { auto const& positionals = options->processingResult()._positionals; size_t n = positionals.size(); if (1 == n) { _outputDirectory = positionals[0]; } else if (1 < n) { LOG(FATAL) << "expecting at most one directory, got " + StringUtils::join(positionals, ", "); FATAL_ERROR_EXIT(); } // trim trailing slash from path because it may cause problems on ... // Windows if (!_outputDirectory.empty() && _outputDirectory.back() == TRI_DIR_SEPARATOR_CHAR) { TRI_ASSERT(_outputDirectory.size() > 0); _outputDirectory.pop_back(); } if (_graphName.empty() && _collections.empty()) { LOG(FATAL) << "expecting at least one collection ore one graph name"; FATAL_ERROR_EXIT(); } std::transform(_typeExport.begin(), _typeExport.end(), _typeExport.begin(), ::tolower); } void ExportFeature::prepare() { bool isDirectory = false; bool isEmptyDirectory = false; if (!_outputDirectory.empty()) { isDirectory = TRI_IsDirectory(_outputDirectory.c_str()); if (isDirectory) { std::vector files(TRI_FullTreeDirectory(_outputDirectory.c_str())); // we don't care if the target directory is empty isEmptyDirectory = (files.size() <= 1); // TODO: TRI_FullTreeDirectory always returns at least one element (""), even if directory is empty? } } if (_outputDirectory.empty() || (TRI_ExistsFile(_outputDirectory.c_str()) && !isDirectory)) { LOG(FATAL) << "cannot write to output directory '" << _outputDirectory << "'"; FATAL_ERROR_EXIT(); } if (isDirectory && !isEmptyDirectory && !_overwrite) { LOG(FATAL) << "output directory '" << _outputDirectory << "' already exists. use \"--overwrite true\" to " "overwrite data in it"; FATAL_ERROR_EXIT(); } if (!isDirectory) { long systemError; std::string errorMessage; int res = TRI_CreateDirectory(_outputDirectory.c_str(), systemError, errorMessage); if (res != TRI_ERROR_NO_ERROR) { LOG(ERR) << "unable to create output directory '" << _outputDirectory << "': " << errorMessage; FATAL_ERROR_EXIT(); } } } void ExportFeature::start() { ClientFeature* client = application_features::ApplicationServer::getFeature("Client"); int ret = EXIT_SUCCESS; *_result = ret; std::unique_ptr httpClient; try { httpClient = client->createHttpClient(); } catch (...) { LOG(FATAL) << "cannot create server connection, giving up!"; FATAL_ERROR_EXIT(); } httpClient->setLocationRewriter(static_cast(client), &rewriteLocation); httpClient->setUserNamePassword("/", client->username(), client->password()); // must stay here in order to establish the connection httpClient->getServerVersion(); if (!httpClient->isConnected()) { LOG(ERR) << "Could not connect to endpoint '" << client->endpoint() << "', database: '" << client->databaseName() << "', username: '" << client->username() << "'"; LOG(FATAL) << httpClient->getErrorMessage() << "'"; FATAL_ERROR_EXIT(); } // successfully connected std::cout << "Connected to ArangoDB '" << httpClient->getEndpointSpecification() << "', version " << httpClient->getServerVersion() << ", database: '" << client->databaseName() << "', username: '" << client->username() << "'" << std::endl; if (_typeExport == "xgmml") { graphExport(httpClient.get()); } if (_typeExport == "json" || _typeExport == "jsonl" || _typeExport == "csv") { if (_collections.size()) { collectionExport(httpClient.get()); } else if (_graphName.size()) { graphExport(httpClient.get()); } } *_result = ret; } void ExportFeature::collectionExport(SimpleHttpClient* httpClient) { std::cout << "collection export" << std::endl; std::string errorMsg; for (auto const& collection : _collections) { if (_progress) { std::cout << "# Exporting collection '" << collection << "'..." << std::endl; } std::string fileName = _outputDirectory + TRI_DIR_SEPARATOR_STR + collection + "." + _typeExport; // remove an existing file first if (TRI_ExistsFile(fileName.c_str())) { TRI_UnlinkFile(fileName.c_str()); } int fd = TRI_CREATE(fileName.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC, S_IRUSR | S_IWUSR); if (fd < 0) { errorMsg = "cannot write to file '" + fileName + "'"; THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg); } TRI_DEFER(TRI_CLOSE(fd)); std::string const url = "/_api/export?collection="+StringUtils::urlEncode(collection); _firstLine = true; if (_typeExport == "json") { std::string openingBracket = "[\n"; writeToFile(fd, openingBracket, fileName); } std::shared_ptr parsedBody = httpCall(httpClient, url, rest::RequestType::POST); VPackSlice body = parsedBody->slice(); writeCollectionBatch(fd, VPackArrayIterator(body.get("result")), fileName); while (body.hasKey("id")) { std::string const url = "/_api/export/"+body.get("id").copyString(); parsedBody = httpCall(httpClient, url, rest::RequestType::PUT); body = parsedBody->slice(); writeCollectionBatch(fd, VPackArrayIterator(body.get("result")), fileName); } if (_typeExport == "json") { std::string closingBracket = "]"; writeToFile(fd, closingBracket , fileName); } } } void ExportFeature::writeCollectionBatch(int fd, VPackArrayIterator it, std::string const& fileName) { std::string line; for (auto const& doc : it) { line.clear(); if (_firstLine && _typeExport == "json") { _firstLine = false; } else if(!_firstLine && _typeExport == "json") { line.push_back(','); } line += doc.toJson(); line.push_back('\n'); writeToFile(fd, line, fileName); } } void ExportFeature::writeToFile(int fd, std::string& line, std::string const& fileName) { if (!TRI_WritePointer(fd, line.c_str(), line.size())) { std::string errorMsg = "cannot write to file '" + fileName + "'"; THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg); } } std::shared_ptr ExportFeature::httpCall(SimpleHttpClient* httpClient, std::string const& url, rest::RequestType requestType) { std::string errorMsg; std::unique_ptr response( httpClient->request(requestType, url, "", 0)); if (response == nullptr || !response->isComplete()) { errorMsg = "got invalid response from server: " + httpClient->getErrorMessage(); THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, errorMsg); } if (response->wasHttpError()) { if (response->getHttpReturnCode() == 404) { LOG(FATAL) << "Collection not found."; } else { errorMsg = "got invalid response from server: HTTP " + StringUtils::itoa(response->getHttpReturnCode()) + ": " + response->getHttpReturnMessage(); THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, errorMsg); } } std::shared_ptr parsedBody; try { parsedBody = response->getBodyVelocyPack(); } catch (...) { errorMsg = "got malformed JSON response from server"; THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, errorMsg); } VPackSlice body = parsedBody->slice(); if (!body.isObject()) { errorMsg = "got malformed JSON response from server"; THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, errorMsg); } return parsedBody; } void ExportFeature::graphExport(SimpleHttpClient* httpClient) { if (_progress) { std::cout << "graph export" << std::endl; } }