diff --git a/CHANGELOG b/CHANGELOG index 7d3996b427..2c6046d12c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ v3.5.0-rc.6 (2019-XX-XX) ------------------------ +* Added gzip and encryption options to arangoimport and arangoexport. + * Added missing REST API route GET /_api/transaction for retrieving the list of currently ongoing transactions. diff --git a/arangosh/CMakeLists.txt b/arangosh/CMakeLists.txt index 92fe6af023..899734cfa1 100644 --- a/arangosh/CMakeLists.txt +++ b/arangosh/CMakeLists.txt @@ -141,6 +141,7 @@ add_executable(${BIN_ARANGOEXPORT} Export/arangoexport.cpp Shell/ClientFeature.cpp Shell/ConsoleFeature.cpp + Utils/ManagedDirectory.cpp V8Client/ArangoClientHelper.cpp ) @@ -201,6 +202,7 @@ add_executable(arangoimport Import/arangoimport.cpp Shell/ClientFeature.cpp Shell/ConsoleFeature.cpp + Utils/ManagedDirectory.cpp V8Client/ArangoClientHelper.cpp ) @@ -331,6 +333,7 @@ add_executable(${BIN_ARANGOSH} Shell/V8ClientConnection.cpp Shell/V8ShellFeature.cpp Shell/arangosh.cpp + Utils/ManagedDirectory.cpp V8Client/ArangoClientHelper.cpp ) diff --git a/arangosh/Dump/DumpFeature.h b/arangosh/Dump/DumpFeature.h index 40c223f256..d8db5ff9b3 100644 --- a/arangosh/Dump/DumpFeature.h +++ b/arangosh/Dump/DumpFeature.h @@ -80,7 +80,7 @@ class DumpFeature : public application_features::ApplicationFeature { bool includeSystemCollections{false}; bool overwrite{false}; bool progress{true}; - bool useGzip{true}; + bool useGzip{false}; }; /// @brief Stores stats about the overall dump progress diff --git a/arangosh/Export/ExportFeature.cpp b/arangosh/Export/ExportFeature.cpp index a8bd75303f..5b638d59b6 100644 --- a/arangosh/Export/ExportFeature.cpp +++ b/arangosh/Export/ExportFeature.cpp @@ -42,6 +42,10 @@ #include #include +#ifdef USE_ENTERPRISE +#include "Enterprise/Encryption/EncryptionFeature.h" +#endif + using namespace arangodb::basics; using namespace arangodb::httpclient; using namespace arangodb::options; @@ -65,6 +69,7 @@ 
ExportFeature::ExportFeature(application_features::ApplicationServer& server, in _outputDirectory(), _overwrite(false), _progress(true), + _useGzip(false), _firstLine(true), _skippedDeepNested(0), _httpRequestsDone(0), @@ -113,6 +118,12 @@ void ExportFeature::collectOptions(std::shared_ptr opti "xml"}; options->addOption("--type", "type of export", new DiscreteValuesParameter(&_typeExport, exports)); + + options->addOption("--compress-output", + "compress files containing collection contents using gzip format", + new BooleanParameter(&_useGzip)) + .setIntroducedIn(30408) + .setIntroducedIn(30501); } void ExportFeature::validateOptions(std::shared_ptr options) { @@ -172,46 +183,28 @@ void ExportFeature::validateOptions(std::shared_ptr opt } void ExportFeature::prepare() { - bool isDirectory = false; - bool isEmptyDirectory = false; - if (!_outputDirectory.empty()) { - isDirectory = TRI_IsDirectory(_outputDirectory.c_str()); + _directory = std::make_unique(_outputDirectory, !_overwrite, + true, _useGzip); + if (_directory->status().fail()) { + switch (_directory->status().errorNumber()) { + case TRI_ERROR_FILE_EXISTS: + LOG_TOPIC("72723",FATAL, Logger::FIXME) << "cannot write to output directory '" + << _outputDirectory << "'"; - if (isDirectory) { - std::vector files(TRI_FullTreeDirectory(_outputDirectory.c_str())); - // we don't care if the target directory is empty - // note: TRI_FullTreeDirectory always returns at least one - // element (""), even if directory is empty. - isEmptyDirectory = (files.size() <= 1); + break; + case TRI_ERROR_CANNOT_OVERWRITE_FILE: + LOG_TOPIC("81812",FATAL, Logger::FIXME) + << "output directory '" << _outputDirectory + << "' already exists. 
use \"--overwrite true\" to " + "overwrite data in it"; + break; + default: + LOG_TOPIC("94945",ERR, Logger::FIXME) << _directory->status().errorMessage(); + break; } - } - - if (_outputDirectory.empty() || (TRI_ExistsFile(_outputDirectory.c_str()) && !isDirectory)) { - LOG_TOPIC("e8160", FATAL, Logger::SYSCALL) - << "cannot write to output directory '" << _outputDirectory << "'"; FATAL_ERROR_EXIT(); } - - if (isDirectory && !isEmptyDirectory && !_overwrite) { - LOG_TOPIC("dafee", FATAL, Logger::SYSCALL) - << "output directory '" << _outputDirectory - << "' already exists. use \"--overwrite true\" to " - "overwrite data in it"; - FATAL_ERROR_EXIT(); - } - - if (!isDirectory) { - long systemError; - std::string errorMessage; - int res = TRI_CreateDirectory(_outputDirectory.c_str(), systemError, errorMessage); - - if (res != TRI_ERROR_NO_ERROR) { - LOG_TOPIC("1efa3", ERR, Logger::SYSCALL) << "unable to create output directory '" - << _outputDirectory << "': " << errorMessage; - FATAL_ERROR_EXIT(); - } - } } void ExportFeature::start() { @@ -258,6 +251,9 @@ void ExportFeature::start() { for (auto const& collection : _collections) { std::string filePath = _outputDirectory + TRI_DIR_SEPARATOR_STR + collection + "." + _typeExport; + if (_useGzip) { + filePath.append(".gz"); + } // if int64_t fileSize = TRI_SizeFile(filePath.c_str()); if (0 < fileSize) { @@ -269,12 +265,18 @@ void ExportFeature::start() { std::string filePath = _outputDirectory + TRI_DIR_SEPARATOR_STR + "query." + _typeExport; + if (_useGzip) { + filePath.append(".gz"); + } // if exportedSize += TRI_SizeFile(filePath.c_str()); } } else if (_typeExport == "xgmml" && _graphName.size()) { graphExport(httpClient.get()); std::string filePath = _outputDirectory + TRI_DIR_SEPARATOR_STR + _graphName + "." 
+ _typeExport; + if (_useGzip) { + filePath.append(".gz"); + } // if int64_t fileSize = TRI_SizeFile(filePath.c_str()); if (0 < fileSize) { @@ -298,14 +300,6 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) { _currentCollection = collection; - std::string fileName = - _outputDirectory + TRI_DIR_SEPARATOR_STR + collection + "." + _typeExport; - - // remove an existing file first - if (TRI_ExistsFile(fileName.c_str())) { - TRI_UnlinkFile(fileName.c_str()); - } - std::string const url = "_api/cursor"; VPackBuilder post; @@ -324,34 +318,33 @@ void ExportFeature::collectionExport(SimpleHttpClient* httpClient) { httpCall(httpClient, url, rest::RequestType::POST, post.toJson()); VPackSlice body = parsedBody->slice(); - int fd = TRI_CREATE(fileName.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC, - S_IRUSR | S_IWUSR); + std::string fileName = collection + "." + _typeExport; - if (fd < 0) { + std::unique_ptr fd = _directory->writableFile(fileName, _overwrite, 0, true); + + if (nullptr == fd.get() || !fd->status().ok()) { errorMsg = "cannot write to file '" + fileName + "'"; THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg); } - TRI_DEFER(TRI_CLOSE(fd)); + writeFirstLine(*fd, fileName, collection); - writeFirstLine(fd, fileName, collection); - - writeBatch(fd, VPackArrayIterator(body.get("result")), fileName); + writeBatch(*fd, VPackArrayIterator(body.get("result")), fileName); while (body.hasKey("id")) { std::string const url = "/_api/cursor/" + body.get("id").copyString(); parsedBody = httpCall(httpClient, url, rest::RequestType::PUT); body = parsedBody->slice(); - writeBatch(fd, VPackArrayIterator(body.get("result")), fileName); + writeBatch(*fd, VPackArrayIterator(body.get("result")), fileName); } if (_typeExport == "json") { std::string closingBracket = "\n]"; - writeToFile(fd, closingBracket, fileName); + writeToFile(*fd, closingBracket, fileName); } else if (_typeExport == "xml") { std::string xmlFooter = ""; - 
writeToFile(fd, xmlFooter, fileName); + writeToFile(*fd, xmlFooter, fileName); } } } @@ -363,13 +356,6 @@ void ExportFeature::queryExport(SimpleHttpClient* httpClient) { std::cout << "# Running AQL query '" << _query << "'..." << std::endl; } - std::string fileName = _outputDirectory + TRI_DIR_SEPARATOR_STR + "query." + _typeExport; - - // remove an existing file first - if (TRI_ExistsFile(fileName.c_str())) { - TRI_UnlinkFile(fileName.c_str()); - } - std::string const url = "_api/cursor"; VPackBuilder post; @@ -385,38 +371,37 @@ void ExportFeature::queryExport(SimpleHttpClient* httpClient) { httpCall(httpClient, url, rest::RequestType::POST, post.toJson()); VPackSlice body = parsedBody->slice(); - int fd = TRI_CREATE(fileName.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC, - S_IRUSR | S_IWUSR); + std::string fileName = "query." + _typeExport; - if (fd < 0) { + std::unique_ptr fd = _directory->writableFile(fileName, _overwrite, 0, true); + + if (nullptr == fd.get() || !fd->status().ok()) { errorMsg = "cannot write to file '" + fileName + "'"; THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg); } - TRI_DEFER(TRI_CLOSE(fd)); + writeFirstLine(*fd, fileName, ""); - writeFirstLine(fd, fileName, ""); - - writeBatch(fd, VPackArrayIterator(body.get("result")), fileName); + writeBatch(*fd, VPackArrayIterator(body.get("result")), fileName); while (body.hasKey("id")) { std::string const url = "/_api/cursor/" + body.get("id").copyString(); parsedBody = httpCall(httpClient, url, rest::RequestType::PUT); body = parsedBody->slice(); - writeBatch(fd, VPackArrayIterator(body.get("result")), fileName); + writeBatch(*fd, VPackArrayIterator(body.get("result")), fileName); } if (_typeExport == "json") { std::string closingBracket = "\n]"; - writeToFile(fd, closingBracket, fileName); + writeToFile(*fd, closingBracket, fileName); } else if (_typeExport == "xml") { std::string xmlFooter = ""; - writeToFile(fd, xmlFooter, fileName); + writeToFile(*fd, xmlFooter, 
fileName); } } -void ExportFeature::writeFirstLine(int fd, std::string const& fileName, +void ExportFeature::writeFirstLine(ManagedDirectory::File & fd, std::string const& fileName, std::string const& collection) { _firstLine = true; if (_typeExport == "json") { @@ -447,7 +432,7 @@ void ExportFeature::writeFirstLine(int fd, std::string const& fileName, } } -void ExportFeature::writeBatch(int fd, VPackArrayIterator it, std::string const& fileName) { + void ExportFeature::writeBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName) { std::string line; line.reserve(1024); @@ -527,11 +512,8 @@ void ExportFeature::writeBatch(int fd, VPackArrayIterator it, std::string const& } } -void ExportFeature::writeToFile(int fd, std::string const& line, std::string const& fileName) { - if (!TRI_WritePointer(fd, line.c_str(), line.size())) { - std::string errorMsg = "cannot write to file '" + fileName + "'"; - THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg); - } +void ExportFeature::writeToFile(ManagedDirectory::File & fd, std::string const& line, std::string const& fileName) { + fd.write(line.c_str(), line.size()); } std::shared_ptr ExportFeature::httpCall(SimpleHttpClient* httpClient, @@ -619,34 +601,26 @@ void ExportFeature::graphExport(SimpleHttpClient* httpClient) { } } - std::string fileName = - _outputDirectory + TRI_DIR_SEPARATOR_STR + _graphName + "." + _typeExport; + std::string fileName = _graphName + "." 
+ _typeExport; - // remove an existing file first - if (TRI_ExistsFile(fileName.c_str())) { - TRI_UnlinkFile(fileName.c_str()); - } + std::unique_ptr fd = _directory->writableFile(fileName, _overwrite, 0, true); - int fd = TRI_CREATE(fileName.c_str(), O_CREAT | O_EXCL | O_RDWR | TRI_O_CLOEXEC, - S_IRUSR | S_IWUSR); - - if (fd < 0) { + if (nullptr == fd.get() || !fd->status().ok()) { errorMsg = "cannot write to file '" + fileName + "'"; THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_CANNOT_WRITE_FILE, errorMsg); } - TRI_DEFER(TRI_CLOSE(fd)); std::string xmlHeader = R"( )"; - writeToFile(fd, xmlHeader, fileName); + writeToFile(*fd, xmlHeader, fileName); for (auto const& collection : _collections) { if (_progress) { @@ -671,18 +645,18 @@ directed="1"> httpCall(httpClient, url, rest::RequestType::POST, post.toJson()); VPackSlice body = parsedBody->slice(); - writeGraphBatch(fd, VPackArrayIterator(body.get("result")), fileName); + writeGraphBatch(*fd, VPackArrayIterator(body.get("result")), fileName); while (body.hasKey("id")) { std::string const url = "/_api/cursor/" + body.get("id").copyString(); parsedBody = httpCall(httpClient, url, rest::RequestType::PUT); body = parsedBody->slice(); - writeGraphBatch(fd, VPackArrayIterator(body.get("result")), fileName); + writeGraphBatch(*fd, VPackArrayIterator(body.get("result")), fileName); } } std::string closingGraphTag = "\n"; - writeToFile(fd, closingGraphTag, fileName); + writeToFile(*fd, closingGraphTag, fileName); if (_skippedDeepNested) { std::cout << "skipped " << _skippedDeepNested @@ -690,7 +664,7 @@ directed="1"> } } -void ExportFeature::writeGraphBatch(int fd, VPackArrayIterator it, std::string const& fileName) { +void ExportFeature::writeGraphBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName) { std::string xmlTag; for (auto const& doc : it) { @@ -748,7 +722,7 @@ void ExportFeature::writeGraphBatch(int fd, VPackArrayIterator it, std::string c } } -void 
ExportFeature::xgmmlWriteOneAtt(int fd, std::string const& fileName, +void ExportFeature::xgmmlWriteOneAtt(ManagedDirectory::File & fd, std::string const& fileName, VPackSlice const& slice, std::string const& name, int deep) { std::string value, type, xmlTag; diff --git a/arangosh/Export/ExportFeature.h b/arangosh/Export/ExportFeature.h index 09c7756f22..ab2b8492d5 100644 --- a/arangosh/Export/ExportFeature.h +++ b/arangosh/Export/ExportFeature.h @@ -28,6 +28,7 @@ #include "ApplicationFeatures/ApplicationFeature.h" #include "V8Client/ArangoClientHelper.h" #include "lib/Rest/CommonDefines.h" +#include "Utils/ManagedDirectory.h" namespace arangodb { @@ -52,14 +53,14 @@ class ExportFeature final : public application_features::ApplicationFeature, private: void collectionExport(httpclient::SimpleHttpClient* httpClient); void queryExport(httpclient::SimpleHttpClient* httpClient); - void writeFirstLine(int fd, std::string const& fileName, std::string const& collection); - void writeBatch(int fd, VPackArrayIterator it, std::string const& fileName); + void writeFirstLine(ManagedDirectory::File & fd, std::string const& fileName, std::string const& collection); + void writeBatch(ManagedDirectory::File & fd, VPackArrayIterator it, std::string const& fileName); void graphExport(httpclient::SimpleHttpClient* httpClient); - void writeGraphBatch(int fd, VPackArrayIterator it, std::string const& fileName); - void xgmmlWriteOneAtt(int fd, std::string const& fileName, VPackSlice const& slice, + void writeGraphBatch(ManagedDirectory::File &fd, VPackArrayIterator it, std::string const& fileName); + void xgmmlWriteOneAtt(ManagedDirectory::File & fd, std::string const& fileName, VPackSlice const& slice, std::string const& name, int deep = 0); - void writeToFile(int fd, std::string const& string, std::string const& fileName); + void writeToFile(ManagedDirectory::File & fd, std::string const& string, std::string const& fileName); std::shared_ptr httpCall(httpclient::SimpleHttpClient* 
httpClient, std::string const& url, arangodb::rest::RequestType, std::string postBody = ""); @@ -76,16 +77,18 @@ class ExportFeature final : public application_features::ApplicationFeature, std::string _outputDirectory; bool _overwrite; bool _progress; + bool _useGzip; bool _firstLine; uint64_t _skippedDeepNested; uint64_t _httpRequestsDone; std::string _currentCollection; std::string _currentGraph; + std::unique_ptr _directory; int* _result; }; } // namespace arangodb -#endif \ No newline at end of file +#endif diff --git a/arangosh/Export/arangoexport.cpp b/arangosh/Export/arangoexport.cpp index 6a473822ef..47a0ef9fff 100644 --- a/arangosh/Export/arangoexport.cpp +++ b/arangosh/Export/arangoexport.cpp @@ -41,6 +41,10 @@ #include "Shell/ClientFeature.h" #include "Ssl/SslFeature.h" +#ifdef USE_ENTERPRISE +#include "Enterprise/Encryption/EncryptionFeature.h" +#endif + using namespace arangodb; using namespace arangodb::application_features; @@ -70,6 +74,10 @@ int main(int argc, char* argv[]) { server.addFeature(new TempFeature(server, "arangoexport")); server.addFeature(new VersionFeature(server)); +#ifdef USE_ENTERPRISE + server.addFeature(new EncryptionFeature(server)); +#endif + try { server.run(argc, argv); if (server.helpShown()) { diff --git a/arangosh/Import/ImportFeature.cpp b/arangosh/Import/ImportFeature.cpp index 19cc4697b9..416b54a0e5 100644 --- a/arangosh/Import/ImportFeature.cpp +++ b/arangosh/Import/ImportFeature.cpp @@ -252,7 +252,7 @@ void ImportFeature::start() { *_result = ret; if (_typeImport == "auto") { - std::regex re = std::regex(".*?\\.([a-zA-Z]+)", std::regex::ECMAScript); + std::regex re = std::regex(".*?\\.([a-zA-Z]+)(\\.gz|)", std::regex::ECMAScript); std::smatch match; if (std::regex_match(_filename, match, re)) { std::string extension = StringUtils::tolower(match[1].str()); diff --git a/arangosh/Import/ImportHelper.cpp b/arangosh/Import/ImportHelper.cpp index 9e8d7cb82c..d1f7ea46e8 100644 --- a/arangosh/Import/ImportHelper.cpp +++ 
b/arangosh/Import/ImportHelper.cpp @@ -35,6 +35,7 @@ #include "Shell/ClientFeature.h" #include "SimpleHttpClient/SimpleHttpClient.h" #include "SimpleHttpClient/SimpleHttpResult.h" +#include "Utils/ManagedDirectory.h" #include #include @@ -223,8 +224,10 @@ ImportHelper::~ImportHelper() { //////////////////////////////////////////////////////////////////////////////// bool ImportHelper::importDelimited(std::string const& collectionName, - std::string const& fileName, + std::string const& pathName, DelimitedImportType typeImport) { + ManagedDirectory directory(TRI_Dirname(pathName), false, false, true); + std::string fileName(TRI_Basename(pathName.c_str())); _collectionName = collectionName; _firstLine = ""; _outputBuffer.clear(); @@ -240,19 +243,20 @@ bool ImportHelper::importDelimited(std::string const& collectionName, } // read and convert - int fd; + // read and convert int64_t totalLength; + std::unique_ptr fd; if (fileName == "-") { // we don't have a filesize totalLength = 0; - fd = STDIN_FILENO; + fd = directory.readableFile(STDIN_FILENO); } else { // read filesize - totalLength = TRI_SizeFile(fileName.c_str()); - fd = TRI_OPEN(fileName.c_str(), O_RDONLY | TRI_O_CLOEXEC); + totalLength = TRI_SizeFile(pathName.c_str()); + fd = directory.readableFile(TRI_Basename(pathName.c_str()), 0); - if (fd < 0) { + if (!fd) { _errorMessages.push_back(TRI_LAST_ERROR_STR); return false; } @@ -267,9 +271,6 @@ bool ImportHelper::importDelimited(std::string const& collectionName, &separatorLength, true); if (separator == nullptr) { - if (fd != STDIN_FILENO) { - TRI_CLOSE(fd); - } _errorMessages.push_back("out of memory"); return false; } @@ -295,14 +296,11 @@ bool ImportHelper::importDelimited(std::string const& collectionName, char buffer[32768]; while (!_hasError) { - ssize_t n = TRI_READ(fd, buffer, sizeof(buffer)); + ssize_t n = fd->read(buffer, sizeof(buffer)); if (n < 0) { TRI_Free(separator); TRI_DestroyCsvParser(&parser); - if (fd != STDIN_FILENO) { - TRI_CLOSE(fd); - } 
_errorMessages.push_back(TRI_LAST_ERROR_STR); return false; } else if (n == 0) { @@ -315,7 +313,7 @@ bool ImportHelper::importDelimited(std::string const& collectionName, } totalRead += static_cast(n); - reportProgress(totalLength, totalRead, nextProgress); + reportProgress(totalLength, fd->offset(), nextProgress); TRI_ParseCsvString(&parser, buffer, n); } @@ -327,19 +325,17 @@ bool ImportHelper::importDelimited(std::string const& collectionName, TRI_DestroyCsvParser(&parser); TRI_Free(separator); - if (fd != STDIN_FILENO) { - TRI_CLOSE(fd); - } - waitForSenders(); - reportProgress(totalLength, totalRead, nextProgress); + reportProgress(totalLength, fd->offset(), nextProgress); _outputBuffer.clear(); return !_hasError; } bool ImportHelper::importJson(std::string const& collectionName, - std::string const& fileName, bool assumeLinewise) { + std::string const& pathName, bool assumeLinewise) { + ManagedDirectory directory(TRI_Dirname(pathName), false, false, true); + std::string fileName(TRI_Basename(pathName.c_str())); _collectionName = collectionName; _firstLine = ""; _outputBuffer.clear(); @@ -354,19 +350,19 @@ bool ImportHelper::importJson(std::string const& collectionName, } // read and convert - int fd; int64_t totalLength; + std::unique_ptr fd; if (fileName == "-") { // we don't have a filesize totalLength = 0; - fd = STDIN_FILENO; + fd = directory.readableFile(STDIN_FILENO); } else { // read filesize - totalLength = TRI_SizeFile(fileName.c_str()); - fd = TRI_OPEN(fileName.c_str(), O_RDONLY | TRI_O_CLOEXEC); + totalLength = TRI_SizeFile(pathName.c_str()); + fd = directory.readableFile(TRI_Basename(fileName.c_str()), 0); - if (fd < 0) { + if (!fd) { _errorMessages.push_back(TRI_LAST_ERROR_STR); return false; } @@ -391,20 +387,14 @@ bool ImportHelper::importJson(std::string const& collectionName, if (_outputBuffer.reserve(BUFFER_SIZE) == TRI_ERROR_OUT_OF_MEMORY) { _errorMessages.push_back(TRI_errno_string(TRI_ERROR_OUT_OF_MEMORY)); - if (fd != STDIN_FILENO) { - 
TRI_CLOSE(fd); - } return false; } // read directly into string buffer - ssize_t n = TRI_READ(fd, _outputBuffer.end(), BUFFER_SIZE - 1); + ssize_t n = fd->read(_outputBuffer.end(), BUFFER_SIZE - 1); if (n < 0) { _errorMessages.push_back(TRI_LAST_ERROR_STR); - if (fd != STDIN_FILENO) { - TRI_CLOSE(fd); - } return false; } else if (n == 0) { // we're done @@ -430,13 +420,10 @@ bool ImportHelper::importJson(std::string const& collectionName, } totalRead += static_cast(n); - reportProgress(totalLength, totalRead, nextProgress); + reportProgress(totalLength, fd->offset(), nextProgress); if (_outputBuffer.length() > _maxUploadSize) { if (isObject) { - if (fd != STDIN_FILENO) { - TRI_CLOSE(fd); - } _errorMessages.push_back( "import file is too big. please increase the value of --batch-size " "(currently " + @@ -460,12 +447,8 @@ bool ImportHelper::importJson(std::string const& collectionName, sendJsonBuffer(_outputBuffer.c_str(), _outputBuffer.length(), isObject); } - if (fd != STDIN_FILENO) { - TRI_CLOSE(fd); - } - waitForSenders(); - reportProgress(totalLength, totalRead, nextProgress); + reportProgress(totalLength, fd->offset(), nextProgress); MUTEX_LOCKER(guard, _stats._mutex); // this is an approximation only. 
_numberLines is more meaningful for CSV @@ -947,7 +930,7 @@ void ImportHelper::waitForSenders() { if (numIdle == _senderThreads.size()) { return; } - std::this_thread::sleep_for(std::chrono::microseconds(10000)); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); } } } // namespace import diff --git a/arangosh/Import/arangoimport.cpp b/arangosh/Import/arangoimport.cpp index 7ba3b93904..3660ca7ec8 100644 --- a/arangosh/Import/arangoimport.cpp +++ b/arangosh/Import/arangoimport.cpp @@ -41,6 +41,10 @@ #include "Shell/ClientFeature.h" #include "Ssl/SslFeature.h" +#ifdef USE_ENTERPRISE +#include "Enterprise/Encryption/EncryptionFeature.h" +#endif + using namespace arangodb; using namespace arangodb::application_features; @@ -70,6 +74,10 @@ int main(int argc, char* argv[]) { server.addFeature(new TempFeature(server, "arangoimport")); server.addFeature(new VersionFeature(server)); +#ifdef USE_ENTERPRISE + server.addFeature(new EncryptionFeature(server)); +#endif + try { server.run(argc, argv); if (server.helpShown()) { diff --git a/arangosh/Shell/arangosh.cpp b/arangosh/Shell/arangosh.cpp index 2d035f57e0..e7e8202c25 100644 --- a/arangosh/Shell/arangosh.cpp +++ b/arangosh/Shell/arangosh.cpp @@ -47,6 +47,10 @@ #include "Shell/V8ShellFeature.h" #include "Ssl/SslFeature.h" +#ifdef USE_ENTERPRISE +#include "Enterprise/Encryption/EncryptionFeature.h" +#endif + using namespace arangodb; using namespace arangodb::application_features; @@ -78,13 +82,17 @@ int main(int argc, char* argv[]) { server.addFeature(new ShellColorsFeature(server)); server.addFeature(new ShellFeature(server, &ret)); server.addFeature(new ShutdownFeature(server, {"Shell"})); - // server.addFeature(new SslFeature(server)); + server.addFeature(new SslFeature(server)); server.addFeature(new TempFeature(server, name)); server.addFeature(new V8PlatformFeature(server)); server.addFeature(new V8SecurityFeature(server)); server.addFeature(new V8ShellFeature(server, name)); server.addFeature(new 
VersionFeature(server)); +#ifdef USE_ENTERPRISE + server.addFeature(new EncryptionFeature(server)); +#endif + server.run(argc, argv); if (server.helpShown()) { // --help was displayed diff --git a/arangosh/Utils/ManagedDirectory.cpp b/arangosh/Utils/ManagedDirectory.cpp index 5918745c17..efdb927662 100644 --- a/arangosh/Utils/ManagedDirectory.cpp +++ b/arangosh/Utils/ManagedDirectory.cpp @@ -178,13 +178,20 @@ inline ssize_t rawRead(int fd, char* buffer, size_t length, arangodb::Result& st return bytesRead; } -void readEncryptionFile(std::string const& directory, std::string& type) { +void readEncryptionFile(std::string const& directory, std::string& type, + arangodb::EncryptionFeature* encryptionFeature) { using arangodb::basics::FileUtils::slurp; using arangodb::basics::StringUtils::trim; type = ::EncryptionTypeNone; auto filename = ::filePath(directory, ::EncryptionFilename); if (TRI_ExistsFile(filename.c_str())) { type = trim(slurp(filename)); + } else { +#ifdef USE_ENTERPRISE + if (nullptr != encryptionFeature) { + type = encryptionFeature->encryptionType(); + } +#endif } } @@ -214,6 +221,8 @@ ManagedDirectory::ManagedDirectory(std::string const& path, bool requireEmpty, b #ifdef USE_ENTERPRISE _encryptionFeature{ application_features::ApplicationServer::getFeature("Encryption")}, +#else + _encryptionFeature(nullptr), #endif _path{path}, _encryptionType{::EncryptionTypeNone}, @@ -246,7 +255,7 @@ ManagedDirectory::ManagedDirectory(std::string const& path, bool requireEmpty, b "path specified is a non-empty directory"); return; } - ::readEncryptionFile(_path, _encryptionType); + ::readEncryptionFile(_path, _encryptionType, _encryptionFeature); return; } // fall through to write encryption file @@ -329,6 +338,26 @@ std::unique_ptr ManagedDirectory::readableFile(std::stri return file; } +std::unique_ptr ManagedDirectory::readableFile(int fileDescriptor) { + + std::unique_ptr file{nullptr}; + + if (_status.fail()) { // directory is in a bad state + return file; + } 
+ + try { + file = std::make_unique(*this, fileDescriptor, false); + } catch (...) { + _status.reset(TRI_ERROR_CANNOT_READ_FILE, "error opening console pipe" + " for reading"); + return {nullptr}; + } + + return file; +} + + std::unique_ptr ManagedDirectory::writableFile( std::string const& filename, bool overwrite, int flags, bool gzipOk) { std::unique_ptr file; @@ -438,6 +467,44 @@ ManagedDirectory::File::File(ManagedDirectory const& directory, } // if } +ManagedDirectory::File::File(ManagedDirectory const& directory, + int fd, + bool isGzip) + : _directory{directory}, + _path{"stdin"}, + _flags{0}, + _fd{fd}, + _gzfd(-1), + _gzFile(nullptr), +#ifdef USE_ENTERPRISE + _context{::getContext(_directory, _fd, _flags)}, + _status { + ::initialStatus(_fd, _path, _flags, _context.get()) + } +#else + _status { + ::initialStatus(_fd, _path, _flags) + } +#endif +{ + TRI_ASSERT(::flagNotSet(_flags, O_RDWR)); // disallow read/write (encryption) + + if (isGzip) { + const char * gzFlags(nullptr); + + // gzip is going to perform a redundant close, + // simpler code to give it redundant handle + _gzfd = TRI_DUP(_fd); + + if (0 /*O_WRONLY & flags*/) { + gzFlags = "wb"; + } else { + gzFlags = "rb"; + } // else + _gzFile = gzdopen(_gzfd, gzFlags); + } // if +} + ManagedDirectory::File::~File() { try { if (_gzfd >=0) { @@ -552,4 +619,17 @@ Result const& ManagedDirectory::File::close() { return _status; } + +ssize_t ManagedDirectory::File::offset() const { + ssize_t fileBytesRead = -1; + + if (isGzip()) { + fileBytesRead = gzoffset(_gzFile); + } else { + fileBytesRead = (ssize_t)TRI_LSEEK(_fd, 0L, SEEK_CUR); + } // else + + return fileBytesRead; +} + } // namespace arangodb diff --git a/arangosh/Utils/ManagedDirectory.h b/arangosh/Utils/ManagedDirectory.h index 57eb261cdc..e6a749179a 100644 --- a/arangosh/Utils/ManagedDirectory.h +++ b/arangosh/Utils/ManagedDirectory.h @@ -43,6 +43,10 @@ #include namespace arangodb { +#ifndef USE_ENTERPRISE +class EncryptionFeature; // to reduce 
number of #ifdef +#endif + /** * Manages a single directory in the file system, transparently handling * encryption and decryption. Opens/creates and manages file using RAII-style @@ -71,6 +75,9 @@ class ManagedDirectory { * @param isGzip True if reads/writes should go through gzip functions */ File(ManagedDirectory const& directory, std::string const& filename, int flags, bool isGzip); + + File(ManagedDirectory const& directory, int fd, bool isGzip); + /** * @brief Closes the file if it is still open */ @@ -128,6 +135,12 @@ class ManagedDirectory { */ bool isGzip() const {return -1 != _gzfd;} + /** + * @brief Count of bytes read from regular or gzip file, not amount returned by read + */ + + ssize_t offset() const; + private: ManagedDirectory const& _directory; std::string _path; @@ -201,13 +214,11 @@ class ManagedDirectory { */ std::string const& encryptionType() const; -#ifdef USE_ENTERPRISE /** * @brief Returns a pointer to the `EncryptionFeature` instance * @return A pointer to the feature */ EncryptionFeature const* encryptionFeature() const; -#endif /** * @brief Opens a readable file @@ -216,6 +227,7 @@ class ManagedDirectory { * @return Unique pointer to file, if opened */ std::unique_ptr readableFile(std::string const& filename, int flags = 0); + std::unique_ptr readableFile(int fileDescriptor); /** * @brief Opens a writable file @@ -250,9 +262,7 @@ class ManagedDirectory { VPackBuilder vpackFromJsonFile(std::string const& filename); private: -#ifdef USE_ENTERPRISE EncryptionFeature* const _encryptionFeature; -#endif std::string const _path; std::string _encryptionType; bool _writeGzip; diff --git a/js/client/modules/@arangodb/process-utils.js b/js/client/modules/@arangodb/process-utils.js index d51a4f429b..4626f85d00 100755 --- a/js/client/modules/@arangodb/process-utils.js +++ b/js/client/modules/@arangodb/process-utils.js @@ -69,11 +69,11 @@ class ConfigBuilder { this.type = type; switch (type) { case 'restore': - this.config.configuration = 
fs.join(CONFIG_DIR, 'arangorestore.conf'); + this.config.configuration = fs.join(CONFIG_DIR, 'arangorestore.conf'); this.executable = ARANGORESTORE_BIN; break; case 'dump': - this.config.configuration = fs.join(CONFIG_DIR, 'arangodump.conf'); + this.config.configuration = fs.join(CONFIG_DIR, 'arangodump.conf'); this.executable = ARANGODUMP_BIN; break; default: @@ -121,7 +121,7 @@ class ConfigBuilder { } this.config['maskings'] = fs.join(TOP_DIR, "tests/js/common/test-data/maskings", dir); } - activateEncryption() { this.config['encription.keyfile'] = fs.join(this.rootDir, 'secret-key'); } + activateEncryption() { this.config['encryption.keyfile'] = fs.join(this.rootDir, 'secret-key'); } setRootDir(dir) { this.rootDir = dir; } restrictToCollection(collection) { if (this.type !== 'restore' && this.type !== 'dump') { @@ -596,7 +596,7 @@ function executeAndWait (cmd, args, options, valgrindTest, rootDir, circumventCo cmd = TOP_DIR + '/scripts/disable-cores.sh'; } } - + if (options.extremeVerbosity) { print(Date() + ' executeAndWait: cmd =', cmd, 'args =', args); } diff --git a/js/client/modules/@arangodb/testsuites/export.js b/js/client/modules/@arangodb/testsuites/export.js index 50ab709b20..6033fdbf1c 100644 --- a/js/client/modules/@arangodb/testsuites/export.js +++ b/js/client/modules/@arangodb/testsuites/export.js @@ -35,6 +35,7 @@ const fs = require('fs'); const pu = require('@arangodb/process-utils'); const tu = require('@arangodb/test-utils'); const xmldom = require('xmldom'); +const zlib = require('zlib'); // const BLUE = require('internal').COLORS.COLOR_BLUE; const CYAN = require('internal').COLORS.COLOR_CYAN; @@ -117,6 +118,17 @@ function exportTest (options) { return shutdown(); } + let skipEncrypt = true; + let keyfile = ""; + if (global.ARANGODB_CLIENT_VERSION) { + let version = global.ARANGODB_CLIENT_VERSION(true); + if (version.hasOwnProperty('enterprise-version')) { + skipEncrypt = false; + keyfile = fs.join(instanceInfo.rootDir, 'secret-key'); + 
fs.write(keyfile, 'DER-HUND-der-hund-der-hund-der-h'); // must be exactly 32 chars long + } + } + print(CYAN + Date() + ': Export data (json)' + RESET); results.exportJson = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck); results.exportJson.failed = results.exportJson.status ? 0 : 1; @@ -136,6 +148,58 @@ function exportTest (options) { }; } + print(CYAN + Date() + ': Export data (json.gz)' + RESET); + args['compress-output'] = 'true'; + results.exportJsonGz = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck); + results.exportJsonGz.failed = results.exportJsonGz.status ? 0 : 1; + + try { + const zipBuffer = fs.readGzip(fs.join(tmpPath, 'UnitTestsExport.json.gz')); + JSON.parse(zipBuffer); + results.parseJsonGz = { + failed: 0, + status: true + }; + } catch (e) { + results.failed += 1; + results.parseJsonGz = { + failed: 1, + status: false, + message: e + }; + } + args['compress-output'] = 'false'; + + if (!skipEncrypt) { + print(CYAN + Date() + ': Export data (json encrypt)' + RESET); + args['encryption.keyfile'] = keyfile; + if (fs.exists(fs.join(tmpPath, 'ENCRYPTION'))) { + fs.remove(fs.join(tmpPath, 'ENCRYPTION')); + } + results.exportJsonEncrypt = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck); + results.exportJsonEncrypt.failed = results.exportJsonEncrypt.status ? 
0 : 1; + + try { + const decBuffer = fs.readDecrypt(fs.join(tmpPath, 'UnitTestsExport.json'), keyfile); + JSON.parse(decBuffer); + results.parseJsonEncrypt = { + failed: 0, + status: true + }; + } catch (e) { + results.failed += 1; + results.parseJsonEncrypt = { + failed: 1, + status: false, + message: e + }; + } + delete args['encryption.keyfile']; + if (fs.exists(fs.join(tmpPath, 'ENCRYPTION'))) { + fs.remove(fs.join(tmpPath, 'ENCRYPTION')); + } + } + print(CYAN + Date() + ': Export data (jsonl)' + RESET); args['type'] = 'jsonl'; results.exportJsonl = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck); @@ -158,6 +222,29 @@ function exportTest (options) { }; } + print(CYAN + Date() + ': Export data (jsonl.gz)' + RESET); + args['compress-output'] = 'true'; + results.exportJsonlGz = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck); + results.exportJsonlGz.failed = results.exportJsonlGz.status ? 0 : 1; + try { + fs.readGzip(fs.join(tmpPath, 'UnitTestsExport.jsonl.gz')).split('\n') + .filter(line => line.trim() !== '') + .forEach(line => JSON.parse(line)); + + results.parseJsonlGz = { + failed: 0, + status: true + }; + } catch (e) { + results.failed += 1; + results.parseJsonlGz = { + failed: 1, + status: false, + message: e + }; + } + args['compress-output'] = 'false'; + print(CYAN + Date() + ': Export data (xgmml)' + RESET); args['type'] = 'xgmml'; args['graph-name'] = 'UnitTestsExport'; @@ -187,6 +274,35 @@ function exportTest (options) { }; } + print(CYAN + Date() + ': Export data (xgmml.gz)' + RESET); + args['compress-output'] = 'true'; + results.exportXgmmlGz = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck); + results.exportXgmmlGz.failed = results.exportXgmmlGz.status ? 
0 : 1; + try { + const filesContent = fs.readGzip(fs.join(tmpPath, 'UnitTestsExport.xgmml.gz')); + DOMParser.parseFromString(filesContent); + results.parseXgmmlGz = { + failed: 0, + status: true + }; + + if (xmlErrors !== null) { + results.parseXgmmlGz = { + failed: 1, + status: false, + message: xmlErrors + }; + } + } catch (e) { + results.failed += 1; + results.parseXgmmlGz = { + failed: 1, + status: false, + message: e + }; + } + args['compress-output'] = 'false'; + print(CYAN + Date() + ': Export query (xgmml)' + RESET); args['type'] = 'jsonl'; args['query'] = 'FOR doc IN UnitTestsExport RETURN doc'; @@ -212,6 +328,29 @@ function exportTest (options) { }; } + print(CYAN + Date() + ': Export query (jsonl.gz)' + RESET); + args['compress-output'] = 'true'; + results.exportQueryGz = pu.executeAndWait(pu.ARANGOEXPORT_BIN, toArgv(args), options, 'arangosh', tmpPath, false, options.coreCheck); + results.exportQueryGz.failed = results.exportQueryGz.status ? 0 : 1; + try { + fs.readGzip(fs.join(tmpPath, 'query.jsonl.gz')).split('\n') + .filter(line => line.trim() !== '') + .forEach(line => JSON.parse(line)); + results.parseQueryResultGz = { + failed: 0, + status: true + }; + } catch (e) { + print(e); + results.failed += 1; + results.parseQueryResultGz = { + failed: 1, + status: false, + message: e + }; + } + args['compress-output'] = 'false'; + return shutdown(); } diff --git a/js/client/modules/@arangodb/testsuites/importing.js b/js/client/modules/@arangodb/testsuites/importing.js index fd42b0f100..34cf4edb0a 100644 --- a/js/client/modules/@arangodb/testsuites/importing.js +++ b/js/client/modules/@arangodb/testsuites/importing.js @@ -61,6 +61,12 @@ const impTodos = [{ coll: 'UnitTestsImportJson1', type: 'json', create: undefined +}, { + id: 'json1gz', + data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-1.json.gz')), + coll: 'UnitTestsImportJson1Gz', + type: 'json', + create: undefined }, { id: 'json2', data: tu.makePathUnix(fs.join(testPaths.importing[1], 
'import-2.json')), @@ -79,6 +85,12 @@ const impTodos = [{ coll: 'UnitTestsImportJson4', type: 'json', create: undefined +}, { + id: 'json4gz', + data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-4.json.gz')), + coll: 'UnitTestsImportJson4Gz', + type: 'json', + create: undefined }, { id: 'json5', data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-5.json')), @@ -97,6 +109,12 @@ const impTodos = [{ coll: 'UnitTestsImportCsv1', type: 'csv', create: 'true' +}, { + id: 'csv1gz', + data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-1.csv.gz')), + coll: 'UnitTestsImportCsv1Gz', + type: 'csv', + create: 'true' }, { id: 'csv2', data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-2.csv')), @@ -156,6 +174,12 @@ const impTodos = [{ coll: 'UnitTestsImportTsv1', type: 'tsv', create: 'true' +}, { + id: 'tsv1gz', + data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-1.tsv.gz')), + coll: 'UnitTestsImportTsv1Gz', + type: 'tsv', + create: 'true' }, { id: 'tsv2', data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-2.tsv')), @@ -168,6 +192,12 @@ const impTodos = [{ coll: 'UnitTestsImportEdge', type: 'json', create: 'false' +}, { + id: 'edgegz', + data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-edges.json.gz')), + coll: 'UnitTestsImportEdgeGz', + type: 'json', + create: 'false' }, { id: 'unique', data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-ignore.json')), diff --git a/js/common/bootstrap/modules/fs.js b/js/common/bootstrap/modules/fs.js index 71465df66c..8bfa8ab0f5 100644 --- a/js/common/bootstrap/modules/fs.js +++ b/js/common/bootstrap/modules/fs.js @@ -476,6 +476,24 @@ global.DEFINE_MODULE('fs', (function () { }; } + // ////////////////////////////////////////////////////////////////////////////// + // / @brief readGzip + // ////////////////////////////////////////////////////////////////////////////// + + if (global.SYS_READ_GZIP) { + exports.readGzip = global.SYS_READ_GZIP; + delete 
global.SYS_READ_GZIP; + } + + // ////////////////////////////////////////////////////////////////////////////// + // / @brief readDecrypt + // ////////////////////////////////////////////////////////////////////////////// + + if (global.SYS_READ_DECRYPT) { + exports.readDecrypt = global.SYS_READ_DECRYPT; + delete global.SYS_READ_DECRYPT; + } + // ////////////////////////////////////////////////////////////////////////////// // / @brief read64 // ////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Basics/files.cpp b/lib/Basics/files.cpp index c781b1316a..c99c539d1e 100644 --- a/lib/Basics/files.cpp +++ b/lib/Basics/files.cpp @@ -30,6 +30,8 @@ #include #endif +#include "zlib.h" + #include #include @@ -69,6 +71,10 @@ #include #endif +#ifdef USE_ENTERPRISE +#include "Enterprise/Encryption/EncryptionFeature.h" +#endif + using namespace arangodb::basics; using namespace arangodb; @@ -1041,6 +1047,131 @@ char* TRI_SlurpFile(char const* filename, size_t* length) { return result._buffer; } +//////////////////////////////////////////////////////////////////////////////// +/// @brief slurps in a file that is compressed and return uncompressed contents +//////////////////////////////////////////////////////////////////////////////// + +char* TRI_SlurpGzipFile(char const* filename, size_t* length) { + TRI_set_errno(TRI_ERROR_NO_ERROR); + gzFile gzFd(gzopen(filename,"rb")); + auto fdGuard = arangodb::scopeGuard([&gzFd](){ if (nullptr != gzFd) gzclose(gzFd); }); + char * retPtr = nullptr; + + if (nullptr != gzFd) { + TRI_string_buffer_t result; + TRI_InitStringBuffer(&result, false); + + while (true) { + int res = TRI_ReserveStringBuffer(&result, READBUFFER_SIZE); + + if (res != TRI_ERROR_NO_ERROR) { + TRI_AnnihilateStringBuffer(&result); + + TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); + return nullptr; + } + + ssize_t n = gzread(gzFd, (void*)TRI_EndStringBuffer(&result), READBUFFER_SIZE); + + if (n == 0) { + break; + } + + if (n < 0) { + 
TRI_AnnihilateStringBuffer(&result); + + TRI_set_errno(TRI_ERROR_SYS_ERROR); + return nullptr; + } + + TRI_IncreaseLengthStringBuffer(&result, (size_t)n); + } // while + + if (length != nullptr) { + *length = TRI_LengthStringBuffer(&result); + } + + retPtr = result._buffer; + } else { TRI_set_errno(TRI_ERROR_SYS_ERROR); } // if + + return retPtr; +} // TRI_SlurpGzipFile + +#ifdef USE_ENTERPRISE +//////////////////////////////////////////////////////////////////////////////// +/// @brief slurps in a file that is encrypted and return unencrypted contents +//////////////////////////////////////////////////////////////////////////////// + +char* TRI_SlurpDecryptFile(char const* filename, char const * keyfile, size_t* length) { + TRI_set_errno(TRI_ERROR_NO_ERROR); + EncryptionFeature* encryptionFeature; + + encryptionFeature = application_features::ApplicationServer::getFeature("Encryption"); + + if (nullptr == encryptionFeature) { + TRI_set_errno(TRI_ERROR_SYS_ERROR); + return nullptr; + } + + encryptionFeature->setKeyFile(keyfile); + auto keyGuard = arangodb::scopeGuard([encryptionFeature](){ encryptionFeature->clearKey(); }); + + int fd = TRI_OPEN(filename, O_RDONLY | TRI_O_CLOEXEC); + + if (fd == -1) { + TRI_set_errno(TRI_ERROR_SYS_ERROR); + return nullptr; + } + + std::unique_ptr context; + context = encryptionFeature->beginDecryption(fd); + + if (nullptr == context.get() || !context->status().ok()) { + TRI_CLOSE(fd); TRI_set_errno(TRI_ERROR_SYS_ERROR); + return nullptr; + } + + TRI_string_buffer_t result; + TRI_InitStringBuffer(&result, false); + + while (true) { + int res = TRI_ReserveStringBuffer(&result, READBUFFER_SIZE); + + if (res != TRI_ERROR_NO_ERROR) { + TRI_CLOSE(fd); + TRI_AnnihilateStringBuffer(&result); + + TRI_set_errno(TRI_ERROR_OUT_OF_MEMORY); + return nullptr; + } + + ssize_t n = encryptionFeature->readData(*context, (void*)TRI_EndStringBuffer(&result), READBUFFER_SIZE); + + if (n == 0) { + break; + } + + if (n < 0) { + TRI_CLOSE(fd); + + TRI_AnnihilateStringBuffer(&result); + + 
TRI_set_errno(TRI_ERROR_SYS_ERROR); + return nullptr; + } + + TRI_IncreaseLengthStringBuffer(&result, (size_t)n); + } + + if (length != nullptr) { + *length = TRI_LengthStringBuffer(&result); + } + + TRI_CLOSE(fd); + return result._buffer; +} +#endif + //////////////////////////////////////////////////////////////////////////////// /// @brief creates a lock file based on the PID //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Basics/files.h b/lib/Basics/files.h index 4b5c3b89c9..a4400c9844 100644 --- a/lib/Basics/files.h +++ b/lib/Basics/files.h @@ -186,6 +186,19 @@ bool TRI_fsync(int fd); char* TRI_SlurpFile(char const* filename, size_t* length); +//////////////////////////////////////////////////////////////////////////////// +/// @brief slurps in a file that is compressed and return uncompressed contents +//////////////////////////////////////////////////////////////////////////////// + +char* TRI_SlurpGzipFile(char const* filename, size_t* length); + +#ifdef USE_ENTERPRISE +//////////////////////////////////////////////////////////////////////////////// +/// @brief slurps in a file that is encrypted and return unencrypted contents +//////////////////////////////////////////////////////////////////////////////// + +char* TRI_SlurpDecryptFile(char const* filename, char const * keyfile, size_t* length); +#endif //////////////////////////////////////////////////////////////////////////////// /// @brief creates a lock file based on the PID /// diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 88bb897c04..df95d2c74b 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -258,6 +258,7 @@ add_library(${LIB_ARANGO} STATIC Zip/ioapi.cpp Zip/unzip.cpp Zip/zip.cpp + ${ADDITIONAL_LIB_ARANGO_SOURCES} ) target_link_libraries(${LIB_ARANGO} diff --git a/lib/V8/v8-utils.cpp b/lib/V8/v8-utils.cpp index c5d95e633b..6fabb7e6de 100644 --- a/lib/V8/v8-utils.cpp +++ b/lib/V8/v8-utils.cpp @@ -3036,6 +3036,107 @@ static 
void JS_ReadBuffer(v8::FunctionCallbackInfo const& args) { TRI_V8_TRY_CATCH_END } +//////////////////////////////////////////////////////////////////////////////// +/// @brief was docuBlock JS_ReadGzip +//////////////////////////////////////////////////////////////////////////////// + +static void JS_ReadGzip(v8::FunctionCallbackInfo const& args) { + TRI_V8_TRY_CATCH_BEGIN(isolate) + v8::HandleScope scope(isolate); + + if (args.Length() != 1) { + TRI_V8_THROW_EXCEPTION_USAGE("readGzip()"); + } + + TRI_Utf8ValueNFC name(isolate, args[0]); + + if (*name == nullptr) { + TRI_V8_THROW_TYPE_ERROR(" must be a UTF-8 string"); + } + + V8SecurityFeature* v8security = + application_features::ApplicationServer::getFeature( + "V8Security"); + TRI_ASSERT(v8security != nullptr); + + if (!v8security->isAllowedToAccessPath(isolate, *name, FSAccessType::READ)) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_FORBIDDEN, + std::string("not allowed to read files in this path: ") + *name); + } + + size_t length; + char* content = TRI_SlurpGzipFile(*name, &length); + + if (content == nullptr) { + TRI_V8_THROW_EXCEPTION_MESSAGE(TRI_errno(), TRI_last_error()); + } + + auto result = TRI_V8_PAIR_STRING(isolate, content, length); + + TRI_FreeString(content); + + TRI_V8_RETURN(result); + TRI_V8_TRY_CATCH_END + +} + +#ifdef USE_ENTERPRISE +//////////////////////////////////////////////////////////////////////////////// +/// @brief was docuBlock JS_ReadGzip +//////////////////////////////////////////////////////////////////////////////// + +static void JS_ReadDecrypt(v8::FunctionCallbackInfo const& args) { + TRI_V8_TRY_CATCH_BEGIN(isolate) + v8::HandleScope scope(isolate); + + if (args.Length() != 2) { + TRI_V8_THROW_EXCEPTION_USAGE("readDecrypt(,)"); + } + + TRI_Utf8ValueNFC name(isolate, args[0]); + + if (*name == nullptr) { + TRI_V8_THROW_TYPE_ERROR(" must be a UTF-8 string"); + } + + TRI_Utf8ValueNFC keyfile(isolate, args[1]); + + if (*keyfile == nullptr) { + TRI_V8_THROW_TYPE_ERROR(" must be a 
UTF-8 string"); + } + + V8SecurityFeature* v8security = + application_features::ApplicationServer::getFeature( + "V8Security"); + TRI_ASSERT(v8security != nullptr); + + if (!v8security->isAllowedToAccessPath(isolate, *name, FSAccessType::READ)) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_FORBIDDEN, + std::string("not allowed to read files in this path: ") + *name); + } + + if (!v8security->isAllowedToAccessPath(isolate, *keyfile, FSAccessType::READ)) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_FORBIDDEN, + std::string("not allowed to read files in this path: ") + *keyfile); + } + + size_t length; + char* content = TRI_SlurpDecryptFile(*name, *keyfile, &length); + + if (content == nullptr) { + TRI_V8_THROW_EXCEPTION_MESSAGE(TRI_errno(), TRI_last_error()); + } + + auto result = TRI_V8_PAIR_STRING(isolate, content, length); + + TRI_FreeString(content); + + TRI_V8_RETURN(result); + TRI_V8_TRY_CATCH_END + +} +#endif + //////////////////////////////////////////////////////////////////////////////// /// @brief was docuBlock JS_Read64 //////////////////////////////////////////////////////////////////////////////// @@ -3838,7 +3939,7 @@ static void JS_Wait(v8::FunctionCallbackInfo const& args) { // wait without gc double until = TRI_microtime() + n; while (TRI_microtime() < until) { - std::this_thread::sleep_for(std::chrono::microseconds(1000)); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); } TRI_V8_RETURN_UNDEFINED(); @@ -5253,7 +5354,7 @@ bool TRI_RunGarbageCollectionV8(v8::Isolate* isolate, double availableTime) { } } - std::this_thread::sleep_for(std::chrono::microseconds(1000)); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); } return true; @@ -5509,6 +5610,14 @@ void TRI_InitV8Utils(v8::Isolate* isolate, v8::Handle context, TRI_AddGlobalFunctionVocbase(isolate, TRI_V8_ASCII_STRING(isolate, "SYS_READ_BUFFER"), JS_ReadBuffer); + TRI_AddGlobalFunctionVocbase(isolate, + TRI_V8_ASCII_STRING(isolate, "SYS_READ_GZIP"), + JS_ReadGzip); +#ifdef 
USE_ENTERPRISE + TRI_AddGlobalFunctionVocbase(isolate, + TRI_V8_ASCII_STRING(isolate, "SYS_READ_DECRYPT"), + JS_ReadDecrypt); +#endif TRI_AddGlobalFunctionVocbase(isolate, TRI_V8_ASCII_STRING(isolate, "SYS_SHA1"), JS_Sha1); TRI_AddGlobalFunctionVocbase(isolate, TRI_V8_ASCII_STRING(isolate, "SYS_SHA224"), JS_Sha224); diff --git a/tests/js/common/test-data/import/import-1.csv.gz b/tests/js/common/test-data/import/import-1.csv.gz new file mode 100644 index 0000000000..57b85f8191 Binary files /dev/null and b/tests/js/common/test-data/import/import-1.csv.gz differ diff --git a/tests/js/common/test-data/import/import-1.json.gz b/tests/js/common/test-data/import/import-1.json.gz new file mode 100644 index 0000000000..f70aaf48db Binary files /dev/null and b/tests/js/common/test-data/import/import-1.json.gz differ diff --git a/tests/js/common/test-data/import/import-1.tsv.gz b/tests/js/common/test-data/import/import-1.tsv.gz new file mode 100644 index 0000000000..f053b1be0e Binary files /dev/null and b/tests/js/common/test-data/import/import-1.tsv.gz differ diff --git a/tests/js/common/test-data/import/import-4.json.gz b/tests/js/common/test-data/import/import-4.json.gz new file mode 100644 index 0000000000..ceb11e6188 Binary files /dev/null and b/tests/js/common/test-data/import/import-4.json.gz differ diff --git a/tests/js/common/test-data/import/import-edges.json.gz b/tests/js/common/test-data/import/import-edges.json.gz new file mode 100644 index 0000000000..2a5365700a Binary files /dev/null and b/tests/js/common/test-data/import/import-edges.json.gz differ diff --git a/tests/js/server/import/import-setup.js b/tests/js/server/import/import-setup.js index e1d8915e6a..456a05b211 100644 --- a/tests/js/server/import/import-setup.js +++ b/tests/js/server/import/import-setup.js @@ -33,11 +33,14 @@ db._drop("UnitTestsImportCsvSkip"); db._drop("UnitTestsImportJson1"); + db._drop("UnitTestsImportJson1Gz"); db._drop("UnitTestsImportJson2"); db._drop("UnitTestsImportJson3"); 
db._drop("UnitTestsImportJson4"); + db._drop("UnitTestsImportJson4Gz"); db._drop("UnitTestsImportJson5"); db._drop("UnitTestsImportCsv1"); + db._drop("UnitTestsImportCsv1Gz"); db._drop("UnitTestsImportCsv2"); db._drop("UnitTestsImportCsv3"); db._drop("UnitTestsImportCsv4"); @@ -46,9 +49,11 @@ db._drop("UnitTestsImportCsvNoConvert"); db._drop("UnitTestsImportCsvNoEol"); db._drop("UnitTestsImportTsv1"); + db._drop("UnitTestsImportTsv1Gz"); db._drop("UnitTestsImportTsv2"); db._drop("UnitTestsImportVertex"); db._drop("UnitTestsImportEdge"); + db._drop("UnitTestsImportEdgeGz"); db._drop("UnitTestsImportIgnore"); db._drop("UnitTestsImportUniqueConstraints"); db._drop("UnitTestsImportRemoveAttribute"); @@ -58,14 +63,18 @@ } catch(err) {} db._create("UnitTestsImportJson1"); + db._create("UnitTestsImportJson1Gz"); db._create("UnitTestsImportJson2"); db._create("UnitTestsImportJson3"); db._create("UnitTestsImportJson4"); + db._create("UnitTestsImportJson4Gz"); db._create("UnitTestsImportJson5"); db._create("UnitTestsImportTsv1"); + db._create("UnitTestsImportTsv1Gz"); db._create("UnitTestsImportTsv2"); db._create("UnitTestsImportVertex"); db._createEdgeCollection("UnitTestsImportEdge"); + db._createEdgeCollection("UnitTestsImportEdgeGz"); db._create("UnitTestsImportIgnore"); db.UnitTestsImportIgnore.ensureIndex({ type: "hash", fields: [ "value" ], unique: true }); db._create("UnitTestsImportUniqueConstraints"); diff --git a/tests/js/server/import/import-teardown.js b/tests/js/server/import/import-teardown.js index e48bddb2de..6ca66ed346 100644 --- a/tests/js/server/import/import-teardown.js +++ b/tests/js/server/import/import-teardown.js @@ -33,11 +33,14 @@ db._drop("UnitTestsImportCsvSkip"); db._drop("UnitTestsImportJson1"); + db._drop("UnitTestsImportJson1Gz"); db._drop("UnitTestsImportJson2"); db._drop("UnitTestsImportJson3"); db._drop("UnitTestsImportJson4"); + db._drop("UnitTestsImportJson4Gz"); db._drop("UnitTestsImportJson5"); db._drop("UnitTestsImportCsv1"); + 
db._drop("UnitTestsImportCsv1Gz"); db._drop("UnitTestsImportCsv2"); db._drop("UnitTestsImportCsv3"); db._drop("UnitTestsImportCsv4"); @@ -46,9 +49,11 @@ db._drop("UnitTestsImportCsvNoConvert"); db._drop("UnitTestsImportCsvNoEol"); db._drop("UnitTestsImportTsv1"); + db._drop("UnitTestsImportTsv1Gz"); db._drop("UnitTestsImportTsv2"); db._drop("UnitTestsImportVertex"); db._drop("UnitTestsImportEdge"); + db._drop("UnitTestsImportEdgeGz"); db._drop("UnitTestsImportIgnore"); db._drop("UnitTestsImportUniqueConstraints"); try { diff --git a/tests/js/server/import/import.js b/tests/js/server/import/import.js index 556032b73e..7a645e5dfb 100644 --- a/tests/js/server/import/import.js +++ b/tests/js/server/import/import.js @@ -161,6 +161,31 @@ function importTestSuite () { assertEqual(expected, actual); }, + testJsonImport1Gz : function () { + var expected = [ { "id": 1, + "one": 1, + "three": 3, + "two": 2 }, + { "a": 1234, + "b": "the quick fox", + "id": 2, + "jumped": + "over the fox", + "null": null }, + { "id": 3, + "not": "important", + "spacing": "is" }, + { " c ": "h\"'ihi", + "a": true, + "b": false, + "d": "", + "id": 4 }, + { "id": 5 } ]; + + var actual = getQueryResults("FOR i IN UnitTestsImportJson1Gz SORT i.id RETURN i"); + assertEqual(expected, actual); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief test json import //////////////////////////////////////////////////////////////////////////////// @@ -200,6 +225,15 @@ function importTestSuite () { assertEqual(expected, actual); }, + testJsonImport4Gz : function () { + var expected = [ ]; + for (var i = 0; i < 1000; ++i) { + expected.push({ "active": true, "id": i, "value": "somerandomstuff" + i }); + } + var actual = getQueryResults("FOR i IN UnitTestsImportJson4Gz SORT i.id RETURN i"); + assertEqual(expected, actual); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief test json import 
//////////////////////////////////////////////////////////////////////////////// @@ -237,6 +271,19 @@ function importTestSuite () { assertEqual(expected, actual); }, + testCsvImport1Gz : function () { + var expected = [ + { "a": "1", "b": 1, "c": "1.3", "e": -5, "id": 1 }, + { "b": "", "c": 3.1, "d": -2.5, "e": "ddd \" ' ffd", "id": 2 }, + { "a": "9999999999999999999999999999999999", "b": "test", "c" : -99999999, "d": true, "e": -888.4434, "id": 5 }, + { "a": 10e4, "b": 20.5, "c": -42, "d": " null ", "e": false, "id": 6 }, + { "a": -1.05e2, "b": 1.05e-2, "c": true, "d": false, "id": 7 } + ]; + + var actual = getQueryResults("FOR i IN UnitTestsImportCsv1Gz SORT i.id RETURN i"); + assertEqual(expected, actual); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief test csv import //////////////////////////////////////////////////////////////////////////////// @@ -361,6 +408,13 @@ function importTestSuite () { assertEqual(expected, actual); }, + testTsvImport1Gz : function () { + var expected = [ { "ab": "g", "cd": "h", "ef": "i" }, { "ab" : "j", "cd" : "k", "ef" : "l" } ]; + var actual = getQueryResults("FOR i IN UnitTestsImportTsv1Gz SORT i.ab RETURN i"); + + assertEqual(expected, actual); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief test tsv import //////////////////////////////////////////////////////////////////////////////// @@ -394,6 +448,18 @@ function importTestSuite () { assertEqual(expected, actual); }, + testJsonEdgesImportGz : function () { + var expected = [ + { _from : "UnitTestsImportVertex/v1", _to: "UnitTestsImportVertex/v2", id: 1, what: "v1->v2" }, + { _from : "UnitTestsImportVertex/v2", _to: "UnitTestsImportVertex/v3", id: 2, what: "v2->v3" }, + { _from : "UnitTestsImportVertex/v9", _to: "UnitTestsImportVertex/v4", extra: "foo", id: 3, what: "v9->v4" }, + { _from : "UnitTestsImportVertex/v12", _to: "UnitTestsImportVertex/what", id: 4 } + ]; + + var 
actual = getQueryResults("FOR i IN UnitTestsImportEdgeGz SORT i.id RETURN i"); + assertEqual(expected, actual); + }, + //////////////////////////////////////////////////////////////////////////////// /// @brief test unique constraint violations ////////////////////////////////////////////////////////////////////////////////