mirror of https://gitee.com/bigwinds/arangodb
compress-data option for arangodump. ported from 3.4 (#8861)
This commit is contained in:
parent
c054cddfe5
commit
eef492721a
|
@ -1,6 +1,11 @@
|
|||
devel
|
||||
-----
|
||||
|
||||
* added --compress-output flag to arangodump. Activates gzip compression for
|
||||
collection data. Metadata files, such as .structure.json and .view.json,
|
||||
do not get compressed. No option is needed for arangorestore to restore
|
||||
.data.json.gz files.
|
||||
|
||||
* added options to make server more secure:
|
||||
|
||||
- `--server.harden`: denies access to certain REST APIs that return server internals
|
||||
|
|
|
@ -192,3 +192,29 @@ Using a different key will lead to the backup being non-recoverable.
|
|||
Note that encrypted backups can be used together with the already existing
|
||||
RocksDB encryption-at-rest feature, but they can also be used for the MMFiles
|
||||
engine, which does not have encryption-at-rest.
|
||||
|
||||
Compression
|
||||
-----------
|
||||
|
||||
<small>Introduced in: v3.4.6, v3.5.0</small>
|
||||
|
||||
`--compress-output`
|
||||
|
||||
Data can optionally be dumped in a compressed format to save space on disk.
|
||||
The `--compress-output` option cannot be used together with [Encryption](#encryption).
|
||||
|
||||
If compression is enabled, no `.data.json` files are written. Instead, the
|
||||
collection data gets compressed using the Gzip algorithm and for each collection
|
||||
a `.data.json.gz` file is written. Metadata files such as `.structure.json` and
|
||||
`.view.json` do not get compressed.
|
||||
|
||||
```
|
||||
arangodump --output-directory "dump" --compress-output
|
||||
```
|
||||
|
||||
Compressed dumps can be restored with *arangorestore*, which automatically
|
||||
detects whether the data is compressed or not based on the file extension.
|
||||
|
||||
```
|
||||
arangorestore --input-directory "dump"
|
||||
```
|
||||
|
|
|
@ -58,6 +58,23 @@
|
|||
"section" : "",
|
||||
"type" : "string..."
|
||||
},
|
||||
"compress-output" : {
|
||||
"category" : "option",
|
||||
"default" : true,
|
||||
"deprecatedIn" : null,
|
||||
"description" : "compress files containing collection contents using gzip format",
|
||||
"dynamic" : false,
|
||||
"enterpriseOnly" : false,
|
||||
"hidden" : false,
|
||||
"introducedIn" : [
|
||||
"v3.4.6",
|
||||
"v3.5.0"
|
||||
],
|
||||
"obsolete" : false,
|
||||
"requiresValue" : false,
|
||||
"section" : "",
|
||||
"type" : "boolean"
|
||||
},
|
||||
"config" : {
|
||||
"category" : "option",
|
||||
"default" : "",
|
||||
|
|
|
@ -491,7 +491,7 @@ arangodb::Result processJob(arangodb::httpclient::SimpleHttpClient& client,
|
|||
auto file = jobData.directory.writableFile(
|
||||
jobData.name + (jobData.options.clusterMode ? "" : ("_" + hexString)) +
|
||||
".structure.json",
|
||||
true);
|
||||
true, 0, false);
|
||||
if (!::fileOk(file.get())) {
|
||||
return ::fileError(file.get(), true);
|
||||
}
|
||||
|
@ -647,6 +647,12 @@ void DumpFeature::collectOptions(std::shared_ptr<options::ProgramOptions> option
|
|||
new StringParameter(&_options.maskingsFile))
|
||||
.setIntroducedIn(30322)
|
||||
.setIntroducedIn(30402);
|
||||
|
||||
options->addOption("--compress-output",
|
||||
"compress files containing collection contents using gzip format",
|
||||
new BooleanParameter(&_options.useGzip))
|
||||
.setIntroducedIn(30406)
|
||||
.setIntroducedIn(30500);
|
||||
}
|
||||
|
||||
void DumpFeature::validateOptions(std::shared_ptr<options::ProgramOptions> options) {
|
||||
|
@ -987,7 +993,7 @@ Result DumpFeature::storeDumpJson(VPackSlice const& body, std::string const& dbN
|
|||
meta.close();
|
||||
|
||||
// save last tick in file
|
||||
auto file = _directory->writableFile("dump.json", true);
|
||||
auto file = _directory->writableFile("dump.json", true, 0, false);
|
||||
if (!::fileOk(file.get())) {
|
||||
return ::fileError(file.get(), true);
|
||||
}
|
||||
|
@ -1018,7 +1024,7 @@ Result DumpFeature::storeViews(VPackSlice const& views) const {
|
|||
std::string fname = nameSlice.copyString();
|
||||
fname.append(".view.json");
|
||||
// open the view's .view.json file for writing (never gzip-compressed)
|
||||
auto file = _directory->writableFile(fname, true);
|
||||
auto file = _directory->writableFile(fname, true, 0, false);
|
||||
if (!::fileOk(file.get())) {
|
||||
return ::fileError(file.get(), true);
|
||||
}
|
||||
|
@ -1073,7 +1079,8 @@ void DumpFeature::start() {
|
|||
|
||||
// set up the output directory, not much else
|
||||
_directory = std::make_unique<ManagedDirectory>(_options.outputPath,
|
||||
!_options.overwrite, true);
|
||||
!_options.overwrite, true,
|
||||
_options.useGzip);
|
||||
if (_directory->status().fail()) {
|
||||
switch (_directory->status().errorNumber()) {
|
||||
case TRI_ERROR_FILE_EXISTS:
|
||||
|
|
|
@ -80,6 +80,7 @@ class DumpFeature : public application_features::ApplicationFeature {
|
|||
bool includeSystemCollections{false};
|
||||
bool overwrite{false};
|
||||
bool progress{true};
|
||||
bool useGzip{true};
|
||||
};
|
||||
|
||||
/// @brief Stores stats about the overall dump progress
|
||||
|
|
|
@ -609,6 +609,7 @@ arangodb::Result restoreData(arangodb::httpclient::SimpleHttpClient& httpClient,
|
|||
|
||||
arangodb::Result result;
|
||||
StringBuffer buffer(true);
|
||||
bool isGzip(false);
|
||||
|
||||
VPackSlice const parameters = jobData.collection.get("parameters");
|
||||
std::string const cname =
|
||||
|
@ -618,12 +619,24 @@ arangodb::Result restoreData(arangodb::httpclient::SimpleHttpClient& httpClient,
|
|||
std::string const collectionType(type == 2 ? "document" : "edge");
|
||||
|
||||
// import data. check if we have a datafile
|
||||
// ... there are 4 possible names
|
||||
auto datafile = jobData.directory.readableFile(
|
||||
cname + "_" + arangodb::rest::SslInterface::sslMD5(cname) + ".data.json");
|
||||
if (!datafile || datafile->status().fail()) {
|
||||
datafile = jobData.directory.readableFile(
|
||||
cname + "_" + arangodb::rest::SslInterface::sslMD5(cname) + ".data.json.gz");
|
||||
isGzip = true;
|
||||
} // if
|
||||
if (!datafile || datafile->status().fail()) {
|
||||
datafile = jobData.directory.readableFile(
|
||||
cname + ".data.json.gz");
|
||||
isGzip = true;
|
||||
} // if
|
||||
if (!datafile || datafile->status().fail()) {
|
||||
datafile = jobData.directory.readableFile(cname + ".data.json");
|
||||
isGzip = false;
|
||||
if (!datafile || datafile->status().fail()) {
|
||||
result = {TRI_ERROR_CANNOT_READ_FILE, "could not open data file for collection " + cname + "'"};
|
||||
result = {TRI_ERROR_CANNOT_READ_FILE, "could not open file"};
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -702,11 +715,21 @@ arangodb::Result restoreData(arangodb::httpclient::SimpleHttpClient& httpClient,
|
|||
if (jobData.options.progress && fileSize > 0 &&
|
||||
numReadSinceLastReport > 1024 * 1024 * 8) {
|
||||
// report every 8MB of transferred data
|
||||
// currently do not have unzipped size for .gz files
|
||||
std::stringstream percentage, ofFilesize;
|
||||
if (isGzip) {
|
||||
ofFilesize << "";
|
||||
percentage << "";
|
||||
} else {
|
||||
ofFilesize << " of " << fileSize;
|
||||
percentage << " ("
|
||||
<< int(100. * double(numReadForThisCollection) / double(fileSize)) << " %)";
|
||||
} // else
|
||||
|
||||
LOG_TOPIC("69a73", INFO, Logger::RESTORE)
|
||||
<< "# Still loading data into " << collectionType << " collection '"
|
||||
<< cname << "', " << numReadForThisCollection << " of " << fileSize
|
||||
<< " byte(s) restored ("
|
||||
<< int(100. * double(numReadForThisCollection) / double(fileSize)) << " %)";
|
||||
<< cname << "', " << numReadForThisCollection << ofFilesize.str()
|
||||
<< " byte(s) restored" << percentage.str();
|
||||
numReadSinceLastReport = 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -203,7 +203,7 @@ void writeEncryptionFile(std::string const& directory, std::string& type) {
|
|||
|
||||
namespace arangodb {
|
||||
|
||||
ManagedDirectory::ManagedDirectory(std::string const& path, bool requireEmpty, bool create)
|
||||
ManagedDirectory::ManagedDirectory(std::string const& path, bool requireEmpty, bool create, bool writeGzip)
|
||||
:
|
||||
#ifdef USE_ENTERPRISE
|
||||
_encryptionFeature{
|
||||
|
@ -211,6 +211,7 @@ ManagedDirectory::ManagedDirectory(std::string const& path, bool requireEmpty, b
|
|||
#endif
|
||||
_path{path},
|
||||
_encryptionType{::EncryptionTypeNone},
|
||||
_writeGzip(writeGzip),
|
||||
_status{TRI_ERROR_NO_ERROR} {
|
||||
if (_path.empty()) {
|
||||
_status.reset(TRI_ERROR_BAD_PARAMETER, "must specify a path");
|
||||
|
@ -264,6 +265,11 @@ ManagedDirectory::ManagedDirectory(std::string const& path, bool requireEmpty, b
|
|||
}
|
||||
}
|
||||
|
||||
// currently gzip and encryption are mutually exclusive, encryption wins
|
||||
if (::EncryptionTypeNone != _encryptionType) {
|
||||
_writeGzip = false;
|
||||
} // if
|
||||
|
||||
#ifdef USE_ENTERPRISE
|
||||
::writeEncryptionFile(_path, _encryptionType, _encryptionFeature);
|
||||
#else
|
||||
|
@ -306,8 +312,9 @@ std::unique_ptr<ManagedDirectory::File> ManagedDirectory::readableFile(std::stri
|
|||
}
|
||||
|
||||
try {
|
||||
bool gzFlag = (0 == filename.substr(filename.size() - 3).compare(".gz"));
|
||||
file = std::make_unique<File>(*this, filename,
|
||||
(ManagedDirectory::DefaultReadFlags ^ flags));
|
||||
(ManagedDirectory::DefaultReadFlags ^ flags), gzFlag);
|
||||
} catch (...) {
|
||||
_status.reset(TRI_ERROR_CANNOT_READ_FILE, "error opening file " +
|
||||
::filePath(*this, filename) +
|
||||
|
@ -319,7 +326,7 @@ std::unique_ptr<ManagedDirectory::File> ManagedDirectory::readableFile(std::stri
|
|||
}
|
||||
|
||||
std::unique_ptr<ManagedDirectory::File> ManagedDirectory::writableFile(
|
||||
std::string const& filename, bool overwrite, int flags) {
|
||||
std::string const& filename, bool overwrite, int flags, bool gzipOk) {
|
||||
std::unique_ptr<File> file{nullptr};
|
||||
|
||||
if (_status.fail()) { // directory is in a bad state
|
||||
|
@ -327,8 +334,13 @@ std::unique_ptr<ManagedDirectory::File> ManagedDirectory::writableFile(
|
|||
}
|
||||
|
||||
try {
|
||||
std::string filenameCopy = filename;
|
||||
if (_writeGzip && gzipOk) {
|
||||
filenameCopy.append(".gz");
|
||||
} // if
|
||||
|
||||
// deal with existing file first if it exists
|
||||
auto path = ::filePath(*this, filename);
|
||||
auto path = ::filePath(*this, filenameCopy);
|
||||
bool fileExists = TRI_ExistsFile(path.c_str());
|
||||
if (fileExists) {
|
||||
if (overwrite) {
|
||||
|
@ -341,7 +353,7 @@ std::unique_ptr<ManagedDirectory::File> ManagedDirectory::writableFile(
|
|||
}
|
||||
|
||||
file = std::make_unique<File>(*this, filename,
|
||||
(ManagedDirectory::DefaultWriteFlags ^ flags));
|
||||
(ManagedDirectory::DefaultWriteFlags ^ flags), _writeGzip && gzipOk);
|
||||
} catch (...) {
|
||||
return {nullptr};
|
||||
}
|
||||
|
@ -385,11 +397,14 @@ VPackBuilder ManagedDirectory::vpackFromJsonFile(std::string const& filename) {
|
|||
}
|
||||
|
||||
ManagedDirectory::File::File(ManagedDirectory const& directory,
|
||||
std::string const& filename, int flags)
|
||||
std::string const& filename, int flags,
|
||||
bool isGzip)
|
||||
: _directory{directory},
|
||||
_path{::filePath(_directory, filename)},
|
||||
_flags{flags},
|
||||
_fd{::openFile(_path, _flags)},
|
||||
_gzfd(-1),
|
||||
_gzFile(nullptr),
|
||||
#ifdef USE_ENTERPRISE
|
||||
_context{::getContext(_directory, _fd, _flags)},
|
||||
_status {
|
||||
|
@ -402,10 +417,31 @@ ManagedDirectory::File::File(ManagedDirectory const& directory,
|
|||
#endif
|
||||
{
|
||||
TRI_ASSERT(::flagNotSet(_flags, O_RDWR)); // disallow read/write (encryption)
|
||||
|
||||
if (isGzip) {
|
||||
const char * gzFlags(nullptr);
|
||||
|
||||
// gzip is going to perform a redundant close,
|
||||
// simpler code to give it redundant handle
|
||||
_gzfd = dup(_fd);
|
||||
|
||||
if (O_WRONLY & flags) {
|
||||
gzFlags = "wb";
|
||||
} else {
|
||||
gzFlags = "rb";
|
||||
} // else
|
||||
_gzFile = gzdopen(_gzfd, gzFlags);
|
||||
} // if
|
||||
}
|
||||
|
||||
ManagedDirectory::File::~File() {
|
||||
try {
|
||||
if (_gzfd >=0) {
|
||||
gzclose(_gzFile);
|
||||
_gzfd = -1;
|
||||
_gzFile = nullptr;
|
||||
} // if
|
||||
|
||||
if (_fd >= 0) {
|
||||
::closeFile(_fd, _status);
|
||||
}
|
||||
|
@ -428,11 +464,17 @@ void ManagedDirectory::File::write(char const* data, size_t length) {
|
|||
if (!written) {
|
||||
_status = _context->status();
|
||||
}
|
||||
} else if (isGzip()) {
|
||||
gzwrite(_gzFile, data, length);
|
||||
} else {
|
||||
::rawWrite(_fd, data, length, _status, _path, _flags);
|
||||
}
|
||||
#else
|
||||
::rawWrite(_fd, data, length, _status, _path, _flags);
|
||||
if (isGzip()) {
|
||||
gzwrite(_gzFile, data, length);
|
||||
} else {
|
||||
::rawWrite(_fd, data, length, _status, _path, _flags);
|
||||
} // else
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -448,11 +490,17 @@ ssize_t ManagedDirectory::File::read(char* buffer, size_t length) {
|
|||
if (bytesRead < 0) {
|
||||
_status = _context->status();
|
||||
}
|
||||
} else if (isGzip()) {
|
||||
bytesRead = gzread(_gzFile, buffer, length);
|
||||
} else {
|
||||
bytesRead = ::rawRead(_fd, buffer, length, _status, _path, _flags);
|
||||
}
|
||||
#else
|
||||
bytesRead = ::rawRead(_fd, buffer, length, _status, _path, _flags);
|
||||
if (isGzip()) {
|
||||
bytesRead = gzread(_gzFile, buffer, length);
|
||||
} else {
|
||||
bytesRead = ::rawRead(_fd, buffer, length, _status, _path, _flags);
|
||||
} // else
|
||||
#endif
|
||||
return bytesRead;
|
||||
}
|
||||
|
@ -499,6 +547,12 @@ void ManagedDirectory::File::spit(std::string const& content) {
|
|||
}
|
||||
|
||||
Result const& ManagedDirectory::File::close() {
|
||||
if (_gzfd >=0) {
|
||||
gzclose(_gzFile);
|
||||
_gzfd = -1;
|
||||
_gzFile = nullptr;
|
||||
} // if
|
||||
|
||||
if (_fd >= 0) {
|
||||
::closeFile(_fd, _status);
|
||||
}
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#ifndef ARANGOSH_UTILS_MANAGED_DIRECTORY_H
|
||||
#define ARANGOSH_UTILS_MANAGED_DIRECTORY_H 1
|
||||
|
||||
#include "zlib.h"
|
||||
|
||||
#include <velocypack/Builder.h>
|
||||
#include <velocypack/Parser.h>
|
||||
#include <velocypack/velocypack-aliases.h>
|
||||
|
@ -60,8 +62,9 @@ class ManagedDirectory {
|
|||
* @param directory A reference to the containing directory
|
||||
* @param filename The name of the file within the directory
|
||||
* @param flags The flags to pass to the OS to open the file
|
||||
* @param isGzip True if reads/writes should go through gzip functions
|
||||
*/
|
||||
File(ManagedDirectory const& directory, std::string const& filename, int flags);
|
||||
File(ManagedDirectory const& directory, std::string const& filename, int flags, bool isGzip);
|
||||
/**
|
||||
* @brief Closes the file if it is still open
|
||||
*/
|
||||
|
@ -113,11 +116,19 @@ class ManagedDirectory {
|
|||
*/
|
||||
Result const& close();
|
||||
|
||||
/**
|
||||
* @brief Indicates whether reads/writes on this file go through gzip
|
||||
* @return True if a gzip descriptor is attached (`_gzfd` is not -1)
|
||||
*/
|
||||
bool isGzip() const {
  // any descriptor value other than -1 means a gzip handle was set up
  return _gzfd != -1;
}
|
||||
|
||||
private:
|
||||
ManagedDirectory const& _directory;
|
||||
std::string _path;
|
||||
int _flags;
|
||||
int _fd;
|
||||
int _gzfd; // duplicate fd for gzip close
|
||||
gzFile _gzFile;
|
||||
#ifdef USE_ENTERPRISE
|
||||
std::unique_ptr<EncryptionFeature::Context> _context;
|
||||
#endif
|
||||
|
@ -139,8 +150,9 @@ class ManagedDirectory {
|
|||
* @param path The path to the directory
|
||||
* @param requireEmpty If `true`, opening a non-empty directory will fail
|
||||
* @param create If `true` and directory does not exist, create it
|
||||
* @param writeGzip True if writes should use gzip (reads autodetect .gz)
|
||||
*/
|
||||
ManagedDirectory(std::string const& path, bool requireEmpty, bool create);
|
||||
ManagedDirectory(std::string const& path, bool requireEmpty, bool create, bool writeGzip = true);
|
||||
~ManagedDirectory();
|
||||
|
||||
public:
|
||||
|
@ -204,10 +216,11 @@ class ManagedDirectory {
|
|||
* @param name The filename, relative to the directory
|
||||
* @param overwrite Whether to overwrite file if it exists (otherwise fail)
|
||||
* @param flags Flags (will be XORed with `DefaultWriteFlags`)
|
||||
* @param gzipOk Flag whether this file is suitable for gzip (when enabled)
|
||||
* @return Unique pointer to file, if opened
|
||||
*/
|
||||
std::unique_ptr<File> writableFile(std::string const& filename,
|
||||
bool overwrite, int flags = 0);
|
||||
bool overwrite, int flags = 0, bool gzipOk = true );
|
||||
|
||||
/**
|
||||
* @brief Write a string to file
|
||||
|
@ -236,6 +249,7 @@ class ManagedDirectory {
|
|||
#endif
|
||||
std::string const _path;
|
||||
std::string _encryptionType;
|
||||
bool _writeGzip;
|
||||
Result _status;
|
||||
};
|
||||
} // namespace arangodb
|
||||
|
|
Loading…
Reference in New Issue