mirror of https://gitee.com/bigwinds/arangodb
Feature/maskings [WIP] (#7347)
This commit is contained in:
parent
8449a185ec
commit
710ca04fb1
|
@ -2,3 +2,4 @@ BasedOnStyle: Google
|
|||
DerivePointerAlignment: false
|
||||
PointerAlignment: Left
|
||||
Standard: Cpp11
|
||||
SortUsingDeclarations: true
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
Arangodump Data Maskings
|
||||
========================
|
||||
|
||||
*--maskings path-of-config*
|
||||
|
||||
It is possible to mask certain fields during dump. A JSON config file is
|
||||
used to define which fields should be masked and how.
|
||||
|
||||
The general structure of the config file is
|
||||
|
||||
{
|
||||
"collection-name": {
|
||||
"type": MASKING_TYPE,
|
||||
"maskings" : [
|
||||
MASKING1,
|
||||
MASKING2,
|
||||
...
|
||||
]
|
||||
},
|
||||
...
|
||||
}
|
||||
|
||||
Masking Types
|
||||
-------------
|
||||
|
||||
This is a string describing how to mask this collection. Possible values are
|
||||
|
||||
- "exclude": the collection is ignored completely and not even the structure data
|
||||
is dumped.
|
||||
|
||||
- "structure": only the collection structure is dumped, but no data at all
|
||||
|
||||
- "masked": the collection structure and all data is dumped. However, the data
|
||||
is subject to maskings defined in the attribute maskings.
|
||||
|
||||
- "full": the collection structure and all data is dumped. No masking at all
|
||||
is done for this collection.
|
||||
|
||||
For example:
|
||||
|
||||
{
|
||||
"private": {
|
||||
"type": "exclude"
|
||||
},
|
||||
|
||||
"log": {
|
||||
"type": "structure"
|
||||
},
|
||||
|
||||
"person": {
|
||||
"type": "masked",
|
||||
"maskings": [
|
||||
{
|
||||
"path": "name",
|
||||
"type": "xify_front",
|
||||
"unmaskedLength": 2
|
||||
},
|
||||
{
|
||||
"path": ".security_id",
|
||||
"type": "xify_front",
|
||||
"unmaskedLength": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
In the example the collection "private" is completely ignored. Only the
|
||||
structure of the collection "log" is dumped, but not the data itself.
|
||||
The collection "person" is dumped completely but masking the "name" field if
|
||||
it occurs on the top-level. It masks the field "security_id" anywhere in the
|
||||
document. See below for a complete description of the parameters of
|
||||
"xify_front".
|
||||
|
||||
Path
|
||||
----
|
||||
|
||||
If the path starts with a `.` then it is considered to be a wildcard match.
|
||||
For example, `.name` will match the attribute name `name` everywhere in the
|
||||
document. `name` will only match at top level. `person.name` will match
|
||||
the attribute `name` in the top-level object `person`.
|
||||
|
||||
If you have an attribute name that contains a dot, you need to quote the
|
||||
name with either a tick or a backtick. For example
|
||||
|
||||
"path": "´name.with.dots´"
|
||||
|
||||
or
|
||||
|
||||
"path": "`name.with.dots`"
|
||||
|
||||
xify_front
|
||||
----------
|
||||
|
||||
This masking replaces characters with `x` and ` `. Alphanumeric characters,
|
||||
`_` and `-` are replaced by `x`, everything else is replaced by ` `.
|
||||
|
||||
{
|
||||
"type": "xify_front",
|
||||
"path": ".name",
|
||||
"unmaskedLength": 2
|
||||
}
|
||||
|
||||
This will mask all alphanumeric characters of a word except the last 2.
|
||||
Words of length 1 and 2 are unmasked. If the attribute value is not a
|
||||
string the result will be `xxxx`.
|
||||
|
||||
"This is a test!Do you agree?"
|
||||
|
||||
will become
|
||||
|
||||
"xxis is a xxst Do xou xxxee "
|
||||
|
||||
There is a catch. If you have an index on the attribute the masking
|
||||
might distort the index efficiency or even cause errors in case of a
|
||||
unique index.
|
||||
|
||||
{
|
||||
"type": "xify_front",
|
||||
"path": ".name",
|
||||
"unmaskedLength": 2,
|
||||
"hash": true
|
||||
}
|
||||
|
||||
This will add a hash at the end of the string.
|
||||
|
||||
"This is a test!Do you agree?"
|
||||
|
||||
will become
|
||||
|
||||
"xxis is a xxst Do xou xxxee NAATm8c9hVQ="
|
||||
|
||||
Note that the hash is based on a random secret that is different for
|
||||
each run. This avoids dictionary attacks.
|
||||
|
||||
If you need reproducible results, i.e. hashes that do not change between
|
||||
different runs of *arangodump*, you need to specify a seed, which must
|
||||
not be `0`.
|
||||
|
||||
{
|
||||
"type": "xify_front",
|
||||
"path": ".name",
|
||||
"unmaskedLength": 2,
|
||||
"hash": true,
|
||||
"seed": 246781478647
|
||||
}
|
|
@ -71,6 +71,7 @@
|
|||
* [Arangodump](Programs/Arangodump/README.md)
|
||||
* [Examples](Programs/Arangodump/Examples.md)
|
||||
* [Options](Programs/Arangodump/Options.md)
|
||||
* [Maskings](Programs/Arangodump/Maskings.md)
|
||||
* [Limitations](Programs/Arangodump/Limitations.md)
|
||||
* [Arangorestore](Programs/Arangorestore/README.md)
|
||||
* [Examples](Programs/Arangorestore/Examples.md)
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include "Basics/StaticStrings.h"
|
||||
#include "Basics/StringUtils.h"
|
||||
#include "Basics/VelocyPackHelper.h"
|
||||
#include "Maskings/Maskings.h"
|
||||
#include "ProgramOptions/ProgramOptions.h"
|
||||
#include "Random/RandomGenerator.h"
|
||||
#include "Shell/ClientFeature.h"
|
||||
|
@ -223,6 +224,28 @@ bool isIgnoredHiddenEnterpriseCollection(
|
|||
return false;
|
||||
}
|
||||
|
||||
/// @brief writes one batch of dumped JSON to the data file
///
/// If the job carries a maskings definition, the batch is run through
/// Maskings::mask() first and the masked output is written instead of the
/// original body. On success the number of bytes actually written is added to
/// the job's statistics.
///
/// @return TRI_ERROR_CANNOT_WRITE_FILE if the file reports a failure after
///         the write, TRI_ERROR_NO_ERROR otherwise
arangodb::Result dumpJsonObjects(arangodb::DumpFeature::JobData& jobData,
                                 arangodb::ManagedDirectory::File& file,
                                 arangodb::basics::StringBuffer const& body) {
  // receives the masked output; stays untouched when no maskings are set
  arangodb::basics::StringBuffer maskedBody(1, false);

  bool const applyMaskings = (jobData.maskings != nullptr);

  if (applyMaskings) {
    jobData.maskings->mask(jobData.name, body, maskedBody);
  }

  // the buffer that is actually written: masked output or the original body
  arangodb::basics::StringBuffer const* payload =
      applyMaskings ? &maskedBody : &body;

  file.write(payload->c_str(), payload->length());

  if (file.status().fail()) {
    return {TRI_ERROR_CANNOT_WRITE_FILE};
  }

  // account for the bytes written, which may differ from body.length()
  jobData.stats.totalWritten += static_cast<uint64_t>(payload->length());

  return {TRI_ERROR_NO_ERROR};
}
|
||||
|
||||
/// @brief dump the actual data from an individual collection
|
||||
arangodb::Result dumpCollection(arangodb::httpclient::SimpleHttpClient& client,
|
||||
arangodb::DumpFeature::JobData& jobData,
|
||||
|
@ -296,11 +319,11 @@ arangodb::Result dumpCollection(arangodb::httpclient::SimpleHttpClient& client,
|
|||
|
||||
// now actually write retrieved data to dump file
|
||||
arangodb::basics::StringBuffer const& body = response->getBody();
|
||||
file.write(body.c_str(), body.length());
|
||||
if (file.status().fail()) {
|
||||
return {TRI_ERROR_CANNOT_WRITE_FILE};
|
||||
arangodb::Result result = dumpJsonObjects(jobData, file, body);
|
||||
|
||||
if (result.fail()) {
|
||||
return result;
|
||||
}
|
||||
jobData.stats.totalWritten += static_cast<uint64_t>(body.length());
|
||||
|
||||
if (!checkMore || fromTick == 0) {
|
||||
// all done, return successful
|
||||
|
@ -392,6 +415,21 @@ arangodb::Result processJob(arangodb::httpclient::SimpleHttpClient& client,
|
|||
|
||||
arangodb::Result result{TRI_ERROR_NO_ERROR};
|
||||
|
||||
bool dumpStructure = true;
|
||||
|
||||
if (dumpStructure && jobData.maskings != nullptr) {
|
||||
dumpStructure = jobData.maskings->shouldDumpStructure(jobData.name);
|
||||
}
|
||||
|
||||
if (!dumpStructure) {
|
||||
if (jobData.options.progress) {
|
||||
LOG_TOPIC(INFO, arangodb::Logger::DUMP)
|
||||
<< "# Dumping collection '" << jobData.name << "'...";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// prep hex string of collection name
|
||||
std::string const hexString(
|
||||
arangodb::rest::SslInterface::sslMD5(jobData.name));
|
||||
|
@ -436,18 +474,26 @@ arangodb::Result processJob(arangodb::httpclient::SimpleHttpClient& client,
|
|||
}
|
||||
}
|
||||
|
||||
if (result.ok() && jobData.options.dumpData) {
|
||||
// save the actual data
|
||||
auto file = jobData.directory.writableFile(
|
||||
jobData.name + "_" + hexString + ".data.json", true);
|
||||
if (!::fileOk(file.get())) {
|
||||
return ::fileError(file.get(), true);
|
||||
if (result.ok()) {
|
||||
bool dumpData = jobData.options.dumpData;
|
||||
|
||||
if (dumpData && jobData.maskings != nullptr) {
|
||||
dumpData = jobData.maskings->shouldDumpData(jobData.name);
|
||||
}
|
||||
|
||||
if (jobData.options.clusterMode) {
|
||||
result = ::handleCollectionCluster(client, jobData, *file);
|
||||
} else {
|
||||
result = ::handleCollection(client, jobData, *file);
|
||||
if (dumpData) {
|
||||
// save the actual data
|
||||
auto file = jobData.directory.writableFile(
|
||||
jobData.name + "_" + hexString + ".data.json", true);
|
||||
if (!::fileOk(file.get())) {
|
||||
return ::fileError(file.get(), true);
|
||||
}
|
||||
|
||||
if (jobData.options.clusterMode) {
|
||||
result = ::handleCollectionCluster(client, jobData, *file);
|
||||
} else {
|
||||
result = ::handleCollection(client, jobData, *file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -467,13 +513,14 @@ void handleJobResult(std::unique_ptr<arangodb::DumpFeature::JobData>&& jobData,
|
|||
namespace arangodb {
|
||||
|
||||
DumpFeature::JobData::JobData(ManagedDirectory& dir, DumpFeature& feat,
|
||||
Options const& opts, Stats& stat,
|
||||
VPackSlice const& info, uint64_t const batch,
|
||||
Options const& opts, maskings::Maskings* maskings,
|
||||
Stats& stat, VPackSlice const& info, uint64_t const batch,
|
||||
std::string const& c, std::string const& n,
|
||||
std::string const& t)
|
||||
: directory{dir},
|
||||
feature{feat},
|
||||
options{opts},
|
||||
maskings{maskings},
|
||||
stats{stat},
|
||||
collectionInfo{info},
|
||||
batchId{batch},
|
||||
|
@ -554,6 +601,9 @@ void DumpFeature::collectOptions(
|
|||
|
||||
options->addOption("--tick-end", "last tick to be included in data dump",
|
||||
new UInt64Parameter(&_options.tickEnd));
|
||||
|
||||
options->addOption("--maskings", "file with maskings definition",
|
||||
new StringParameter(&_options.maskingsFile));
|
||||
}
|
||||
|
||||
void DumpFeature::validateOptions(
|
||||
|
@ -714,7 +764,7 @@ Result DumpFeature::runDump(httpclient::SimpleHttpClient& client,
|
|||
|
||||
// queue job to actually dump collection
|
||||
auto jobData = std::make_unique<JobData>(
|
||||
*_directory, *this, _options, _stats, collection, batchId,
|
||||
*_directory, *this, _options, _maskings.get(), _stats, collection, batchId,
|
||||
std::to_string(cid), name, collectionType);
|
||||
_clientTaskQueue.queueJob(std::move(jobData));
|
||||
}
|
||||
|
@ -853,7 +903,7 @@ Result DumpFeature::runClusterDump(httpclient::SimpleHttpClient& client,
|
|||
|
||||
// queue job to actually dump collection
|
||||
auto jobData = std::make_unique<JobData>(
|
||||
*_directory, *this, _options, _stats, collection, 0 /* batchId */,
|
||||
*_directory, *this, _options, _maskings.get(), _stats, collection, 0 /* batchId */,
|
||||
std::to_string(cid), name, "" /* collectionType */);
|
||||
_clientTaskQueue.queueJob(std::move(jobData));
|
||||
}
|
||||
|
@ -951,8 +1001,18 @@ void DumpFeature::reportError(Result const& error) {
|
|||
}
|
||||
}
|
||||
|
||||
/// @brief main method to run dump
|
||||
void DumpFeature::start() {
|
||||
if (!_options.maskingsFile.empty()) {
|
||||
maskings::MaskingsResult m = maskings::Maskings::fromFile(_options.maskingsFile);
|
||||
|
||||
if (m.status != maskings::MaskingsResult::VALID) {
|
||||
LOG_TOPIC(FATAL, Logger::CONFIG) << m.message;
|
||||
FATAL_ERROR_EXIT();
|
||||
}
|
||||
|
||||
_maskings = std::move(m.maskings);
|
||||
}
|
||||
|
||||
_exitCode = EXIT_SUCCESS;
|
||||
|
||||
// generate a fake client id that we sent to the server
|
||||
|
|
|
@ -25,16 +25,20 @@
|
|||
#define ARANGODB_DUMP_DUMP_FEATURE_H 1
|
||||
|
||||
#include "ApplicationFeatures/ApplicationFeature.h"
|
||||
|
||||
#include "Basics/Mutex.h"
|
||||
#include "Utils/ClientManager.h"
|
||||
#include "Utils/ClientTaskQueue.h"
|
||||
|
||||
namespace arangodb {
|
||||
|
||||
namespace httpclient {
|
||||
class SimpleHttpResult;
|
||||
}
|
||||
|
||||
namespace maskings {
|
||||
class Maskings;
|
||||
}
|
||||
|
||||
class ManagedDirectory;
|
||||
|
||||
class DumpFeature : public application_features::ApplicationFeature {
|
||||
|
@ -64,6 +68,7 @@ class DumpFeature : public application_features::ApplicationFeature {
|
|||
struct Options {
|
||||
std::vector<std::string> collections{};
|
||||
std::string outputPath{};
|
||||
std::string maskingsFile{};
|
||||
uint64_t initialChunkSize{1024 * 1024 * 8};
|
||||
uint64_t maxChunkSize{1024 * 1024 * 64};
|
||||
uint32_t threadCount{2};
|
||||
|
@ -87,13 +92,15 @@ class DumpFeature : public application_features::ApplicationFeature {
|
|||
|
||||
/// @brief Stores all necessary data to dump a single collection or shard
|
||||
struct JobData {
|
||||
JobData(ManagedDirectory&, DumpFeature&, Options const&, Stats&,
|
||||
VPackSlice const&, uint64_t const, std::string const&,
|
||||
std::string const&, std::string const&);
|
||||
JobData(ManagedDirectory&, DumpFeature&, Options const&,
|
||||
maskings::Maskings * maskings, Stats&,VPackSlice const&,
|
||||
uint64_t const, std::string const&, std::string const&,
|
||||
std::string const&);
|
||||
|
||||
ManagedDirectory& directory;
|
||||
DumpFeature& feature;
|
||||
Options const& options;
|
||||
maskings::Maskings* maskings;
|
||||
Stats& stats;
|
||||
|
||||
VPackSlice const collectionInfo;
|
||||
|
@ -112,6 +119,7 @@ class DumpFeature : public application_features::ApplicationFeature {
|
|||
Stats _stats;
|
||||
Mutex _workerErrorLock;
|
||||
std::queue<Result> _workerErrors;
|
||||
std::unique_ptr<maskings::Maskings> _maskings;
|
||||
|
||||
Result runDump(httpclient::SimpleHttpClient& client, std::string const& dbName);
|
||||
Result runClusterDump(httpclient::SimpleHttpClient& client, std::string const& dbName);
|
||||
|
|
|
@ -232,6 +232,11 @@ add_library(${LIB_ARANGO} STATIC
|
|||
Logger/LoggerBufferFeature.cpp
|
||||
Logger/LoggerFeature.cpp
|
||||
Logger/LoggerStream.cpp
|
||||
Maskings/AttributeMasking.cpp
|
||||
Maskings/Collection.cpp
|
||||
Maskings/Maskings.cpp
|
||||
Maskings/Path.cpp
|
||||
Maskings/XifyFront.cpp
|
||||
ProgramOptions/Option.cpp
|
||||
ProgramOptions/ProgramOptions.cpp
|
||||
ProgramOptions/Section.cpp
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "AttributeMasking.h"
|
||||
|
||||
#include "Basics/StringUtils.h"
|
||||
#include "Logger/Logger.h"
|
||||
#include "Maskings/XifyFront.h"
|
||||
|
||||
using namespace arangodb;
|
||||
using namespace arangodb::maskings;
|
||||
|
||||
/// @brief parses a single attribute masking definition
///
/// The definition must be an object. Recognized keys are "type" (string),
/// "path" (string), "unmaskedLength" (integer, defaults to 2), "hash"
/// (boolean, defaults to false) and "seed" (integer, defaults to 0). All
/// other keys are ignored.
///
/// @param maskings  handed through unchanged to the created masking function
/// @param def       the definition of the attribute masking
/// @return a VALID result holding the attribute masking, or a result with
///         the failure status and a message describing the problem
ParseResult<AttributeMasking> AttributeMasking::parse(Maskings* maskings,
                                                      VPackSlice const& def) {
  using Parsed = ParseResult<AttributeMasking>;

  if (!def.isObject()) {
    return Parsed(Parsed::PARSE_FAILED,
                  "expecting an object for attribute masking definition");
  }

  std::string path;
  std::string type;
  // kept signed until validated: getInt() returns a signed value, and storing
  // a negative number in an unsigned variable would wrap around to a huge
  // length that slips past the "at least 1" check below
  int64_t length = 2;
  uint64_t seed = 0;
  bool hash = false;

  for (auto const& entry : VPackObjectIterator(def, false)) {
    std::string const key = entry.key.copyString();

    if (key == "type") {
      if (!entry.value.isString()) {
        return Parsed(Parsed::ILLEGAL_PARAMETER, "type must be a string");
      }

      type = entry.value.copyString();
    } else if (key == "path") {
      if (!entry.value.isString()) {
        return Parsed(Parsed::ILLEGAL_PARAMETER, "path must be a string");
      }

      path = entry.value.copyString();
    } else if (key == "unmaskedLength") {
      if (!entry.value.isInteger()) {
        return Parsed(Parsed::ILLEGAL_PARAMETER,
                      "unmaskedLength must be an integer");
      }

      length = entry.value.getInt();
    } else if (key == "hash") {
      if (!entry.value.isBool()) {
        return Parsed(Parsed::ILLEGAL_PARAMETER, "hash must be a boolean");
      }

      hash = entry.value.getBool();
    } else if (key == "seed") {
      if (!entry.value.isInteger()) {
        return Parsed(Parsed::ILLEGAL_PARAMETER, "seed must be an integer");
      }

      // a negative seed is deliberately reinterpreted as unsigned
      seed = static_cast<uint64_t>(entry.value.getInt());
    }
  }

  if (path.empty()) {
    return Parsed(Parsed::ILLEGAL_PARAMETER, "path must not be empty");
  }

  ParseResult<Path> ap = Path::parse(path);

  if (ap.status != ParseResult<Path>::VALID) {
    // forward the status code of the path parser together with its message
    return Parsed(
        static_cast<Parsed::StatusCode>(static_cast<int>(ap.status)),
        ap.message);
  }

  if (type == "xify_front") {
    if (length < 1) {
      return Parsed(Parsed::ILLEGAL_PARAMETER,
                    "expecting unmaskedLength to be at least 1 for xify_front");
    }

    // the AttributeMasking constructor takes ownership of the new function
    return Parsed(AttributeMasking(
        ap.result,
        new XifyFront(maskings, static_cast<uint64_t>(length), hash, seed)));
  }

  return Parsed(Parsed::UNKNOWN_TYPE,
                "found unknown attribute masking type '" + type + "'");
}
|
||||
|
||||
bool AttributeMasking::match(std::vector<std::string> const& path) const {
|
||||
return _path.match(path);
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_ATTRIBUTE_MASKING_H
|
||||
#define ARANGODB_MASKINGS_ATTRIBUTE_MASKING_H 1
|
||||
|
||||
#include "Basics/Common.h"
|
||||
|
||||
#include <velocypack/Builder.h>
|
||||
#include <velocypack/Iterator.h>
|
||||
#include <velocypack/Parser.h>
|
||||
#include <velocypack/Slice.h>
|
||||
#include <velocypack/velocypack-aliases.h>
|
||||
|
||||
#include "Maskings/MaskingFunction.h"
|
||||
#include "Maskings/ParseResult.h"
|
||||
#include "Maskings/Path.h"
|
||||
|
||||
namespace arangodb {
|
||||
namespace maskings {
|
||||
class AttributeMasking {
|
||||
public:
|
||||
static ParseResult<AttributeMasking> parse(Maskings*, VPackSlice const&);
|
||||
|
||||
public:
|
||||
AttributeMasking() = default;
|
||||
|
||||
AttributeMasking(Path const& path, MaskingFunction* func) : _path(path) {
|
||||
_func.reset(func);
|
||||
}
|
||||
|
||||
bool match(std::vector<std::string> const&) const;
|
||||
|
||||
MaskingFunction* func() const { return _func.get(); }
|
||||
|
||||
private:
|
||||
Path _path;
|
||||
std::shared_ptr<MaskingFunction> _func;
|
||||
};
|
||||
} // namespace maskings
|
||||
} // namespace arangodb
|
||||
|
||||
#endif
|
|
@ -0,0 +1,100 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "Collection.h"
|
||||
|
||||
#include "Logger/Logger.h"
|
||||
|
||||
using namespace arangodb;
|
||||
using namespace arangodb::maskings;
|
||||
|
||||
/// @brief parses the masking definition of a single collection
///
/// The definition must be an object. The key "type" (string) selects how the
/// collection is dumped, the key "maskings" (array) lists the attribute
/// maskings. All other keys are ignored.
///
/// @param maskings  handed through unchanged to AttributeMasking::parse
/// @param def       the definition of the collection
/// @return a VALID result holding the collection, or a result with the
///         failure status and a message describing the problem
ParseResult<Collection> Collection::parse(Maskings* maskings,
                                          VPackSlice const& def) {
  using Parsed = ParseResult<Collection>;

  if (!def.isObject()) {
    return Parsed(Parsed::PARSE_FAILED,
                  "expecting an object for collection definition");
  }

  std::string typeName;
  std::vector<AttributeMasking> attributeMaskings;

  for (auto const& member : VPackObjectIterator(def, false)) {
    std::string const name = member.key.copyString();

    if (name == "type") {
      if (!member.value.isString()) {
        return Parsed(Parsed::ILLEGAL_PARAMETER,
                      "expecting a string for collection type");
      }

      typeName = member.value.copyString();
      continue;
    }

    if (name != "maskings") {
      // unknown keys are silently skipped
      continue;
    }

    if (!member.value.isArray()) {
      return Parsed(Parsed::ILLEGAL_PARAMETER,
                    "expecting an array for collection maskings");
    }

    for (auto const& item : VPackArrayIterator(member.value)) {
      ParseResult<AttributeMasking> parsed =
          AttributeMasking::parse(maskings, item);

      if (parsed.status != ParseResult<AttributeMasking>::VALID) {
        // forward status code and message of the failed attribute masking
        return Parsed(
            static_cast<Parsed::StatusCode>(static_cast<int>(parsed.status)),
            parsed.message);
      }

      attributeMaskings.push_back(parsed.result);
    }
  }

  // translate the type name; anything that is not a known name is an error
  CollectionSelection selection = CollectionSelection::FULL;

  if (typeName == "structure") {
    selection = CollectionSelection::STRUCTURE;
  } else if (typeName == "masked") {
    selection = CollectionSelection::MASKED;
  } else if (typeName == "exclude") {
    selection = CollectionSelection::EXCLUDE;
  } else if (typeName != "full") {
    return Parsed(Parsed::UNKNOWN_TYPE,
                  "found unknown collection type '" + typeName + "'");
  }

  return Parsed(Collection(selection, attributeMaskings));
}
|
||||
|
||||
/// @brief looks up the masking function for an attribute path
///
/// The attribute maskings are checked in the order in which they are stored;
/// the function of the first one that matches is returned.
///
/// @return the masking function of the first match, nullptr if none matches
MaskingFunction* Collection::masking(std::vector<std::string> const& path) {
  for (auto it = _maskings.cbegin(); it != _maskings.cend(); ++it) {
    if (!it->match(path)) {
      continue;
    }

    return it->func();
  }

  return nullptr;
}
|
|
@ -0,0 +1,64 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_COLLECTION_H
|
||||
#define ARANGODB_MASKINGS_COLLECTION_H 1
|
||||
|
||||
#include "Basics/Common.h"
|
||||
|
||||
#include <velocypack/Builder.h>
|
||||
#include <velocypack/Iterator.h>
|
||||
#include <velocypack/Parser.h>
|
||||
#include <velocypack/Slice.h>
|
||||
#include <velocypack/velocypack-aliases.h>
|
||||
|
||||
#include "Maskings/AttributeMasking.h"
|
||||
#include "Maskings/CollectionFilter.h"
|
||||
#include "Maskings/CollectionSelection.h"
|
||||
#include "Maskings/ParseResult.h"
|
||||
|
||||
namespace arangodb {
|
||||
namespace maskings {
|
||||
class Collection {
|
||||
public:
|
||||
static ParseResult<Collection> parse(Maskings* maskings, VPackSlice const&);
|
||||
|
||||
public:
|
||||
Collection() {}
|
||||
|
||||
Collection(CollectionSelection selection,
|
||||
std::vector<AttributeMasking> const& maskings)
|
||||
: _selection(selection), _maskings(maskings) {}
|
||||
|
||||
CollectionSelection selection() const noexcept { return _selection; }
|
||||
|
||||
MaskingFunction* masking(std::vector<std::string> const& path);
|
||||
|
||||
private:
|
||||
CollectionSelection _selection;
|
||||
// LATER: CollectionFilter _filter;
|
||||
std::vector<AttributeMasking> _maskings;
|
||||
};
|
||||
} // namespace maskings
|
||||
} // namespace arangodb
|
||||
|
||||
#endif
|
|
@ -0,0 +1,34 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_COLLECTION_FILTER_H
|
||||
#define ARANGODB_MASKINGS_COLLECTION_FILTER_H 1
|
||||
|
||||
#include "Basics/Common.h"
|
||||
|
||||
namespace arangodb {
|
||||
namespace maskings {
|
||||
/// @brief placeholder for filtering on collection level; currently an empty
/// class without any members
class CollectionFilter {};
|
||||
} // namespace maskings
|
||||
} // namespace arangodb
|
||||
|
||||
#endif
|
|
@ -0,0 +1,34 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_COLLECTION_SELECTION_H
|
||||
#define ARANGODB_MASKINGS_COLLECTION_SELECTION_H 1
|
||||
|
||||
#include "Basics/Common.h"
|
||||
|
||||
namespace arangodb {
|
||||
namespace maskings {
|
||||
/// @brief how much of a collection is written to the dump
enum class CollectionSelection {
  FULL,      // type "full": structure and all data, without any masking
  MASKED,    // type "masked": structure and all data, data subject to maskings
  EXCLUDE,   // type "exclude": the collection is ignored completely
  STRUCTURE  // type "structure": only the structure, no data
};
|
||||
} // namespace maskings
|
||||
} // namespace arangodb
|
||||
|
||||
#endif
|
|
@ -0,0 +1,75 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_MASKING_FUNCTION_H
|
||||
#define ARANGODB_MASKINGS_MASKING_FUNCTION_H 1
|
||||
|
||||
#include "Basics/Common.h"
|
||||
|
||||
#include <velocypack/Builder.h>
|
||||
#include <velocypack/Iterator.h>
|
||||
#include <velocypack/Parser.h>
|
||||
#include <velocypack/Slice.h>
|
||||
#include <velocypack/velocypack-aliases.h>
|
||||
|
||||
namespace arangodb {
|
||||
namespace maskings {
|
||||
class Maskings;
|
||||
|
||||
class MaskingFunction {
|
||||
public:
|
||||
static bool isNameChar(char c) {
|
||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
|
||||
('0' <= c && c <= '9') || c == '_' || c == '-';
|
||||
}
|
||||
|
||||
static bool utf8Length(uint8_t c) {
|
||||
if ((c & 0x80) == 0) {
|
||||
return 1;
|
||||
} else if ((c & 0xE0) == 0xC0) {
|
||||
return 2;
|
||||
} else if ((c & 0xF0) == 0xE0) {
|
||||
return 3;
|
||||
} else if ((c & 0xF8) == 0xF0) {
|
||||
return 4;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public:
|
||||
explicit MaskingFunction(Maskings* maskings) : _maskings(maskings) {}
|
||||
virtual ~MaskingFunction() {}
|
||||
|
||||
public:
|
||||
virtual VPackValue mask(bool) const = 0;
|
||||
virtual VPackValue mask(std::string const&, std::string& buffer) const = 0;
|
||||
virtual VPackValue mask(int64_t) const = 0;
|
||||
virtual VPackValue mask(double) const = 0;
|
||||
|
||||
protected:
|
||||
Maskings* _maskings;
|
||||
};
|
||||
} // namespace maskings
|
||||
} // namespace arangodb
|
||||
|
||||
#endif
|
|
@ -0,0 +1,319 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "Maskings.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "Basics/FileUtils.h"
|
||||
#include "Logger/Logger.h"
|
||||
#include "Random/RandomGenerator.h"
|
||||
|
||||
using namespace arangodb;
|
||||
using namespace arangodb::maskings;
|
||||
|
||||
/// @brief reads a JSON maskings definition from a file and parses it
///
/// On success the result holds the parsed Maskings. On failure it carries
/// CANNOT_READ_FILE (the file could not be read or is empty),
/// CANNOT_PARSE_FILE (the velocypack parser threw) or ILLEGAL_DEFINITION
/// (parse() rejected the definition), together with a message.
MaskingsResult Maskings::fromFile(std::string const& filename) {
  std::string definition;

  try {
    definition = basics::FileUtils::slurp(filename);
  } catch (std::exception const& e) {
    std::string msg =
        "cannot read maskings file '" + filename + "': " + e.what();
    LOG_TOPIC(DEBUG, Logger::CONFIG) << msg;

    return MaskingsResult(MaskingsResult::CANNOT_READ_FILE, msg);
  }

  // the closing quote was missing from this message
  LOG_TOPIC(DEBUG, Logger::CONFIG)
      << "found maskings file '" << filename << "'";

  if (definition.empty()) {
    std::string msg = "maskings file '" + filename + "' is empty";
    LOG_TOPIC(DEBUG, Logger::CONFIG) << msg;
    return MaskingsResult(MaskingsResult::CANNOT_READ_FILE, msg);
  }

  std::unique_ptr<Maskings> maskings(new Maskings{});

  // seed that is handed out via randomSeed()
  maskings->_randomSeed = RandomGenerator::interval(UINT64_MAX);

  try {
    std::shared_ptr<VPackBuilder> parsed =
        velocypack::Parser::fromJson(definition);

    ParseResult<Maskings> res = maskings->parse(parsed->slice());

    if (res.status != ParseResult<Maskings>::VALID) {
      return MaskingsResult(MaskingsResult::ILLEGAL_DEFINITION, res.message);
    }

    return MaskingsResult(std::move(maskings));
  } catch (velocypack::Exception const& e) {
    std::string msg =
        "cannot parse maskings file '" + filename + "': " + e.what();
    LOG_TOPIC(DEBUG, Logger::CONFIG) << msg << ". file content: " << definition;

    return MaskingsResult(MaskingsResult::CANNOT_PARSE_FILE, msg);
  }
}
|
||||
|
||||
/// @brief parses the top-level maskings definition
///
/// def must be an object whose keys are collection names and whose values
/// are handed to Collection::parse. The first failure aborts parsing and is
/// returned with the status and message of the failing step.
ParseResult<Maskings> Maskings::parse(VPackSlice const& def) {
  if (!def.isObject()) {
    // a non-object definition is a parse failure, not a duplicate collection
    return ParseResult<Maskings>(ParseResult<Maskings>::PARSE_FAILED,
                                 "expecting an object for masking definition");
  }

  for (auto const& entry : VPackObjectIterator(def, false)) {
    std::string key = entry.key.copyString();
    LOG_TOPIC(TRACE, Logger::CONFIG) << "masking collection '" << key << "'";

    if (_collections.find(key) != _collections.end()) {
      return ParseResult<Maskings>(ParseResult<Maskings>::DUPLICATE_COLLECTION,
                                   "duplicate collection entry '" + key + "'");
    }

    ParseResult<Collection> c = Collection::parse(this, entry.value);

    if (c.status != ParseResult<Collection>::VALID) {
      // both enums are instantiations of the same template, so the numeric
      // status value carries over unchanged
      return ParseResult<Maskings>(
          static_cast<ParseResult<Maskings>::StatusCode>(
              static_cast<int>(c.status)),
          c.message);
    }

    _collections[key] = c.result;
  }

  return ParseResult<Maskings>(ParseResult<Maskings>::VALID);
}
|
||||
|
||||
bool Maskings::shouldDumpStructure(std::string const& name) {
|
||||
auto const itr = _collections.find(name);
|
||||
|
||||
if (itr == _collections.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (itr->second.selection()) {
|
||||
case CollectionSelection::FULL:
|
||||
return true;
|
||||
case CollectionSelection::MASKED:
|
||||
return true;
|
||||
case CollectionSelection::EXCLUDE:
|
||||
return false;
|
||||
case CollectionSelection::STRUCTURE:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool Maskings::shouldDumpData(std::string const& name) {
|
||||
auto const itr = _collections.find(name);
|
||||
|
||||
if (itr == _collections.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (itr->second.selection()) {
|
||||
case CollectionSelection::FULL:
|
||||
return true;
|
||||
case CollectionSelection::MASKED:
|
||||
return true;
|
||||
case CollectionSelection::EXCLUDE:
|
||||
return false;
|
||||
case CollectionSelection::STRUCTURE:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief computes the value that replaces the scalar data found at path
///
/// Top-level "_key", "_id" and "_rev" attributes holding a string or an
/// integer are passed through unmasked without consulting the collection.
/// Otherwise the masking function registered for path (if any) is applied.
/// Without a masking function, bool, string, integer and double values are
/// passed through. Values of any other type become null in both cases.
///
/// For string results the returned VPackValue is constructed from buffer, so
/// the caller must keep buffer alive until the value has been consumed.
VPackValue Maskings::maskedItem(Collection& collection,
                                std::vector<std::string>& path,
                                std::string& buffer, VPackSlice const& data) {
  bool const isSystemAttribute =
      path.size() == 1 &&
      (path[0] == "_key" || path[0] == "_id" || path[0] == "_rev");
  bool const passThrough =
      isSystemAttribute && (data.isString() || data.isInteger());

  // the collection is not asked for a masking function when the value is
  // passed through as a system attribute
  MaskingFunction* func = passThrough ? nullptr : collection.masking(path);

  if (func == nullptr) {
    if (data.isBool()) {
      return VPackValue(data.getBool());
    } else if (data.isString()) {
      velocypack::ValueLength length;
      char const* c = data.getString(length);
      buffer.assign(c, length);
      return VPackValue(buffer);
    } else if (data.isInteger()) {
      return VPackValue(data.getInt());
    } else if (data.isDouble()) {
      return VPackValue(data.getDouble());
    }

    return VPackValue(VPackValueType::Null);
  }

  if (data.isBool()) {
    return func->mask(data.getBool());
  } else if (data.isString()) {
    velocypack::ValueLength length;
    char const* c = data.getString(length);
    return func->mask(std::string(c, length), buffer);
  } else if (data.isInteger()) {
    return func->mask(data.getInt());
  } else if (data.isDouble()) {
    return func->mask(data.getDouble());
  }

  return VPackValue(VPackValueType::Null);
}
|
||||
|
||||
/// @brief appends the masked members of the array data to the array that is
/// currently open in builder
///
/// Nested objects and arrays are processed recursively; scalar members are
/// replaced via maskedItem(). The path is not extended for array members, so
/// all members are masked using the path of the array itself.
void Maskings::addMaskedArray(Collection& collection, VPackBuilder& builder,
                              std::vector<std::string>& path,
                              VPackSlice const& data) {
  for (auto const& element : VPackArrayIterator(data)) {
    if (element.isObject()) {
      // scoped builder brackets the nested object
      VPackObjectBuilder nested(&builder);
      addMaskedObject(collection, builder, path, element);
      continue;
    }

    if (element.isArray()) {
      // scoped builder brackets the nested array
      VPackArrayBuilder nested(&builder);
      addMaskedArray(collection, builder, path, element);
      continue;
    }

    // scalar member: scratch backs a possible string result until it is added
    std::string scratch;
    builder.add(maskedItem(collection, path, scratch, element));
  }
}
|
||||
|
||||
/// @brief appends the masked attributes of the object data to the object
/// that is currently open in builder
///
/// While an attribute is processed its name is the last element of path; it
/// is removed again afterwards. Nested objects and arrays are processed
/// recursively, scalar attributes are replaced via maskedItem().
void Maskings::addMaskedObject(Collection& collection, VPackBuilder& builder,
                               std::vector<std::string>& path,
                               VPackSlice const& data) {
  for (auto const& member : VPackObjectIterator(data, false)) {
    std::string name = member.key.copyString();
    VPackSlice const& child = member.value;

    path.push_back(name);

    if (!child.isObject() && !child.isArray()) {
      // scalar attribute: scratch backs a possible string result until it
      // has been added to the builder
      std::string scratch;
      builder.add(name, maskedItem(collection, path, scratch, child));
    } else if (child.isObject()) {
      VPackObjectBuilder nested(&builder, name);
      addMaskedObject(collection, builder, path, child);
    } else {
      VPackArrayBuilder nested(&builder, name);
      addMaskedArray(collection, builder, path, child);
    }

    path.pop_back();
  }
}
|
||||
|
||||
/// @brief adds the masked document data as attribute "data" to builder
///
/// Nothing is added when data is not an object.
void Maskings::addMasked(Collection& collection, VPackBuilder& builder,
                         VPackSlice const& data) {
  if (data.isObject()) {
    // masking starts with an empty attribute path
    std::vector<std::string> attributePath;
    std::string attributeName("data");
    VPackObjectBuilder scope(&builder, attributeName);

    addMaskedObject(collection, builder, attributePath, data);
  }
}
|
||||
|
||||
/// @brief masks one dump entry and appends it as a JSON line to data
///
/// slice is copied attribute by attribute; only the attribute named "data"
/// is routed through the masking code. The resulting JSON text followed by a
/// newline is appended to data. Nothing is appended when slice is not an
/// object.
void Maskings::addMasked(Collection& collection, basics::StringBuffer& data,
                         VPackSlice const& slice) {
  if (!slice.isObject()) {
    return;
  }

  velocypack::StringRef payloadName("data");
  VPackBuilder builder;

  {
    VPackObjectBuilder scope(&builder);

    for (auto const& member : VPackObjectIterator(slice, false)) {
      velocypack::StringRef name = member.key.stringRef();

      if (!name.equals(payloadName)) {
        // every other attribute is copied verbatim
        builder.add(name, member.value);
        continue;
      }

      addMasked(collection, builder, member.value);
    }
  }

  std::string json = builder.toJson();
  data.appendText(json);
  data.appendText("\n");
}
|
||||
|
||||
void Maskings::mask(std::string const& name, basics::StringBuffer const& data,
|
||||
basics::StringBuffer& result) {
|
||||
result.clear();
|
||||
|
||||
auto const itr = _collections.find(name);
|
||||
|
||||
if (itr == _collections.end()) {
|
||||
result.copy(data);
|
||||
return;
|
||||
}
|
||||
|
||||
if (itr->second.selection() == CollectionSelection::FULL) {
|
||||
result.copy(data);
|
||||
return;
|
||||
}
|
||||
|
||||
result.reserve(data.length());
|
||||
|
||||
char const* p = data.c_str();
|
||||
char const* e = p + data.length();
|
||||
char const* q = p;
|
||||
|
||||
while (p < e) {
|
||||
while (p < e && (*p != '\n' && *p != '\r')) {
|
||||
++p;
|
||||
}
|
||||
|
||||
std::shared_ptr<VPackBuilder> builder = VPackParser::fromJson(q, p - q);
|
||||
|
||||
addMasked(itr->second, result, builder->slice());
|
||||
|
||||
while (p < e && (*p == '\n' || *p == '\r')) {
|
||||
++p;
|
||||
}
|
||||
|
||||
q = p;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_MASKINGS_H
|
||||
#define ARANGODB_MASKINGS_MASKINGS_H 1
|
||||
|
||||
#include "Basics/Common.h"
|
||||
|
||||
#include <velocypack/Builder.h>
|
||||
#include <velocypack/Iterator.h>
|
||||
#include <velocypack/Parser.h>
|
||||
#include <velocypack/Slice.h>
|
||||
#include <velocypack/velocypack-aliases.h>
|
||||
|
||||
#include "Basics/StringBuffer.h"
|
||||
#include "Maskings/Collection.h"
|
||||
#include "Maskings/ParseResult.h"
|
||||
|
||||
namespace arangodb {
|
||||
namespace maskings {
|
||||
class Maskings;
|
||||
|
||||
struct MaskingsResult {
|
||||
enum StatusCode : int {
|
||||
VALID,
|
||||
CANNOT_PARSE_FILE,
|
||||
CANNOT_READ_FILE,
|
||||
ILLEGAL_DEFINITION
|
||||
};
|
||||
|
||||
MaskingsResult(StatusCode s, std::string m)
|
||||
: status(s), message(m), maskings(nullptr){};
|
||||
MaskingsResult(std::unique_ptr<Maskings>&& m)
|
||||
: status(StatusCode::VALID), maskings(std::move(m)){};
|
||||
|
||||
StatusCode status;
|
||||
std::string message;
|
||||
std::unique_ptr<Maskings> maskings;
|
||||
};
|
||||
|
||||
class Maskings {
|
||||
public:
|
||||
static MaskingsResult fromFile(std::string const&);
|
||||
|
||||
public:
|
||||
bool shouldDumpStructure(std::string const& name);
|
||||
bool shouldDumpData(std::string const& name);
|
||||
void mask(std::string const& name, basics::StringBuffer const& data,
|
||||
basics::StringBuffer& result);
|
||||
|
||||
uint64_t randomSeed() const noexcept { return _randomSeed; }
|
||||
|
||||
private:
|
||||
ParseResult<Maskings> parse(VPackSlice const&);
|
||||
VPackValue maskedItem(Collection& collection, std::vector<std::string>& path,
|
||||
std::string& buffer, VPackSlice const& data);
|
||||
void addMaskedArray(Collection& collection, VPackBuilder& builder,
|
||||
std::vector<std::string>& path, VPackSlice const& data);
|
||||
void addMaskedObject(Collection& collection, VPackBuilder& builder,
|
||||
std::vector<std::string>& path, VPackSlice const& data);
|
||||
void addMasked(Collection& collection, VPackBuilder& builder,
|
||||
VPackSlice const& data);
|
||||
void addMasked(Collection& collection, basics::StringBuffer&,
|
||||
VPackSlice const& data);
|
||||
|
||||
private:
|
||||
std::map<std::string, Collection> _collections;
|
||||
uint64_t _randomSeed = 0;
|
||||
};
|
||||
|
||||
} // namespace maskings
|
||||
} // namespace arangodb
|
||||
|
||||
#endif
|
|
@ -0,0 +1,51 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_PARSE_RESULT_H
|
||||
#define ARANGODB_MASKINGS_PARSE_RESULT_H
|
||||
|
||||
#include "Basics/Common.h"
|
||||
|
||||
/// @brief result of a parse step: a status code, an optional message and,
/// for successful parses, the parsed value
template <typename T>
struct ParseResult {
  enum StatusCode : int {
    VALID,
    PARSE_FAILED,
    DUPLICATE_COLLECTION,
    UNKNOWN_TYPE,
    ILLEGAL_PARAMETER
  };

  /// @brief creates a result that carries only a status; result is
  /// value-initialized so that it never holds an indeterminate value
  ParseResult(StatusCode status) : status(status), result() {}

  /// @brief creates a result that carries a status and a message; result is
  /// value-initialized
  ParseResult(StatusCode status, std::string message)
      : status(status), message(std::move(message)), result() {}

  /// @brief creates a VALID result that takes over the parsed value
  ParseResult(T&& result)
      : status(StatusCode::VALID), result(std::move(result)) {}

  StatusCode status;
  std::string message;
  T result;
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,155 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "Collection.h"
|
||||
|
||||
#include "Basics/StringUtils.h"
|
||||
#include "Logger/Logger.h"
|
||||
|
||||
using namespace arangodb;
|
||||
using namespace arangodb::maskings;
|
||||
|
||||
/// @brief parses a masking path definition
///
/// Components are separated by '.'. A leading '.' marks the path as a
/// wildcard path and is not part of the first component. Text enclosed in a
/// pair of backticks, or in a pair of the two-byte sequence 0xC2 0xB4 (the
/// UTF-8 encoding of U+00B4; the byte-swapped sequence 0xB4 0xC2 is accepted
/// as well), is copied into the current component verbatim, so that it may
/// contain '.'.
///
/// Returns ILLEGAL_PARAMETER with a message for an empty definition, an
/// empty component or an unbalanced quote.
ParseResult<Path> Path::parse(std::string const& def) {
  if (def.empty()) {
    return ParseResult<Path>(ParseResult<Path>::ILLEGAL_PARAMETER,
                             "path must not be empty");
  }

  // quote bytes are compared as char so that the comparison does not depend
  // on whether plain char is signed or unsigned
  char const tickA = static_cast<char>(0xC2);
  char const tickB = static_cast<char>(0xB4);

  bool const wildcard = (def[0] == '.');

  char const* p = def.c_str();
  char const* e = p + def.size();

  if (wildcard) {
    ++p;
  }

  std::vector<std::string> components;
  std::string buffer;

  while (p < e) {
    if (*p == '.') {
      if (buffer.empty()) {
        return ParseResult<Path>(
            ParseResult<Path>::ILLEGAL_PARAMETER,
            "path '" + def + "' contains an empty component");
      }

      ++p;
      components.push_back(buffer);
      buffer.clear();
    } else if (*p == '`') {
      ++p;

      while (p < e && *p != '`') {
        buffer.push_back(*p++);
      }

      if (p == e) {
        return ParseResult<Path>(
            ParseResult<Path>::ILLEGAL_PARAMETER,
            "path '" + def + "' contains an unbalanced quote");
      }

      ++p;
    } else if ((p[0] == tickA && p[1] == tickB) ||
               (p[0] == tickB && p[1] == tickA)) {
      // reading p[1] is safe: c_str() guarantees a terminating 0 at *e
      char const open0 = p[0];
      char const open1 = p[1];

      p += 2;

      // the closing quote must be the same two-byte sequence
      while (p + 1 < e && (p[0] != open0 || p[1] != open1)) {
        buffer.push_back(*p++);
      }

      // fewer than two bytes left means that no closing quote was found;
      // checking only for p == e would miss the case p == e - 1, drop the
      // last byte and advance p beyond e
      if (p + 1 >= e) {
        return ParseResult<Path>(
            ParseResult<Path>::ILLEGAL_PARAMETER,
            "path '" + def + "' contains an unbalanced quote");
      }

      p += 2;
    } else {
      buffer.push_back(*p++);
    }
  }

  if (buffer.empty()) {
    return ParseResult<Path>(ParseResult<Path>::ILLEGAL_PARAMETER,
                             "path '" + def + "' contains an empty component");
  }

  components.push_back(buffer);

  return ParseResult<Path>(Path(wildcard, components));
}
|
||||
|
||||
bool Path::match(std::vector<std::string> const& path) const {
|
||||
size_t cs = _components.size();
|
||||
size_t ps = path.size();
|
||||
|
||||
if (!_wildcard) {
|
||||
if (ps != cs) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (ps < cs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t pi = ps;
|
||||
size_t ci = cs;
|
||||
|
||||
while (0 < ci) {
|
||||
if (path[pi - 1] != _components[ci - 1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
--pi;
|
||||
--ci;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_PATH_H
|
||||
#define ARANGODB_MASKINGS_PATH_H 1
|
||||
|
||||
#include "Basics/Common.h"
|
||||
|
||||
#include "Maskings/ParseResult.h"
|
||||
|
||||
namespace arangodb {
|
||||
namespace maskings {
|
||||
class Path {
|
||||
public:
|
||||
static ParseResult<Path> parse(std::string const&);
|
||||
|
||||
public:
|
||||
Path() : _wildcard(false) {}
|
||||
|
||||
Path(bool wildcard, std::vector<std::string> const& components)
|
||||
: _wildcard(wildcard), _components(components) {}
|
||||
|
||||
bool match(std::vector<std::string> const& path) const;
|
||||
|
||||
private:
|
||||
bool _wildcard;
|
||||
std::vector<std::string> _components;
|
||||
};
|
||||
} // namespace maskings
|
||||
} // namespace arangodb
|
||||
|
||||
#endif
|
|
@ -0,0 +1,92 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "XifyFront.h"
|
||||
|
||||
#include "Basics/StringUtils.h"
|
||||
#include "Basics/fasthash.h"
|
||||
#include "Maskings/Maskings.h"
|
||||
|
||||
static std::string const xxxx("xxxx");
|
||||
|
||||
using namespace arangodb;
|
||||
using namespace arangodb::maskings;
|
||||
|
||||
/// @brief replaces any boolean value by the fixed string "xxxx"
VPackValue XifyFront::mask(bool) const {
  return VPackValue(xxxx);
}
|
||||
|
||||
/// @brief masks a string by replacing the front of every word with 'x'
///
/// The input is split into words, which are runs of bytes accepted by
/// isNameChar(). Of each word the last _length bytes are kept and all bytes
/// before them are replaced by 'x'; words of at most _length bytes are kept
/// unchanged. Every byte outside of a word is replaced by a single space.
/// If hashing is enabled, a space and the base64 encoding of a 64 bit hash
/// of the original string are appended. The hash is seeded with _randomSeed
/// or, if that is 0, with the seed of the owning maskings.
///
/// The result is written to buffer and the returned value is constructed
/// from buffer, so buffer must outlive the use of the returned value.
VPackValue XifyFront::mask(std::string const& data, std::string& buffer) const {
  char const* p = data.c_str();
  char const* q = p;  // start of the current word
  char const* e = p + data.size();

  buffer.clear();
  buffer.reserve(data.size());

  while (p < e) {
    while (p < e && isNameChar(*p)) {
      ++p;
    }

    if (p != q) {
      // number of leading bytes to hide; computed on lengths rather than as
      // "p - _length" so that no pointer before the start of the word is
      // formed when _length exceeds the length of the word
      uint64_t const wordLength = static_cast<uint64_t>(p - q);
      uint64_t const hidden = (wordLength > _length) ? wordLength - _length : 0;
      char const* w = q + hidden;

      while (q < w) {
        buffer.push_back('x');
        ++q;
      }

      while (q < p) {
        buffer.push_back(*q);
        ++q;
      }
    }

    while (p < e && !isNameChar(*p)) {
      buffer.push_back(' ');
      ++p;
    }

    q = p;
  }

  if (_hash) {
    uint64_t hash;

    if (_randomSeed == 0) {
      hash = fasthash64(data.c_str(), data.size(), _maskings->randomSeed());
    } else {
      hash = fasthash64(data.c_str(), data.size(), _randomSeed);
    }

    // the raw bytes of the hash value are encoded
    std::string hash64 = basics::StringUtils::encodeBase64(
        std::string(reinterpret_cast<char const*>(&hash), sizeof(hash)));

    buffer.push_back(' ');
    buffer.append(hash64);
  }

  return VPackValue(buffer);
}
|
||||
|
||||
/// @brief replaces any integer value by the fixed string "xxxx"
VPackValue XifyFront::mask(int64_t) const {
  return VPackValue(xxxx);
}
|
||||
|
||||
/// @brief replaces any double value by the fixed string "xxxx"
VPackValue XifyFront::mask(double) const {
  return VPackValue(xxxx);
}
|
|
@ -0,0 +1,51 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
/// DISCLAIMER
|
||||
///
|
||||
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
/// you may not use this file except in compliance with the License.
|
||||
/// You may obtain a copy of the License at
|
||||
///
|
||||
/// http://www.apache.org/licenses/LICENSE-2.0
|
||||
///
|
||||
/// Unless required by applicable law or agreed to in writing, software
|
||||
/// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
/// See the License for the specific language governing permissions and
|
||||
/// limitations under the License.
|
||||
///
|
||||
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
///
|
||||
/// @author Frank Celler
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ARANGODB_MASKINGS_ATTRIBUTE_XIFY_FRONT_H
|
||||
#define ARANGODB_MASKINGS_ATTRIBUTE_XIFY_FRONT_H 1
|
||||
|
||||
#include "Maskings/MaskingFunction.h"
|
||||
|
||||
namespace arangodb {
|
||||
namespace maskings {
|
||||
class XifyFront : public MaskingFunction {
|
||||
public:
|
||||
XifyFront(Maskings* maskings, int64_t length, bool hash, uint64_t seed)
|
||||
: MaskingFunction(maskings),
|
||||
_length((uint64_t)length),
|
||||
_randomSeed(seed),
|
||||
_hash(hash) {}
|
||||
|
||||
VPackValue mask(bool) const override;
|
||||
VPackValue mask(std::string const&, std::string& buffer) const override;
|
||||
VPackValue mask(int64_t) const override;
|
||||
VPackValue mask(double) const override;
|
||||
|
||||
private:
|
||||
uint64_t _length;
|
||||
uint64_t _randomSeed;
|
||||
bool _hash;
|
||||
};
|
||||
} // namespace maskings
|
||||
} // namespace arangodb
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue