1
0
Fork 0

Feature/data masking improvments (#8129)

This commit is contained in:
Frank Celler 2019-03-04 10:47:26 +01:00 committed by GitHub
parent 9ee60c3b38
commit 2329dd1ee2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 148 additions and 24 deletions

View File

@ -1,6 +1,8 @@
devel
-----
* added "random" masking to mask any data type, added wildcard masking
* fixed overflow in windows NowNanos in RocksDB
* add "PRUNE <condition>" to AQL Traversals. This allows to early abort searching of

View File

@ -110,6 +110,9 @@ Path
`path` defines which field to obfuscate. There can only be a single
path per masking, but an unlimited amount of maskings per collection.
Note that the top-level system attributes like `_key`, `_from` are
never masked.
To mask a top-level attribute value, the path is simply the attribute
name, for instance `"name"` to mask the value `"foobar"`:
@ -280,6 +283,9 @@ including the elements of the sub-array. The document:
To mask the email address, you could use the paths `email.address`
or `.address`.
### Match all
If the path is `"*"` then it matches any leaf attribute.
Masking Functions
-----------------
@ -298,17 +304,19 @@ The following masking functions are only available in the
- [Phone Number](#phone-number)
- [Email Address](#email-address)
The masking function:
The masking functions:
- [Random String](#random-string)
- [Random](#random)
is available in the Community Edition as well as the Enterprise Edition.
are available in the Community Edition as well as the Enterprise Edition.
### Random String
This masking type will replace all values of attributes with key
`name` with an anonymized string. It is not guaranteed that the string
will be of the same length.
This masking type will replace all string values of attributes with key
`name` with an anonymized string. It is not guaranteed that the
string will be of the same length. Attributes whose values are not strings
are not modified.
A hash of the original string is computed. If the original string is
shorter then the hash will be used. This will result in a longer
@ -391,6 +399,12 @@ A document like:
}
```
### Random
This masking type works like random string for attributes with string
values. Attributes with integer, decimal or boolean values are
replaced by random integers, decimals or booleans.
### Xify Front
This masking type replaces the front characters with `x` and

View File

@ -236,6 +236,7 @@ add_library(${LIB_ARANGO} STATIC
Maskings/Collection.cpp
Maskings/Maskings.cpp
Maskings/Path.cpp
Maskings/RandomMask.cpp
Maskings/RandomStringMask.cpp
ProgramOptions/Option.cpp
ProgramOptions/ProgramOptions.cpp

View File

@ -25,12 +25,14 @@
#include "Basics/StringUtils.h"
#include "Logger/Logger.h"
#include "Maskings/RandomStringMask.h"
#include "Maskings/RandomMask.h"
using namespace arangodb;
using namespace arangodb::maskings;
// Registers the built-in attribute masking functions: the type name
// "randomString" is paired with RandomStringMask::create and the type name
// "random" with RandomMask::create.
void arangodb::maskings::InstallMaskings() {
AttributeMasking::installMasking("randomString", RandomStringMask::create);
AttributeMasking::installMasking("random", RandomMask::create);
}
// Definition of the static table that maps a string key to a masking factory
// function taking (Path, Maskings*, VPackSlice const&).
// NOTE(review): presumably keyed by masking type name and filled through
// installMasking() — confirm against AttributeMasking.
std::unordered_map<std::string, ParseResult<AttributeMasking> (*)(Path, Maskings*, VPackSlice const&)> AttributeMasking::_maskings;

View File

@ -180,17 +180,14 @@ VPackValue Maskings::maskedItem(Collection& collection, std::vector<std::string>
std::string& buffer, VPackSlice const& data) {
static std::string xxxx("xxxx");
if (path.size() == 1) {
if (path[0] == "_key" || path[0] == "_id" || path[0] == "_rev" ||
path[0] == "_from" || path[0] == "_to") {
if (data.isString()) {
velocypack::ValueLength length;
char const* c = data.getString(length);
buffer = std::string(c, length);
return VPackValue(buffer);
} else if (data.isInteger()) {
return VPackValue(data.getInt());
}
if (path.size() == 1 && path[0].size() >= 1 && path[0][0] == '_') {
if (data.isString()) {
velocypack::ValueLength length;
char const* c = data.getString(length);
buffer = std::string(c, length);
return VPackValue(buffer);
} else if (data.isInteger()) {
return VPackValue(data.getInt());
}
}

View File

@ -35,6 +35,12 @@ ParseResult<Path> Path::parse(std::string const& def) {
"path must not be empty");
}
std::vector<std::string> components;
if (def == "*") {
return ParseResult<Path>(Path(false, true, components));
}
bool wildcard = false;
if (def[0] == '.') {
@ -50,7 +56,6 @@ ParseResult<Path> Path::parse(std::string const& def) {
U8_NEXT(p, off, len, ch);
}
std::vector<std::string> components;
std::string buffer;
while (off < len) {
@ -68,7 +73,7 @@ ParseResult<Path> Path::parse(std::string const& def) {
components.push_back(buffer);
buffer.clear();
} else if (ch == 96 || ch == 180) { // windows does not like U'`' and U'´'
} else if (ch == 96 || ch == 180) { // windows does not like U'`' and U'´'
UChar32 quote = ch;
U8_NEXT(p, off, len, ch);
@ -116,13 +121,17 @@ ParseResult<Path> Path::parse(std::string const& def) {
"path '" + def + "' contains no component");
}
return ParseResult<Path>(Path(wildcard, components));
return ParseResult<Path>(Path(wildcard, false, components));
}
bool Path::match(std::vector<std::string> const& path) const {
size_t cs = _components.size();
size_t ps = path.size();
if (_any) {
return true;
}
if (!_wildcard) {
if (ps != cs) {
return false;

View File

@ -36,13 +36,14 @@ class Path {
public:
// Default-constructs a path with no components. Both flags are initialized
// explicitly: match() reads `_any` first, so leaving it indeterminate would
// make a default-constructed Path match arbitrarily.
Path() : _wildcard(false), _any(false) {}
Path(bool wildcard, std::vector<std::string> const& components)
: _wildcard(wildcard), _components(components) {}
Path(bool wildcard, bool any, std::vector<std::string> const& components)
: _wildcard(wildcard), _any(any), _components(components) {}
bool match(std::vector<std::string> const& path) const;
private:
bool _wildcard;
bool _any;
std::vector<std::string> _components;
};
} // namespace maskings

View File

@ -0,0 +1,55 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Frank Celler
////////////////////////////////////////////////////////////////////////////////
#include "RandomMask.h"
#include "Maskings/Maskings.h"
#include "Random/RandomGenerator.h"
using namespace arangodb;
using namespace arangodb::maskings;
/// Factory for the random masking function: wraps a newly allocated
/// RandomMask in an AttributeMasking for the given path. The VPackSlice
/// argument is unnamed and not used.
/// NOTE(review): the raw `new` presumably hands ownership of the RandomMask
/// to AttributeMasking — confirm against its constructor.
ParseResult<AttributeMasking> RandomMask::create(Path path, Maskings* maskings,
VPackSlice const&) {
return ParseResult<AttributeMasking>(AttributeMasking(path, new RandomMask(maskings)));
}
/// Masks a boolean value. The incoming value and the buffer are ignored; the
/// replacement is `true` exactly when the integer drawn from
/// RandomGenerator::interval(0, 1) is even.
VPackValue RandomMask::mask(bool, std::string&) const {
  int64_t const lowerBound = 0;
  int64_t const upperBound = 1;
  int64_t const drawn = RandomGenerator::interval(lowerBound, upperBound);

  bool const replacement = (drawn % 2) == 0;
  return VPackValue(replacement);
}
/// Masks an integer value. The incoming value and the buffer are ignored; the
/// replacement is the integer drawn from RandomGenerator::interval(-1000, 1000).
VPackValue RandomMask::mask(int64_t, std::string&) const {
  int64_t const lowerBound = -1000;
  int64_t const upperBound = 1000;

  int64_t const replacement = RandomGenerator::interval(lowerBound, upperBound);
  return VPackValue(replacement);
}
/// Masks a floating-point value. The incoming value and the buffer are
/// ignored; the replacement is the integer drawn from
/// RandomGenerator::interval(-1000, 1000), converted to double and divided
/// by 100.
VPackValue RandomMask::mask(double, std::string&) const {
  int64_t const lowerBound = -1000;
  int64_t const upperBound = 1000;
  int64_t const hundredths = RandomGenerator::interval(lowerBound, upperBound);

  double const replacement = static_cast<double>(hundredths) / 100.0;
  return VPackValue(replacement);
}

45
lib/Maskings/RandomMask.h Normal file
View File

@ -0,0 +1,45 @@
////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2018 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Frank Celler
////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGODB_MASKINGS_ATTRIBUTE_RANDOM_MASK_H
#define ARANGODB_MASKINGS_ATTRIBUTE_RANDOM_MASK_H 1
#include "Maskings/RandomStringMask.h"
namespace arangodb {
namespace maskings {
/// Masking function derived from RandomStringMask that overrides the
/// masking of boolean, integer and double values.
class RandomMask : public RandomStringMask {
 public:
  /// Factory function; the constructor is private, so instances are created
  /// through this entry point.
  static ParseResult<AttributeMasking> create(Path, Maskings*, VPackSlice const& def);

  /// Replacement for a boolean value.
  VPackValue mask(bool, std::string& buffer) const override;

  /// Replacement for an integer value.
  VPackValue mask(int64_t, std::string& buffer) const override;

  /// Replacement for a double value.
  VPackValue mask(double, std::string& buffer) const override;

 private:
  /// Forwards the owning Maskings pointer to the RandomStringMask base.
  explicit RandomMask(Maskings* maskings) : RandomStringMask(maskings) {}
};
} // namespace maskings
} // namespace arangodb
#endif

View File

@ -26,8 +26,6 @@
#include "Basics/fasthash.h"
#include "Maskings/Maskings.h"
static std::string const xxxx("xxxx");
using namespace arangodb;
using namespace arangodb::maskings;

View File

@ -39,7 +39,7 @@ class RandomStringMask : public MaskingFunction {
VPackValue mask(int64_t, std::string& buffer) const override;
VPackValue mask(double, std::string& buffer) const override;
private:
protected:
explicit RandomStringMask(Maskings* maskings) : MaskingFunction(maskings) {}
};
} // namespace maskings