1
0
Fork 0

Bug fix/issues 27052019 (#9117)

This commit is contained in:
Jan 2019-05-28 12:08:25 +02:00 committed by GitHub
parent 27112a3486
commit 033f411420
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 94 additions and 36 deletions

View File

@ -76,13 +76,6 @@ class Builder {
// object(s). // object(s).
private: private:
// A struct for sorting index tables for objects. One entry is filled in per
// member offset by sortObjectIndexLong(); the entries are sorted there and
// the offsets are then copied back in their sorted order.
struct SortEntry {
// start of the attribute name bytes, as returned by findAttrName()
uint8_t const* nameStart;
// length of the attribute name, as reported by findAttrName()
uint64_t nameSize;
// offset of the member, relative to the base address of the object
uint64_t offset;
};
std::shared_ptr<Buffer<uint8_t>> _buffer; // Here we collect the result std::shared_ptr<Buffer<uint8_t>> _buffer; // Here we collect the result
Buffer<uint8_t>* _bufferPtr; // used for quicker access than shared_ptr Buffer<uint8_t>* _bufferPtr; // used for quicker access than shared_ptr
uint8_t* _start; // Always points to the start of _buffer uint8_t* _start; // Always points to the start of _buffer
@ -91,8 +84,6 @@ class Builder {
// open objects/arrays // open objects/arrays
std::vector<std::vector<ValueLength>> _index; // Indices for starts std::vector<std::vector<ValueLength>> _index; // Indices for starts
// of subindex // of subindex
// temporary buffer used for sorting medium to big objects
std::vector<Builder::SortEntry> _sortEntries;
bool _keyWritten; // indicates that in the current object the key bool _keyWritten; // indicates that in the current object the key
// has been written but the value not yet // has been written but the value not yet

View File

@ -78,6 +78,13 @@
#define VELOCYPACK_UNUSED /* unused */ #define VELOCYPACK_UNUSED /* unused */
#endif #endif
// attribute used to force inlining of functions
// - GCC and Clang: the always_inline attribute
// - other compilers targeting Windows: __forceinline
// - any other toolchain: plain `inline`, so that the macro is always defined
//   and declarations using it (e.g. checkOverflow()) still compile
#if defined(__GNUC__) || defined(__clang__)
#define VELOCYPACK_FORCE_INLINE inline __attribute__((__always_inline__))
#elif defined(_WIN32)
#define VELOCYPACK_FORCE_INLINE __forceinline
#else
#define VELOCYPACK_FORCE_INLINE inline
#endif
#ifndef VELOCYPACK_XXHASH #ifndef VELOCYPACK_XXHASH
#ifndef VELOCYPACK_FASTHASH #ifndef VELOCYPACK_FASTHASH
#define VELOCYPACK_XXHASH #define VELOCYPACK_XXHASH
@ -118,7 +125,7 @@ bool assemblerFunctionsDisabled();
std::size_t checkOverflow(ValueLength); std::size_t checkOverflow(ValueLength);
#else #else
// on a 64 bit platform, the following function is probably a no-op // on a 64 bit platform, the following function is probably a no-op
static inline constexpr std::size_t checkOverflow(ValueLength length) noexcept { static VELOCYPACK_FORCE_INLINE constexpr std::size_t checkOverflow(ValueLength length) noexcept {
return static_cast<std::size_t>(length); return static_cast<std::size_t>(length);
} }
#endif #endif

View File

@ -37,6 +37,25 @@
using namespace arangodb::velocypack; using namespace arangodb::velocypack;
namespace { namespace {
// struct used when sorting index tables for objects. One entry is filled in
// per member offset by sortObjectIndexLong(); the entries are sorted there
// and the offsets are then copied back in their sorted order.
struct SortEntry {
// start of the attribute name bytes, as returned by findAttrName()
uint8_t const* nameStart;
// length of the attribute name, as reported by findAttrName()
uint64_t nameSize;
// offset of the member, relative to the base address of the object
uint64_t offset;
};
// minimum allocation done for the sortEntries vector
// this is used to overallocate memory so we can avoid some follow-up
// reallocations. it is also the number of elements that are kept in the
// vector when sortObjectIndexLong() shrinks it after a large sort
constexpr size_t minSortEntriesAllocation = 32;
// thread-local, reusable buffer used for sorting medium to big index entries.
// sortObjectIndexLong() clears it before every use, so its contents carry no
// meaning between calls. once its capacity has reached 4096 entries it is
// trimmed and shrunk again at the end of the call
thread_local std::vector<SortEntry> sortEntries;
// thread-local, reusable set to track usage of duplicate keys.
// checkAttributeUniquenessUnsorted() clears it before every use, and clears
// it again afterwards if 4096 or more keys were tracked. when a duplicate key
// is found the function returns right away, so the entries collected so far
// stay in the set until the next use clears them.
// NOTE(review): presumably the StringRef entries point into the memory of the
// inspected object, so leftover entries must never be read - confirm
thread_local std::unordered_set<StringRef> duplicateKeys;
// Find the actual bytes of the attribute name of the VPack value // Find the actual bytes of the attribute name of the VPack value
// at position base, also determine the length len of the attribute. // at position base, also determine the length len of the attribute.
// This takes into account the different possibilities for the format // This takes into account the different possibilities for the format
@ -229,20 +248,22 @@ void Builder::sortObjectIndexShort(uint8_t* objBase,
void Builder::sortObjectIndexLong(uint8_t* objBase, void Builder::sortObjectIndexLong(uint8_t* objBase,
std::vector<ValueLength>& offsets) { std::vector<ValueLength>& offsets) {
_sortEntries.clear(); // start with clean sheet in case the previous run left something
// in the vector (e.g. when bailing out with an exception)
::sortEntries.clear();
std::size_t const n = offsets.size(); std::size_t const n = offsets.size();
VELOCYPACK_ASSERT(n > 1); VELOCYPACK_ASSERT(n > 1);
_sortEntries.reserve(n); ::sortEntries.reserve(std::max(::minSortEntriesAllocation, n));
for (std::size_t i = 0; i < n; i++) { for (std::size_t i = 0; i < n; i++) {
SortEntry e; SortEntry e;
e.offset = offsets[i]; e.offset = offsets[i];
e.nameStart = ::findAttrName(objBase + e.offset, e.nameSize); e.nameStart = ::findAttrName(objBase + e.offset, e.nameSize);
_sortEntries.push_back(e); ::sortEntries.push_back(e);
} }
VELOCYPACK_ASSERT(_sortEntries.size() == n); VELOCYPACK_ASSERT(::sortEntries.size() == n);
std::sort(_sortEntries.begin(), _sortEntries.end(), [](SortEntry const& a, std::sort(::sortEntries.begin(), ::sortEntries.end(), [](SortEntry const& a,
SortEntry const& b) SortEntry const& b)
#ifdef VELOCYPACK_64BIT #ifdef VELOCYPACK_64BIT
noexcept noexcept
#endif #endif
@ -258,9 +279,19 @@ void Builder::sortObjectIndexLong(uint8_t* objBase,
// copy back the sorted offsets // copy back the sorted offsets
for (std::size_t i = 0; i < n; i++) { for (std::size_t i = 0; i < n; i++) {
offsets[i] = _sortEntries[i].offset; offsets[i] = ::sortEntries[i].offset;
}
if (::sortEntries.capacity() >= 4096) {
// if we use around 100kb or more of memory, try to free up some memory
if (::sortEntries.size() >= ::minSortEntriesAllocation) {
// leave 32 elements in the vector, so we can hopefully avoid some reallocations later
::sortEntries.erase(::sortEntries.begin() + ::minSortEntriesAllocation, ::sortEntries.end());
} else {
::sortEntries.clear();
}
::sortEntries.shrink_to_fit();
} }
_sortEntries.clear();
} }
Builder& Builder::closeEmptyArrayOrObject(ValueLength tos, bool isArray) { Builder& Builder::closeEmptyArrayOrObject(ValueLength tos, bool isArray) {
@ -1042,17 +1073,24 @@ bool Builder::checkAttributeUniquenessUnsorted(Slice obj) const {
it.next(); it.next();
} while (it.valid()); } while (it.valid());
} else { } else {
std::unordered_set<StringRef> keys; ::duplicateKeys.clear();
do { do {
Slice const key = it.key(true); Slice const key = it.key(true);
// key(true) guarantees a String as returned type // key(true) guarantees a String as returned type
VELOCYPACK_ASSERT(key.isString()); VELOCYPACK_ASSERT(key.isString());
if (VELOCYPACK_UNLIKELY(!keys.emplace(key).second)) { if (VELOCYPACK_UNLIKELY(!::duplicateKeys.emplace(key).second)) {
// identical key // identical key
return false; return false;
} }
it.next(); it.next();
} while (it.valid()); } while (it.valid());
// reclaim a bit of memory already if we have tracked a lot of keys.
// this will not free the set's top-level, but should free up the elements
// in the set
if (::duplicateKeys.size() >= 4096) {
::duplicateKeys.clear();
}
} }
// all keys unique // all keys unique

View File

@ -21,8 +21,8 @@
#ifndef VELOCYPACK_ASM_UTF8CHECK_H #ifndef VELOCYPACK_ASM_UTF8CHECK_H
#define VELOCYPACK_ASM_UTF8CHECK_H #define VELOCYPACK_ASM_UTF8CHECK_H
#include <cstdint>
#include <cstddef> #include <cstddef>
#include <cstdint>
namespace arangodb { namespace arangodb {
namespace velocypack { namespace velocypack {

View File

@ -77,28 +77,30 @@ std::string canonicalpath(std::string const& path) {
return path; return path;
} }
void convertToSingleExpression(std::vector<std::string> const& files, std::string& targetRegex) { void convertToSingleExpression(std::vector<std::string> const& values, std::string& targetRegex) {
if (files.empty()) { if (values.empty()) {
return; return;
} }
targetRegex = arangodb::basics::StringUtils::join(files, '|'); targetRegex = "(" + arangodb::basics::StringUtils::join(values, '|') + ")";
} }
void convertToSingleExpression(std::unordered_set<std::string> const& files, void convertToSingleExpression(std::unordered_set<std::string> const& values,
std::string& targetRegex) { std::string& targetRegex) {
// does not delete from the set // does not delete from the set
if (files.empty()) { if (values.empty()) {
return; return;
} }
auto last = *files.cbegin(); auto last = *values.cbegin();
std::stringstream ss; std::stringstream ss;
for (auto fileIt = std::next(files.cbegin()); fileIt != files.cend(); ++fileIt) { ss << "(";
ss << *fileIt << "|"; for (auto it = std::next(values.cbegin()); it != values.cend(); ++it) {
ss << *it << "|";
} }
ss << last; ss << last;
ss << ")";
targetRegex = ss.str(); targetRegex = ss.str();
} }
@ -134,7 +136,7 @@ bool checkBlackAndWhitelist(std::string const& value, bool hasWhitelist,
// we have neither a whitelist nor a blacklist hit => deny // we have neither a whitelist nor a blacklist hit => deny
return false; return false;
} }
// longer match or blacklist wins // longer match or blacklist wins
return white_result[0].length() > black_result[0].length(); return white_result[0].length() > black_result[0].length();
} }
@ -282,12 +284,18 @@ void V8SecurityFeature::start() {
} }
void V8SecurityFeature::dumpAccessLists() const { void V8SecurityFeature::dumpAccessLists() const {
LOG_TOPIC("2cafe", DEBUG, arangodb::Logger::SECURITY) << "files whitelisted by user:" << _filesWhitelist; LOG_TOPIC("2cafe", DEBUG, arangodb::Logger::SECURITY)
LOG_TOPIC("2bad4", DEBUG, arangodb::Logger::SECURITY) << "interal read whitelist:" << _readWhitelist; << "files whitelisted by user:" << _filesWhitelist
LOG_TOPIC("beef2", DEBUG, arangodb::Logger::SECURITY) << "internal write whitelist:" << _writeWhitelist; << ", internal read whitelist:" << _readWhitelist
<< ", internal write whitelist:" << _writeWhitelist
<< ", internal startup options whitelist:" << _startupOptionsWhitelist
<< ", internal startup options blacklist: " << _startupOptionsBlacklist
<< ", internal environment variable whitelist:" << _environmentVariablesWhitelist
<< ", internal environment variables blacklist: " << _environmentVariablesBlacklist
<< ", internal endpoints whitelist:" << _endpointsWhitelist
<< ", internal endpoints blacklist: " << _endpointsBlacklist;
} }
void V8SecurityFeature::addToInternalWhitelist(std::string const& inItem, FSAccessType type) { void V8SecurityFeature::addToInternalWhitelist(std::string const& inItem, FSAccessType type) {
// This function is not efficient and we would not need the _readWhitelist // This function is not efficient and we would not need the _readWhitelist
// to be persistent. But the persistence will help in debugging and // to be persistent. But the persistence will help in debugging and

View File

@ -521,7 +521,7 @@ std::regex const iso8601Regex(
"(" "("
"z|Z|" // trailing Z or start of timezone "z|Z|" // trailing Z or start of timezone
"(\\+|\\-)" "(\\+|\\-)"
"\\d\\d\\:\\d\\d" // timezone hh:mm "\\d?\\d\\:\\d\\d" // timezone hh:mm
")?" ")?"
")|" ")|"
"(z|Z)" // Z "(z|Z)" // Z
@ -543,7 +543,7 @@ submatch 10: '34'
*/ */
std::regex const timeRegex( std::regex const timeRegex(
"(\\d\\d)\\:(\\d\\d)(\\:(\\d\\d)(\\.(\\d{1,}))?)?((\\+|\\-)(\\d\\d)\\:" "(\\d\\d)\\:(\\d\\d)(\\:(\\d\\d)(\\.(\\d{1,}))?)?((\\+|\\-)(\\d?\\d)\\:"
"(\\d\\d))?"); "(\\d\\d))?");
/* REGEX GROUPS /* REGEX GROUPS

View File

@ -125,6 +125,16 @@ function ahuacatlDateFunctionsTestSuite () {
[ "2012-02-12 13:24:12Z", true ], [ "2012-02-12 13:24:12Z", true ],
[ "2012-02-12 23:59:59.991", true ], [ "2012-02-12 23:59:59.991", true ],
[ "2012-02-12 23:59:59.991Z", true ], [ "2012-02-12 23:59:59.991Z", true ],
[ "2012-02-12 23:59:59.991+0:00", true ],
[ "2012-02-12 23:59:59.991+1:00", true ],
[ "2012-02-12 23:59:59.991+01:00", true ],
[ "2012-02-12 23:59:59.991+08:30", true ],
[ "2012-02-12 23:59:59.991+02:00", true ],
[ "2012-02-12 23:59:59.991-0:30", true ],
[ "2012-02-12 23:59:59.991-1:30", true ],
[ "2012-02-12 23:59:59.991-01:00", true ],
[ "2012-02-12 23:59:59.991-08:30", true ],
[ "2012-02-12 23:59:59.991-02:00", true ],
[ "2012-02-12", true ], [ "2012-02-12", true ],
[ "2012-02-12Z", true ], [ "2012-02-12Z", true ],
[ "2012-02-12T13:24:12Z", true ], [ "2012-02-12T13:24:12Z", true ],
@ -150,6 +160,10 @@ function ahuacatlDateFunctionsTestSuite () {
[ "6789-12-31T23:59:58.99Z", true ], [ "6789-12-31T23:59:58.99Z", true ],
[ "6789-12-31Z", true ], [ "6789-12-31Z", true ],
[ "9999-12-31T23:59:59.999Z", true ], [ "9999-12-31T23:59:59.999Z", true ],
[ "9999-12-31T23:59:59.999-1:00", true ],
[ "9999-12-31T23:59:59.999-01:00", true ],
[ "9999-12-31T23:59:59.999+1:00", true ],
[ "9999-12-31T23:59:59.999+01:00", true ],
[ "9999-12-31Z", true ], [ "9999-12-31Z", true ],
[ "9999-12-31z", true ], [ "9999-12-31z", true ],
[ "9999-12-31", true ], [ "9999-12-31", true ],