1
0
Fork 0

Bug fix/issues 27052019 (#9117)

This commit is contained in:
Jan 2019-05-28 12:08:25 +02:00 committed by GitHub
parent 27112a3486
commit 033f411420
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 94 additions and 36 deletions

View File

@ -76,13 +76,6 @@ class Builder {
// object(s).
private:
// A struct for sorting index tables for objects:
struct SortEntry {
uint8_t const* nameStart;
uint64_t nameSize;
uint64_t offset;
};
std::shared_ptr<Buffer<uint8_t>> _buffer; // Here we collect the result
Buffer<uint8_t>* _bufferPtr; // used for quicker access than shared_ptr
uint8_t* _start; // Always points to the start of _buffer
@ -91,8 +84,6 @@ class Builder {
// open objects/arrays
std::vector<std::vector<ValueLength>> _index; // Indices for starts
// of subindex
// temporary buffer used for sorting medium to big objects
std::vector<Builder::SortEntry> _sortEntries;
bool _keyWritten; // indicates that in the current object the key
// has been written but the value not yet

View File

@ -78,6 +78,13 @@
#define VELOCYPACK_UNUSED /* unused */
#endif
// Attribute used to force inlining of functions.
// GCC and clang use the always_inline attribute, MSVC uses __forceinline.
// For any other compiler we fall back to a plain `inline`, so that the macro
// is always defined and code using it still compiles.
#if defined(__GNUC__) || defined(__clang__)
#define VELOCYPACK_FORCE_INLINE inline __attribute__((__always_inline__))
#elif defined(_MSC_VER)
#define VELOCYPACK_FORCE_INLINE __forceinline
#else
#define VELOCYPACK_FORCE_INLINE inline
#endif
#ifndef VELOCYPACK_XXHASH
#ifndef VELOCYPACK_FASTHASH
#define VELOCYPACK_XXHASH
@ -118,7 +125,7 @@ bool assemblerFunctionsDisabled();
std::size_t checkOverflow(ValueLength);
#else
// on a 64 bit platform, the following function is probably a no-op
static inline constexpr std::size_t checkOverflow(ValueLength length) noexcept {
static VELOCYPACK_FORCE_INLINE constexpr std::size_t checkOverflow(ValueLength length) noexcept {
return static_cast<std::size_t>(length);
}
#endif

View File

@ -37,6 +37,25 @@
using namespace arangodb::velocypack;
namespace {
// struct used when sorting index tables for objects:
// Builder::sortObjectIndexLong creates one entry per index table slot,
// sorts the entries and then copies the `offset` values back into the
// index table in sorted order.
struct SortEntry {
// start of the attribute name's bytes, as returned by findAttrName()
uint8_t const* nameStart;
// length of the attribute name, as filled in by findAttrName()
uint64_t nameSize;
// offset taken from the index table; findAttrName() is called on
// objBase + offset
uint64_t offset;
};
// minimum allocation done for the sortEntries vector
// this is used to overallocate memory so we can avoid some follow-up
// reallocations. it is also the number of elements that is left in the
// vector when Builder::sortObjectIndexLong shrinks it after a large sort
constexpr size_t minSortEntriesAllocation = 32;
// thread-local, reusable buffer used for sorting medium to big index entries.
// Builder::sortObjectIndexLong clears it before every use, so leftovers
// from a previous run (e.g. after an exception) are harmless
thread_local std::vector<SortEntry> sortEntries;
// thread-local, reusable set to track usage of duplicate keys.
// Builder::checkAttributeUniquenessUnsorted clears it before every use; on
// the early return for a duplicate key the tracked keys stay in the set
// until the next call on the same thread
thread_local std::unordered_set<StringRef> duplicateKeys;
// Find the actual bytes of the attribute name of the VPack value
// at position base, also determine the length len of the attribute.
// This takes into account the different possibilities for the format
@ -229,20 +248,22 @@ void Builder::sortObjectIndexShort(uint8_t* objBase,
void Builder::sortObjectIndexLong(uint8_t* objBase,
std::vector<ValueLength>& offsets) {
_sortEntries.clear();
// start with clean sheet in case the previous run left something
// in the vector (e.g. when bailing out with an exception)
::sortEntries.clear();
std::size_t const n = offsets.size();
VELOCYPACK_ASSERT(n > 1);
_sortEntries.reserve(n);
::sortEntries.reserve(std::max(::minSortEntriesAllocation, n));
for (std::size_t i = 0; i < n; i++) {
SortEntry e;
e.offset = offsets[i];
e.nameStart = ::findAttrName(objBase + e.offset, e.nameSize);
_sortEntries.push_back(e);
::sortEntries.push_back(e);
}
VELOCYPACK_ASSERT(_sortEntries.size() == n);
std::sort(_sortEntries.begin(), _sortEntries.end(), [](SortEntry const& a,
SortEntry const& b)
VELOCYPACK_ASSERT(::sortEntries.size() == n);
std::sort(::sortEntries.begin(), ::sortEntries.end(), [](SortEntry const& a,
SortEntry const& b)
#ifdef VELOCYPACK_64BIT
noexcept
#endif
@ -258,9 +279,19 @@ void Builder::sortObjectIndexLong(uint8_t* objBase,
// copy back the sorted offsets
for (std::size_t i = 0; i < n; i++) {
offsets[i] = _sortEntries[i].offset;
offsets[i] = ::sortEntries[i].offset;
}
if (::sortEntries.capacity() >= 4096) {
// if we use around 100kb or more of memory, try to free up some memory
if (::sortEntries.size() >= ::minSortEntriesAllocation) {
// leave 32 elements in the vector, so we can hopefully avoid some reallocations later
::sortEntries.erase(::sortEntries.begin() + ::minSortEntriesAllocation, ::sortEntries.end());
} else {
::sortEntries.clear();
}
::sortEntries.shrink_to_fit();
}
_sortEntries.clear();
}
Builder& Builder::closeEmptyArrayOrObject(ValueLength tos, bool isArray) {
@ -1042,17 +1073,24 @@ bool Builder::checkAttributeUniquenessUnsorted(Slice obj) const {
it.next();
} while (it.valid());
} else {
std::unordered_set<StringRef> keys;
::duplicateKeys.clear();
do {
Slice const key = it.key(true);
// key(true) guarantees a String as returned type
VELOCYPACK_ASSERT(key.isString());
if (VELOCYPACK_UNLIKELY(!keys.emplace(key).second)) {
if (VELOCYPACK_UNLIKELY(!::duplicateKeys.emplace(key).second)) {
// identical key
return false;
}
it.next();
} while (it.valid());
// reclaim a bit of memory already if we have tracked a lot of keys.
// clear() destroys the elements stored in the set, but typically keeps
// the set's bucket array allocated
if (::duplicateKeys.size() >= 4096) {
::duplicateKeys.clear();
}
}
// all keys unique

View File

@ -21,8 +21,8 @@
#ifndef VELOCYPACK_ASM_UTF8CHECK_H
#define VELOCYPACK_ASM_UTF8CHECK_H
#include <cstdint>
#include <cstddef>
#include <cstdint>
namespace arangodb {
namespace velocypack {

View File

@ -77,28 +77,30 @@ std::string canonicalpath(std::string const& path) {
return path;
}
void convertToSingleExpression(std::vector<std::string> const& files, std::string& targetRegex) {
if (files.empty()) {
void convertToSingleExpression(std::vector<std::string> const& values, std::string& targetRegex) {
if (values.empty()) {
return;
}
targetRegex = arangodb::basics::StringUtils::join(files, '|');
targetRegex = "(" + arangodb::basics::StringUtils::join(values, '|') + ")";
}
void convertToSingleExpression(std::unordered_set<std::string> const& files,
void convertToSingleExpression(std::unordered_set<std::string> const& values,
std::string& targetRegex) {
// does not delete from the set
if (files.empty()) {
if (values.empty()) {
return;
}
auto last = *files.cbegin();
auto last = *values.cbegin();
std::stringstream ss;
for (auto fileIt = std::next(files.cbegin()); fileIt != files.cend(); ++fileIt) {
ss << *fileIt << "|";
ss << "(";
for (auto it = std::next(values.cbegin()); it != values.cend(); ++it) {
ss << *it << "|";
}
ss << last;
ss << ")";
targetRegex = ss.str();
}
@ -282,12 +284,18 @@ void V8SecurityFeature::start() {
}
void V8SecurityFeature::dumpAccessLists() const {
LOG_TOPIC("2cafe", DEBUG, arangodb::Logger::SECURITY) << "files whitelisted by user:" << _filesWhitelist;
LOG_TOPIC("2bad4", DEBUG, arangodb::Logger::SECURITY) << "interal read whitelist:" << _readWhitelist;
LOG_TOPIC("beef2", DEBUG, arangodb::Logger::SECURITY) << "internal write whitelist:" << _writeWhitelist;
LOG_TOPIC("2cafe", DEBUG, arangodb::Logger::SECURITY)
<< "files whitelisted by user:" << _filesWhitelist
<< ", internal read whitelist:" << _readWhitelist
<< ", internal write whitelist:" << _writeWhitelist
<< ", internal startup options whitelist:" << _startupOptionsWhitelist
<< ", internal startup options blacklist: " << _startupOptionsBlacklist
<< ", internal environment variable whitelist:" << _environmentVariablesWhitelist
<< ", internal environment variables blacklist: " << _environmentVariablesBlacklist
<< ", internal endpoints whitelist:" << _endpointsWhitelist
<< ", internal endpoints blacklist: " << _endpointsBlacklist;
}
void V8SecurityFeature::addToInternalWhitelist(std::string const& inItem, FSAccessType type) {
// This function is not efficient and we would not need the _readWhitelist
// to be persistent. But the persistence will help in debugging and

View File

@ -521,7 +521,7 @@ std::regex const iso8601Regex(
"("
"z|Z|" // trailing Z or start of timezone
"(\\+|\\-)"
"\\d\\d\\:\\d\\d" // timezone hh:mm
"\\d?\\d\\:\\d\\d" // timezone hh:mm
")?"
")|"
"(z|Z)" // Z
@ -543,7 +543,7 @@ submatch 10: '34'
*/
std::regex const timeRegex(
"(\\d\\d)\\:(\\d\\d)(\\:(\\d\\d)(\\.(\\d{1,}))?)?((\\+|\\-)(\\d\\d)\\:"
"(\\d\\d)\\:(\\d\\d)(\\:(\\d\\d)(\\.(\\d{1,}))?)?((\\+|\\-)(\\d?\\d)\\:"
"(\\d\\d))?");
/* REGEX GROUPS

View File

@ -125,6 +125,16 @@ function ahuacatlDateFunctionsTestSuite () {
[ "2012-02-12 13:24:12Z", true ],
[ "2012-02-12 23:59:59.991", true ],
[ "2012-02-12 23:59:59.991Z", true ],
[ "2012-02-12 23:59:59.991+0:00", true ],
[ "2012-02-12 23:59:59.991+1:00", true ],
[ "2012-02-12 23:59:59.991+01:00", true ],
[ "2012-02-12 23:59:59.991+08:30", true ],
[ "2012-02-12 23:59:59.991+02:00", true ],
[ "2012-02-12 23:59:59.991-0:30", true ],
[ "2012-02-12 23:59:59.991-1:30", true ],
[ "2012-02-12 23:59:59.991-01:00", true ],
[ "2012-02-12 23:59:59.991-08:30", true ],
[ "2012-02-12 23:59:59.991-02:00", true ],
[ "2012-02-12", true ],
[ "2012-02-12Z", true ],
[ "2012-02-12T13:24:12Z", true ],
@ -150,6 +160,10 @@ function ahuacatlDateFunctionsTestSuite () {
[ "6789-12-31T23:59:58.99Z", true ],
[ "6789-12-31Z", true ],
[ "9999-12-31T23:59:59.999Z", true ],
[ "9999-12-31T23:59:59.999-1:00", true ],
[ "9999-12-31T23:59:59.999-01:00", true ],
[ "9999-12-31T23:59:59.999+1:00", true ],
[ "9999-12-31T23:59:59.999+01:00", true ],
[ "9999-12-31Z", true ],
[ "9999-12-31z", true ],
[ "9999-12-31", true ],