Bug fix/issues 27052019 (#9117)

2019-05-28 12:08:25 +02:00 · 2019-05-28 12:08:25 +02:00 · 033f411420
parent 27112a3486
commit 033f411420
7 changed files with 94 additions and 36 deletions
--- a/3rdParty/velocypack/include/velocypack/Builder.h
+++ b/3rdParty/velocypack/include/velocypack/Builder.h
@ -76,13 +76,6 @@ class Builder {
  // object(s).
 
 private:
-  // A struct for sorting index tables for objects:
-  struct SortEntry {
-    uint8_t const* nameStart;
-    uint64_t nameSize;
-    uint64_t offset;
-  };
-
  std::shared_ptr<Buffer<uint8_t>> _buffer;  // Here we collect the result
  Buffer<uint8_t>* _bufferPtr;      // used for quicker access than shared_ptr
  uint8_t* _start;                  // Always points to the start of _buffer
@ -91,8 +84,6 @@ class Builder {
                                    // open objects/arrays
  std::vector<std::vector<ValueLength>> _index;  // Indices for starts
                                                 // of subindex
-  // temporary buffer used for sorting medium to big objects
-  std::vector<Builder::SortEntry> _sortEntries; 
  bool _keyWritten;  // indicates that in the current object the key
                     // has been written but the value not yet

--- a/3rdParty/velocypack/include/velocypack/velocypack-common.h
+++ b/3rdParty/velocypack/include/velocypack/velocypack-common.h
@ -78,6 +78,13 @@
 #define VELOCYPACK_UNUSED /* unused */
 #endif

+// attribute used to force inlining of functions (plain `inline` as fallback, so the macro is always defined)
+#if defined(__GNUC__) || defined(__clang__)
+#define VELOCYPACK_FORCE_INLINE inline __attribute__((__always_inline__))
+#elif defined(_WIN32)
+#define VELOCYPACK_FORCE_INLINE __forceinline
+#else
+#define VELOCYPACK_FORCE_INLINE inline
+#endif
+
 #ifndef VELOCYPACK_XXHASH
 #ifndef VELOCYPACK_FASTHASH
 #define VELOCYPACK_XXHASH
@ -118,7 +125,7 @@ bool assemblerFunctionsDisabled();
 std::size_t checkOverflow(ValueLength);
 #else
 // on a 64 bit platform, the following function is probably a no-op
-static inline constexpr std::size_t checkOverflow(ValueLength length) noexcept {
+static VELOCYPACK_FORCE_INLINE constexpr std::size_t checkOverflow(ValueLength length) noexcept {
  return static_cast<std::size_t>(length);
 }
 #endif
--- a/3rdParty/velocypack/src/Builder.cpp
+++ b/3rdParty/velocypack/src/Builder.cpp
@ -37,6 +37,25 @@
 using namespace arangodb::velocypack;

 namespace {
+  
+// struct used when sorting index tables for objects (one entry per index table offset):
+struct SortEntry {
+  uint8_t const* nameStart;  // start of the attribute name bytes, as returned by findAttrName
+  uint64_t nameSize;  // length of the attribute name, as reported by findAttrName
+  uint64_t offset;  // offset of the attribute, relative to the object's base address
+};
+
+// minimum number of elements reserved for the sortEntries vector
+// this is used to overallocate memory so we can avoid some follow-up
+// reallocations
+constexpr size_t minSortEntriesAllocation = 32;
+
+// thread-local, reusable buffer used for sorting the index tables of medium to big objects (cleared before each use in sortObjectIndexLong)
+thread_local std::vector<SortEntry> sortEntries; 
+
+// thread-local, reusable set used to detect duplicate keys (cleared before each use in checkAttributeUniquenessUnsorted)
+thread_local std::unordered_set<StringRef> duplicateKeys;
+
 // Find the actual bytes of the attribute name of the VPack value
 // at position base, also determine the length len of the attribute.
 // This takes into account the different possibilities for the format
@ -229,20 +248,22 @@ void Builder::sortObjectIndexShort(uint8_t* objBase,

 void Builder::sortObjectIndexLong(uint8_t* objBase,
                                  std::vector<ValueLength>& offsets) {
-  _sortEntries.clear();
+  // start with a clean slate: the previous run normally leaves its entries
+  // in the vector (they are only trimmed at the end, or left as is when bailing out with an exception)
+  ::sortEntries.clear();

  std::size_t const n = offsets.size();
  VELOCYPACK_ASSERT(n > 1);
-  _sortEntries.reserve(n);
+  ::sortEntries.reserve(std::max(::minSortEntriesAllocation, n));
  for (std::size_t i = 0; i < n; i++) {
    SortEntry e;
    e.offset = offsets[i];
    e.nameStart = ::findAttrName(objBase + e.offset, e.nameSize);
-    _sortEntries.push_back(e);
+    ::sortEntries.push_back(e);
  }
-  VELOCYPACK_ASSERT(_sortEntries.size() == n);
-  std::sort(_sortEntries.begin(), _sortEntries.end(), [](SortEntry const& a, 
-                                                         SortEntry const& b) 
+  VELOCYPACK_ASSERT(::sortEntries.size() == n);
+  std::sort(::sortEntries.begin(), ::sortEntries.end(), [](SortEntry const& a, 
+                                                           SortEntry const& b) 
 #ifdef VELOCYPACK_64BIT
    noexcept
 #endif
@ -258,9 +279,19 @@ void Builder::sortObjectIndexLong(uint8_t* objBase,

  // copy back the sorted offsets
  for (std::size_t i = 0; i < n; i++) {
-    offsets[i] = _sortEntries[i].offset;
+    offsets[i] = ::sortEntries[i].offset;
+  }
+
+  if (::sortEntries.capacity() >= 4096) {
+    // if we use around 100kb or more of memory, try to free up some memory
+    if (::sortEntries.size() >= ::minSortEntriesAllocation) {
+      // keep minSortEntriesAllocation (now stale) elements in the vector, so that shrink_to_fit() below can retain capacity for them and we can hopefully avoid some reallocations later
+      ::sortEntries.erase(::sortEntries.begin() + ::minSortEntriesAllocation, ::sortEntries.end());
+    } else {
+      ::sortEntries.clear();
+    }
+    ::sortEntries.shrink_to_fit();
  }
-  _sortEntries.clear();
 }

 Builder& Builder::closeEmptyArrayOrObject(ValueLength tos, bool isArray) {
@ -1042,17 +1073,24 @@ bool Builder::checkAttributeUniquenessUnsorted(Slice obj) const {
      it.next();
    } while (it.valid());
  } else {
-    std::unordered_set<StringRef> keys;
+    ::duplicateKeys.clear();
    do {
      Slice const key = it.key(true);
      // key(true) guarantees a String as returned type
      VELOCYPACK_ASSERT(key.isString());
-      if (VELOCYPACK_UNLIKELY(!keys.emplace(key).second)) {
+      if (VELOCYPACK_UNLIKELY(!::duplicateKeys.emplace(key).second)) {
        // identical key
        return false;
      }
      it.next();
    } while (it.valid());
+ 
+    // reclaim a bit of memory right away if we have tracked a lot of keys.
+    // this will not necessarily free the set's bucket array, but should free
+    // the memory used by the individual elements in the set
+    if (::duplicateKeys.size() >= 4096) {
+      ::duplicateKeys.clear();
+    }
  }
  
  // all keys unique
--- a/3rdParty/velocypack/src/asm-utf8check.h
+++ b/3rdParty/velocypack/src/asm-utf8check.h
@ -21,8 +21,8 @@
 #ifndef VELOCYPACK_ASM_UTF8CHECK_H
 #define VELOCYPACK_ASM_UTF8CHECK_H

-#include <cstdint>
 #include <cstddef>
+#include <cstdint>

 namespace arangodb {
 namespace velocypack {
--- a/lib/ApplicationFeatures/V8SecurityFeature.cpp
+++ b/lib/ApplicationFeatures/V8SecurityFeature.cpp
@ -77,28 +77,30 @@ std::string canonicalpath(std::string const& path) {
  return path;
 }

-void convertToSingleExpression(std::vector<std::string> const& files, std::string& targetRegex) {
-  if (files.empty()) {
+void convertToSingleExpression(std::vector<std::string> const& values, std::string& targetRegex) {
+  if (values.empty()) {
    return;
  }

-  targetRegex = arangodb::basics::StringUtils::join(files, '|');
+  targetRegex = "(" + arangodb::basics::StringUtils::join(values, '|') + ")";
 }

-void convertToSingleExpression(std::unordered_set<std::string> const& files,
+void convertToSingleExpression(std::unordered_set<std::string> const& values,
                               std::string& targetRegex) {
  // does not delete from the set
-  if (files.empty()) {
+  if (values.empty()) {
    return;
  }
-  auto last = *files.cbegin();
+  auto last = *values.cbegin();

  std::stringstream ss;
-  for (auto fileIt = std::next(files.cbegin()); fileIt != files.cend(); ++fileIt) {
-    ss << *fileIt << "|";
+  ss << "(";
+  for (auto it = std::next(values.cbegin()); it != values.cend(); ++it) {
+    ss << *it << "|";
  }

  ss << last;
+  ss << ")";
  targetRegex = ss.str();
 }

@ -282,12 +284,18 @@ void V8SecurityFeature::start() {
 }

 void V8SecurityFeature::dumpAccessLists() const {
-  LOG_TOPIC("2cafe", DEBUG, arangodb::Logger::SECURITY) << "files whitelisted by user:" << _filesWhitelist;
-  LOG_TOPIC("2bad4", DEBUG, arangodb::Logger::SECURITY) << "interal read whitelist:" << _readWhitelist;
-  LOG_TOPIC("beef2", DEBUG, arangodb::Logger::SECURITY) << "internal write whitelist:" << _writeWhitelist;
+  LOG_TOPIC("2cafe", DEBUG, arangodb::Logger::SECURITY) 
+    << "files whitelisted by user:" << _filesWhitelist
+    << ", internal read whitelist:" << _readWhitelist
+    << ", internal write whitelist:" << _writeWhitelist
+    << ", internal startup options whitelist:" << _startupOptionsWhitelist 
+    << ", internal startup options blacklist: " << _startupOptionsBlacklist
+    << ", internal environment variable whitelist:" << _environmentVariablesWhitelist 
+    << ", internal environment variables blacklist: " << _environmentVariablesBlacklist
+    << ", internal endpoints whitelist:" << _endpointsWhitelist 
+    << ", internal endpoints blacklist: " << _endpointsBlacklist;
 }

-
 void V8SecurityFeature::addToInternalWhitelist(std::string const& inItem, FSAccessType type) {
  // This function is not efficient and we would not need the _readWhitelist
  // to be persistent. But the persistence will help in debugging and
--- a/lib/Basics/datetime.cpp
+++ b/lib/Basics/datetime.cpp
@ -521,7 +521,7 @@ std::regex const iso8601Regex(
    "("
    "z|Z|"  // trailing Z or start of timezone
    "(\\+|\\-)"
-    "\\d\\d\\:\\d\\d"  // timezone hh:mm
+    "\\d?\\d\\:\\d\\d"  // timezone hh:mm
    ")?"
    ")|"
    "(z|Z)"  // Z
@ -543,7 +543,7 @@ submatch 10: '34'
 */

 std::regex const timeRegex(
-    "(\\d\\d)\\:(\\d\\d)(\\:(\\d\\d)(\\.(\\d{1,}))?)?((\\+|\\-)(\\d\\d)\\:"
+    "(\\d\\d)\\:(\\d\\d)(\\:(\\d\\d)(\\.(\\d{1,}))?)?((\\+|\\-)(\\d?\\d)\\:"
    "(\\d\\d))?");

 /* REGEX GROUPS
--- a/tests/js/server/aql/aql-functions-date.js
+++ b/tests/js/server/aql/aql-functions-date.js
@ -125,6 +125,16 @@ function ahuacatlDateFunctionsTestSuite () {
        [ "2012-02-12 13:24:12Z", true ],
        [ "2012-02-12 23:59:59.991", true ],
        [ "2012-02-12 23:59:59.991Z", true ],
+        [ "2012-02-12 23:59:59.991+0:00", true ],
+        [ "2012-02-12 23:59:59.991+1:00", true ],
+        [ "2012-02-12 23:59:59.991+01:00", true ],
+        [ "2012-02-12 23:59:59.991+08:30", true ],
+        [ "2012-02-12 23:59:59.991+02:00", true ],
+        [ "2012-02-12 23:59:59.991-0:30", true ],
+        [ "2012-02-12 23:59:59.991-1:30", true ],
+        [ "2012-02-12 23:59:59.991-01:00", true ],
+        [ "2012-02-12 23:59:59.991-08:30", true ],
+        [ "2012-02-12 23:59:59.991-02:00", true ],
        [ "2012-02-12", true ],
        [ "2012-02-12Z", true ],
        [ "2012-02-12T13:24:12Z", true ],
@ -150,6 +160,10 @@ function ahuacatlDateFunctionsTestSuite () {
        [ "6789-12-31T23:59:58.99Z", true ],
        [ "6789-12-31Z", true ],
        [ "9999-12-31T23:59:59.999Z", true ],
+        [ "9999-12-31T23:59:59.999-1:00", true ],
+        [ "9999-12-31T23:59:59.999-01:00", true ],
+        [ "9999-12-31T23:59:59.999+1:00", true ],
+        [ "9999-12-31T23:59:59.999+01:00", true ],
        [ "9999-12-31Z", true ],
        [ "9999-12-31z", true ],
        [ "9999-12-31", true ],