From a36e4827c7166f4b14f1b8374602fb2d4aed5b8e Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Thu, 10 Dec 2015 10:43:59 +0100 Subject: [PATCH 01/13] build fix --- arangod/CMakeLists.txt | 3 ++- arangosh/CMakeLists.txt | 2 ++ lib/Basics/JsonHelper.h | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arangod/CMakeLists.txt b/arangod/CMakeLists.txt index e203475bae..f11921c793 100644 --- a/arangod/CMakeLists.txt +++ b/arangod/CMakeLists.txt @@ -30,6 +30,8 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin") ### @brief arangod ################################################################################ +include_directories(../3rdParty/velocypack/include) + if (MSVC) SET(ARANGO_MSVC RestServer/WindowsServiceUtils.cpp @@ -50,7 +52,6 @@ if (MSVC) ) endif () - add_executable( ${BIN_ARANGOD} ${ARANGO_MSVC} diff --git a/arangosh/CMakeLists.txt b/arangosh/CMakeLists.txt index 13061985d7..1a050685ae 100644 --- a/arangosh/CMakeLists.txt +++ b/arangosh/CMakeLists.txt @@ -21,6 +21,8 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin") ### @brief arangob ################################################################################ +include_directories(../3rdParty/velocypack/include) + if (MSVC) generate_product_version( ProductVersionFiles_arangob diff --git a/lib/Basics/JsonHelper.h b/lib/Basics/JsonHelper.h index 6cecd622a0..42cb831d3a 100644 --- a/lib/Basics/JsonHelper.h +++ b/lib/Basics/JsonHelper.h @@ -35,8 +35,8 @@ #include "Basics/json.h" #include "Basics/StringBuffer.h" -#include "velocypack/Parser.h" #include "velocypack/Builder.h" +#include "velocypack/Parser.h" namespace triagens { namespace basics { From abb39b8c64cfa3991466e0634ce431f36a059f59 Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Thu, 10 Dec 2015 10:44:17 +0100 Subject: [PATCH 02/13] fix warnings in Visual Studio --- 3rdParty/velocypack/include/velocypack/Slice.h | 12 ++++++------ .../include/velocypack/velocypack-common.h | 10 ++++++++++ 2 files 
changed, 16 insertions(+), 6 deletions(-) diff --git a/3rdParty/velocypack/include/velocypack/Slice.h b/3rdParty/velocypack/include/velocypack/Slice.h index d781c151f3..a46f949ade 100644 --- a/3rdParty/velocypack/include/velocypack/Slice.h +++ b/3rdParty/velocypack/include/velocypack/Slice.h @@ -393,16 +393,16 @@ class Slice { // signed integral type if (isDouble()) { auto v = getDouble(); - if (v < static_cast(std::numeric_limits::min()) || - v > static_cast(std::numeric_limits::max())) { + if (v < static_cast((std::numeric_limits::min)()) || + v > static_cast((std::numeric_limits::max)())) { throw Exception(Exception::NumberOutOfRange); } return static_cast(v); } int64_t v = getInt(); - if (v < static_cast(std::numeric_limits::min()) || - v > static_cast(std::numeric_limits::max())) { + if (v < static_cast((std::numeric_limits::min)()) || + v > static_cast((std::numeric_limits::max)())) { throw Exception(Exception::NumberOutOfRange); } return static_cast(v); @@ -411,14 +411,14 @@ class Slice { if (isDouble()) { auto v = getDouble(); if (v < 0.0 || v > static_cast(UINT64_MAX) || - v > static_cast(std::numeric_limits::max())) { + v > static_cast((std::numeric_limits::max)())) { throw Exception(Exception::NumberOutOfRange); } return static_cast(v); } uint64_t v = getUInt(); - if (v > static_cast(std::numeric_limits::max())) { + if (v > static_cast((std::numeric_limits::max)())) { throw Exception(Exception::NumberOutOfRange); } return static_cast(v); diff --git a/3rdParty/velocypack/include/velocypack/velocypack-common.h b/3rdParty/velocypack/include/velocypack/velocypack-common.h index 46f7f4e82b..a2ce591a65 100644 --- a/3rdParty/velocypack/include/velocypack/velocypack-common.h +++ b/3rdParty/velocypack/include/velocypack/velocypack-common.h @@ -183,6 +183,12 @@ static inline void storeUInt64(uint8_t* start, uint64_t value) throw() { } while (start < end); } +#ifdef _WIN32 +// turn off warnings about unimplemented exception specifications +#pragma warning(push) 
+#pragma warning(disable : 4290) +#endif + struct NoHeapAllocation { void* operator new(std::size_t) throw(std::bad_alloc) = delete; void operator delete(void*) throw() = delete; @@ -190,6 +196,10 @@ struct NoHeapAllocation { void operator delete[](void*) throw() = delete; }; +#ifdef _WIN32 +#pragma warning(pop) +#endif + } // namespace arangodb::velocypack } // namespace arangodb From 399c44278e293ce4977d2bcf8e057561c02f2e47 Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Thu, 10 Dec 2015 11:09:21 +0100 Subject: [PATCH 03/13] Add left-to-right and right-to-left guards around the hebrew bye message --- lib/Basics/messages.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/Basics/messages.h b/lib/Basics/messages.h index 1498c9528d..7de6c57fcd 100644 --- a/lib/Basics/messages.h +++ b/lib/Basics/messages.h @@ -39,6 +39,8 @@ /// @brief bye bye message //////////////////////////////////////////////////////////////////////////////// +#define TRI_UNICODE_LRM "\xE2\x80\x8E" +#define TRI_UNICODE_RLM "\xE2\x80\x8F" #define TRI_BYE_MESSAGE_CH "Uf wiederluege!" #define TRI_BYE_MESSAGE_CZ "Na shledanou!" #define TRI_BYE_MESSAGE_DE "Auf Wiedersehen!" @@ -47,8 +49,8 @@ #define TRI_BYE_MESSAGE_ES "¡Hasta luego!" #define TRI_BYE_MESSAGE_FR "Au revoir!" #define TRI_BYE_MESSAGE_GR "Εις το επανιδείν!" -#define TRI_BYE_MESSAGE_IT "Arrivederci!" #define TRI_BYE_MESSAGE_IL "להתראות!" +#define TRI_BYE_MESSAGE_IT "Arrivederci!" #define TRI_BYE_MESSAGE_JP "さようなら" #define TRI_BYE_MESSAGE_NL "Tot ziens!" #define TRI_BYE_MESSAGE_RU "До свидания!" 
@@ -63,8 +65,8 @@ TRI_BYE_MESSAGE_EO " " \ TRI_BYE_MESSAGE_ES " " \ TRI_BYE_MESSAGE_FR "\n" \ + TRI_UNICODE_RLM TRI_BYE_MESSAGE_IL TRI_UNICODE_LRM \ TRI_BYE_MESSAGE_IT " " \ - TRI_BYE_MESSAGE_IL " " \ TRI_BYE_MESSAGE_NL " " \ TRI_BYE_MESSAGE_SV " " \ TRI_BYE_MESSAGE_GR " " \ From 58cf3a60cdd351473c0be5ab47d00ebfb4da9d6b Mon Sep 17 00:00:00 2001 From: Alan Plum Date: Thu, 10 Dec 2015 12:02:29 +0100 Subject: [PATCH 04/13] Don't use Foxx.Repository for system sessions Fetching the collection dynamically helps avoid an error when the collection is removed and recreated after the app is mounted. --- .../system/_system/sessions/APP/storage.js | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/js/apps/system/_system/sessions/APP/storage.js b/js/apps/system/_system/sessions/APP/storage.js index d9dc15666a..14f79a1346 100644 --- a/js/apps/system/_system/sessions/APP/storage.js +++ b/js/apps/system/_system/sessions/APP/storage.js @@ -7,6 +7,10 @@ const db = arangodb.db; const Foxx = require('org/arangodb/foxx'); const errors = require('./errors'); +function getCollection() { + return db._collection('_sessions'); +} + const Session = Foxx.Model.extend({ schema: { _key: joi.string().required(), @@ -19,11 +23,6 @@ const Session = Foxx.Model.extend({ } }); -const sessions = new Foxx.Repository( - db._collection('_sessions'), - {model: Session} -); - function generateSessionId() { return internal.genRandomAlphaNumbers(20); } @@ -37,13 +36,13 @@ function createSession(sessionData, userData) { userData: userData || {}, lastAccess: Date.now() }); - sessions.save(session); + getCollection().save(session.attributes); return session; } function deleteSession(sid) { try { - sessions.removeById(sid); + getCollection().remove(sid); } catch (e) { if ( e instanceof arangodb.ArangoError @@ -58,15 +57,16 @@ function deleteSession(sid) { } Session.fromClient = function (sid) { + const collection = getCollection(); let session; db._executeTransaction({ 
collections: { - read: [sessions.collection.name()], - write: [sessions.collection.name()] + read: [collection.name()], + write: [collection.name()] }, action() { try { - session = sessions.byId(sid); + session = new Session(collection.document(sid)); const internalAccessTime = internal.accessSid(sid); if (internalAccessTime) { @@ -76,10 +76,11 @@ Session.fromClient = function (sid) { const now = Date.now(); session.set('lastAccess', now); - sessions.collection.update( + const meta = collection.update( session.get('_key'), {lastAccess: now} ); + session.set(meta); } catch (e) { if ( e instanceof arangodb.ArangoError @@ -132,7 +133,8 @@ _.extend(Session.prototype, { this.set('lastAccess', now); this.set('lastUpdate', now); internal.accessSid(key); - sessions.replace(this); + const meta = getCollection().replace(this.attributes, this.attributes); + this.set(meta); return this; }, delete() { From a7fa6b395ab797f6dcb8d4a8fb86df110f233bd0 Mon Sep 17 00:00:00 2001 From: Alan Plum Date: Thu, 10 Dec 2015 12:08:01 +0100 Subject: [PATCH 05/13] Add session fix to changelog --- CHANGELOG | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index deb0c3046b..20514e3642 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -271,6 +271,9 @@ v2.7.3 (XXXX-XX-XX) and drop and (re-)create collection markers were located in the same WAL file +* fixed an issue where overwriting the system sessions collection would break + the web interface when authentication is enabled + v2.7.2 (2015-12-01) ------------------- From a469e388552c38b0c9ea66a5d7d20bac4511b5cd Mon Sep 17 00:00:00 2001 From: Frank Celler Date: Thu, 10 Dec 2015 15:10:20 +0100 Subject: [PATCH 06/13] added new lib (fasthash and xxhash) --- LICENSES-OTHER-COMPONENTS.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/LICENSES-OTHER-COMPONENTS.md b/LICENSES-OTHER-COMPONENTS.md index c009da9cbf..6f8e7ab666 100644 --- a/LICENSES-OTHER-COMPONENTS.md +++ b/LICENSES-OTHER-COMPONENTS.md @@ -13,6 +13,16 @@ * 
GITHUB: https://github.com/night-shift/fpconv/ * License: [MIT License](https://github.com/night-shift/fpconv/blob/master/license) +### fasthash + +* Project Home: https://code.google.com/p/fast-hash/ +* License: [MIT License](https://code.google.com/p/fast-hash/) + +### xxhash + +* Project Home: https://code.google.com/p/xxhash/ +* License: [BSD 2-Clause License](https://code.google.com/p/xxhash/) + ### Google V8 4.3.61 * Project Home: https://code.google.com/p/v8/ From 105ac4d56246fafac5600776b1a530b416ea24f7 Mon Sep 17 00:00:00 2001 From: Max Neunhoeffer Date: Thu, 10 Dec 2015 15:28:18 +0100 Subject: [PATCH 07/13] Remove xxhash. --- lib/Basics/xxhash.cpp | 483 ------------------------------------------ lib/Basics/xxhash.h | 162 -------------- lib/CMakeLists.txt | 1 - lib/Makefile.files | 1 - 4 files changed, 647 deletions(-) delete mode 100644 lib/Basics/xxhash.cpp delete mode 100644 lib/Basics/xxhash.h diff --git a/lib/Basics/xxhash.cpp b/lib/Basics/xxhash.cpp deleted file mode 100644 index 8d8d8f07dc..0000000000 --- a/lib/Basics/xxhash.cpp +++ /dev/null @@ -1,483 +0,0 @@ -/* -xxHash - Fast Hash algorithm -Copyright (C) 2012-2014, Yann Collet. -BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -You can contact the author at : -- xxHash source repository : http://code.google.com/p/xxhash/ -*/ - - -//************************************** -// Tuning parameters -//************************************** -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. -// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. -// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). -#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_USE_UNALIGNED_ACCESS 1 -#endif - -// XXH_ACCEPT_NULL_INPUT_POINTER : -// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. -// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. -// This option has a very small performance cost (only measurable on small inputs). -// By default, this option is disabled. To enable it, uncomment below define : -//#define XXH_ACCEPT_NULL_INPUT_POINTER 1 - -// XXH_FORCE_NATIVE_FORMAT : -// By default, xxHash library provides endian-independent Hash values, based on little-endian convention. 
-// Results are therefore identical for little-endian and big-endian CPU. -// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. -// Should endian-independence be of no importance for your application, you may set the #define below to 1. -// It will improve speed for Big-endian CPU. -// This option has no impact on Little_Endian CPU. -#define XXH_FORCE_NATIVE_FORMAT 0 - - -//************************************** -// Compiler Specific Options -//************************************** -// Disable some Visual warning messages -#ifdef _MSC_VER // Visual Studio -# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant -#endif - -#ifdef _MSC_VER // Visual Studio -# define FORCE_INLINE static __forceinline -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - - -//************************************** -// Includes & Memory related functions -//************************************** -#include "xxhash.h" -// Modify the local functions below should you wish to use some other memory related routines -// for malloc(), free() -#include -FORCE_INLINE void* XXH_malloc(size_t s) { return malloc(s); } -FORCE_INLINE void XXH_free (void* p) { free(p); } -// for memcpy() -#include -FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - - -//************************************** -// Basic Types -//************************************** -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - -#if defined(__GNUC__) && 
!defined(XXH_USE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# ifdef __IBMC__ -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif - -typedef struct _U32_S { U32 v; } _PACKED U32_S; - -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(pop) -#endif - -#define A32(x) (((U32_S *)(x))->v) - - -//*************************************** -// Compiler-specific Functions and Macros -//*************************************** -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -// Note : although _rotl exists for minGW (GCC under windows), performance seems poor -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif - -#if defined(_MSC_VER) // Visual Studio -# define XXH_swap32 _byteswap_ulong -#elif GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -#else -static inline U32 XXH_swap32 (U32 x) { - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff );} -#endif - - -//************************************** -// Constants -//************************************** -#define PRIME32_1 2654435761U -#define PRIME32_2 2246822519U -#define PRIME32_3 3266489917U -#define PRIME32_4 668265263U -#define PRIME32_5 374761393U - - -//************************************** -// Architecture Macros -//************************************** -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; -#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch - static const int one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) -#endif - - -//************************************** -// Macros -//************************************** -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // 
use only *after* variable declarations - - -//**************************** -// Memory reads -//**************************** -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; - -FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); - else - return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); -} - -FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } - - -//**************************** -// Simple Hash Functions -//**************************** -FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U32 h32; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; } -#endif - - if (len>=16) - { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; - - do - { - v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); - - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } - else - { - h32 = seed + PRIME32_5; - } - - h32 += (U32) len; - - while (p<=bEnd-4) - { - h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - - while (p> 15; - 
h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -U32 XXH32(const void* input, int len, U32 seed) -{ -#if 0 - // Simple version, good for code maintenance, but unfortunately slow for small inputs - void* state = XXH32_init(seed); - XXH32_update(state, input, len); - return XXH32_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - -# if !defined(XXH_USE_UNALIGNED_ACCESS) - if ((((size_t)input) & 3)) // Input is aligned, let's leverage the speed advantage - { - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } -# endif - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - - -//**************************** -// Advanced Hash Functions -//**************************** - -struct XXH_state32_t -{ - U64 total_len; - U32 seed; - U32 v1; - U32 v2; - U32 v3; - U32 v4; - int memsize; - char memory[16]; -}; - - -int XXH32_sizeofState() -{ -// XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough - return sizeof(struct XXH_state32_t); -} - - -XXH_errorcode XXH32_resetState(void* state_in, U32 seed) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - state->seed = seed; - state->v1 = seed + PRIME32_1 + PRIME32_2; - state->v2 = seed + PRIME32_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME32_1; - state->total_len = 0; - state->memsize = 0; - return XXH_OK; -} - - -void* XXH32_init (U32 seed) -{ - void* state = XXH_malloc (sizeof(struct XXH_state32_t)); - XXH32_resetState(state, seed); - return state; -} - 
- -FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif - - state->total_len += len; - - if (state->memsize + len < 16) // fill in tmp buffer - { - XXH_memcpy(state->memory + state->memsize, input, len); - state->memsize += len; - return XXH_OK; - } - - if (state->memsize) // some data left from previous update - { - XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); - { - const U32* p32 = (const U32*)state->memory; - state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; - state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; - state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; - state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; - } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) - { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; - - do - { - v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) - { - 
XXH_memcpy(state->memory, p, bEnd-p); - state->memsize = (int)(bEnd-p); - } - - return XXH_OK; -} - -XXH_errorcode XXH32_update (void* state_in, const void* input, int len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH32_update_endian(state_in, input, len, XXH_bigEndian); -} - - - -FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - const BYTE * p = (const BYTE*)state->memory; - BYTE* bEnd = (BYTE*)state->memory + state->memsize; - U32 h32; - - if (state->total_len >= 16) - { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } - else - { - h32 = state->seed + PRIME32_5; - } - - h32 += (U32) state->total_len; - - while (p<=bEnd-4) - { - h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -U32 XXH32_intermediateDigest (void* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); - else - return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); -} - - -U32 XXH32_digest (void* state_in) -{ - U32 h32 = XXH32_intermediateDigest(state_in); - - XXH_free(state_in); - - return h32; -} -// ----------------------------------------------------------------------------- -// --SECTION-- END-OF-FILE -// ----------------------------------------------------------------------------- - -// Local Variables: -// mode: outline-minor -// outline-regexp: "/// @brief\\|/// 
{@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}" -// End: diff --git a/lib/Basics/xxhash.h b/lib/Basics/xxhash.h deleted file mode 100644 index c407c1f3b8..0000000000 --- a/lib/Basics/xxhash.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - xxHash - Fast Hash algorithm - Header File - Copyright (C) 2012-2014, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - xxHash source repository : http://code.google.com/p/xxhash/ -*/ - -/* Notice extracted from xxHash homepage : - -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -It also successfully passes all tests from the SMHasher suite. 
- -Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) - -Name Speed Q.Score Author -xxHash 5.4 GB/s 10 -CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby -SpookyHash 2.0 GB/s 10 Bob Jenkins -SBox 1.4 GB/s 9 Bret Mulvey -Lookup3 1.2 GB/s 9 Bob Jenkins -SuperFastHash 1.2 GB/s 1 Paul Hsieh -CityHash64 1.05 GB/s 10 Pike & Alakuijala -FNV 0.55 GB/s 5 Fowler, Noll, Vo -CRC32 0.43 GB/s 9 -MD5-32 0.33 GB/s 10 Ronald L. Rivest -SHA1-32 0.28 GB/s 10 - -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. -*/ - -#pragma once - -//**************************** -// Type -//**************************** -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; - - - -//**************************** -// Simple Hash Functions -//**************************** - -unsigned int XXH32 (const void* input, int len, unsigned int seed); - -/* -XXH32() : - Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". - The memory between input & input+len must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - This function successfully passes all SMHasher tests. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s - Note that "len" is type "int", which means it is limited to 2^31-1. - If your data is larger, use the advanced functions below. -*/ - - - -//**************************** -// Advanced Hash Functions -//**************************** - -void* XXH32_init (unsigned int seed); -XXH_errorcode XXH32_update (void* state, const void* input, int len); -unsigned int XXH32_digest (void* state); - -/* -These functions calculate the xxhash of an input provided in several small packets, -as opposed to an input provided as a single block. - -It must be started with : -void* XXH32_init() -The function returns a pointer which holds the state of calculation. 
- -This pointer must be provided as "void* state" parameter for XXH32_update(). -XXH32_update() can be called as many times as necessary. -The user must provide a valid (allocated) input. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -Note that "len" is type "int", which means it is limited to 2^31-1. -If your data is larger, it is recommended to chunk your data into blocks -of size for example 2^30 (1GB) to avoid any "int" overflow issue. - -Finally, you can end the calculation anytime, by using XXH32_digest(). -This function returns the final 32-bits hash. -You must provide the same "void* state" parameter created by XXH32_init(). -Memory will be freed by XXH32_digest(). -*/ - - -int XXH32_sizeofState(void); -XXH_errorcode XXH32_resetState(void* state, unsigned int seed); - -#define XXH32_SIZEOFSTATE 48 -typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; -/* -These functions allow user application to make its own allocation for state. - -XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state. -Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. -This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state. - -For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), -use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. -*/ - - -unsigned int XXH32_intermediateDigest (void* state); -/* -This function does the same as XXH32_digest(), generating a 32-bit hash, -but preserve memory context. -This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update(). -To free memory context, use XXH32_digest(), or free(). 
-*/ - - - -//**************************** -// Deprecated function names -//**************************** -// The following translations are provided to ease code transition -// You are encouraged to no longer this function names -#define XXH32_feed XXH32_update -#define XXH32_result XXH32_digest -#define XXH32_getIntermediateResult XXH32_intermediateDigest - -// ----------------------------------------------------------------------------- -// --SECTION-- END-OF-FILE -// ----------------------------------------------------------------------------- - -// Local Variables: -// mode: outline-minor -// outline-regexp: "/// @brief\\|/// {@inheritDoc}\\|/// @page\\|// --SECTION--\\|/// @\\}" -// End: diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 0bf5fccb5c..ab29202efd 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -135,7 +135,6 @@ add_library( Basics/voc-mimetypes.cpp Basics/WriteLocker.cpp Basics/WriteUnlocker.cpp - Basics/xxhash.cpp JsonParser/json-parser.cpp ProgramOptions/program-options.cpp Rest/EndpointList.cpp diff --git a/lib/Makefile.files b/lib/Makefile.files index 88094ef40d..428f3a3a71 100644 --- a/lib/Makefile.files +++ b/lib/Makefile.files @@ -75,7 +75,6 @@ lib_libarango_a_SOURCES = \ lib/Basics/voc-mimetypes.cpp \ lib/Basics/WriteLocker.cpp \ lib/Basics/WriteUnlocker.cpp \ - lib/Basics/xxhash.cpp \ lib/JsonParser/json-parser.cpp \ lib/ProgramOptions/program-options.cpp \ lib/Rest/EndpointList.cpp \ From fcf978a4222bb3ea76d73cef41aa67e8a0e5367c Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Thu, 10 Dec 2015 17:05:39 +0100 Subject: [PATCH 08/13] do not print overly long string values --- .../frontend/js/modules/common/org/arangodb/aql/explainer.js | 3 +++ js/common/modules/org/arangodb/aql/explainer.js | 3 +++ 2 files changed, 6 insertions(+) diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/modules/common/org/arangodb/aql/explainer.js 
b/js/apps/system/_admin/aardvark/APP/frontend/js/modules/common/org/arangodb/aql/explainer.js index ce8b832904..492b68ee01 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/modules/common/org/arangodb/aql/explainer.js +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/modules/common/org/arangodb/aql/explainer.js @@ -66,6 +66,9 @@ function annotation (v) { function value (v) { 'use strict'; + if (typeof v === 'string' && v.length > 1024) { + return colors.COLOR_GREEN + v.substr(0, 1024) + "..." + colors.COLOR_RESET; + } return colors.COLOR_GREEN + v + colors.COLOR_RESET; } diff --git a/js/common/modules/org/arangodb/aql/explainer.js b/js/common/modules/org/arangodb/aql/explainer.js index a4348ecdc7..ea737d1c47 100644 --- a/js/common/modules/org/arangodb/aql/explainer.js +++ b/js/common/modules/org/arangodb/aql/explainer.js @@ -65,6 +65,9 @@ function annotation (v) { function value (v) { 'use strict'; + if (typeof v === 'string' && v.length > 1024) { + return colors.COLOR_GREEN + v.substr(0, 1024) + "..." 
+ colors.COLOR_RESET; + } return colors.COLOR_GREEN + v + colors.COLOR_RESET; } From 09d34087ea08cb2b7d8de1fae859c4bee703ca09 Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Thu, 10 Dec 2015 17:07:25 +0100 Subject: [PATCH 09/13] rename ditch type --- arangod/VocBase/Ditch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arangod/VocBase/Ditch.h b/arangod/VocBase/Ditch.h index 5e6f7055e3..a1b141cf16 100644 --- a/arangod/VocBase/Ditch.h +++ b/arangod/VocBase/Ditch.h @@ -195,7 +195,7 @@ namespace triagens { } char const* typeName () const override final { - return "document"; + return "document-reference"; } void setUsedByTransaction (); From 9f408a81ee206c756b1c90cba4b75ab726a86a92 Mon Sep 17 00:00:00 2001 From: Frank Celler Date: Thu, 10 Dec 2015 18:35:39 +0100 Subject: [PATCH 10/13] removed xxhash --- LICENSES-OTHER-COMPONENTS.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/LICENSES-OTHER-COMPONENTS.md b/LICENSES-OTHER-COMPONENTS.md index 6f8e7ab666..59fe146f0b 100644 --- a/LICENSES-OTHER-COMPONENTS.md +++ b/LICENSES-OTHER-COMPONENTS.md @@ -18,11 +18,6 @@ * Project Home: https://code.google.com/p/fast-hash/ * License: [MIT License](https://code.google.com/p/fast-hash/) -### xxhash - -* Project Home: https://code.google.com/p/xxhash/ -* License: [BSD 2-Clause License](https://code.google.com/p/xxhash/) - ### Google V8 4.3.61 * Project Home: https://code.google.com/p/v8/ From f2b9e1139b350bee1485386ade014953fe426b76 Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Thu, 10 Dec 2015 18:43:44 +0100 Subject: [PATCH 11/13] replaced levenshtein distance implementation --- LICENSES-OTHER-COMPONENTS.md | 5 + lib/Basics/levenshtein.cpp | 154 ++++++++++++------------- lib/Basics/levenshtein.h | 8 +- lib/ProgramOptions/program-options.cpp | 2 +- 4 files changed, 82 insertions(+), 87 deletions(-) diff --git a/LICENSES-OTHER-COMPONENTS.md b/LICENSES-OTHER-COMPONENTS.md index 59fe146f0b..3b619f18c4 100644 --- a/LICENSES-OTHER-COMPONENTS.md +++ 
b/LICENSES-OTHER-COMPONENTS.md @@ -18,6 +18,11 @@ * Project Home: https://code.google.com/p/fast-hash/ * License: [MIT License](https://code.google.com/p/fast-hash/) +### levenshtein distance + +* Project Home: https://github.com/bennybp/stringmatch/ +* License: [MIT License](https://github.com/bennybp/stringmatch/blob/master/stringmatch.h) + ### Google V8 4.3.61 * Project Home: https://code.google.com/p/v8/ diff --git a/lib/Basics/levenshtein.cpp b/lib/Basics/levenshtein.cpp index 7cc92943c6..1161c613b9 100644 --- a/lib/Basics/levenshtein.cpp +++ b/lib/Basics/levenshtein.cpp @@ -1,98 +1,92 @@ +//////////////////////////////////////////////////////////////////////////////// +/// @brief levenshtein function +/// +/// @file +/// +/// DISCLAIMER +/// +/// Copyright 2014 ArangoDB GmbH, Cologne, Germany +/// Copyright 2004-2014 triAGENS GmbH, Cologne, Germany +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. +/// +/// Copyright holder is ArangoDB GmbH, Cologne, Germany +/// +/// @author Dr. 
Frank Celler +/// @author Copyright 2014, ArangoDB GmbH, Cologne, Germany +/// @author Copyright 2011-2013, triAGENS GmbH, Cologne, Germany +//////////////////////////////////////////////////////////////////////////////// + #include "levenshtein.h" //////////////////////////////////////////////////////////////////////////////// /// @brief calculate the levenshtein distance of the two strings -/// -/// This function implements the Damerau-Levenshtein algorithm to -/// calculate a distance between strings. -/// -/// Basically, it says how many letters need to be swapped, substituted, -/// deleted from, or added to string1, at least, to get string2. -/// -/// The idea is to build a distance matrix for the substrings of both -/// strings. To avoid a large space complexity, only the last three rows -/// are kept in memory (if swaps had the same or higher cost as one deletion -/// plus one insertion, only two rows would be needed). -/// -/// At any stage, "i + 1" denotes the length of the current substring of -/// string1 that the distance is calculated for. -/// -/// row2 holds the current row, row1 the previous row (i.e. for the substring -/// of string1 of length "i"), and row0 the row before that. -/// -/// In other words, at the start of the big loop, row2[j + 1] contains the -/// Damerau-Levenshtein distance between the substring of string1 of length -/// "i" and the substring of string2 of length "j + 1". -/// -/// All the big loop does is determine the partial minimum-cost paths. -/// -/// It does so by calculating the costs of the path ending in characters -/// i (in string1) and j (in string2), respectively, given that the last -/// operation is a substitution, a swap, a deletion, or an insertion. -/// -/// This implementation allows the costs to be weighted: -/// -/// - w (as in "sWap") -/// - s (as in "Substitution") -/// - a (for insertion, AKA "Add") -/// - d (as in "Deletion") -/// -/// Note that this algorithm calculates a distance _iff_ d == a. 
-/// -/// @author https://github.com/git/git/blob/master/levenshtein.c +/// @author Benjamin Pritchard (ben@bennyp.org) +/// copyright 2013 Benjamin Pritchard. Released under the MIT License +/// copyright The MIT License +/// From https://raw.githubusercontent.com/bennybp/stringmatch/master/stringmatch.cpp //////////////////////////////////////////////////////////////////////////////// -int TRI_Levenshtein (char const* string1, - char const* string2, - int w, - int s, - int a, - int d) { - int len1 = static_cast(strlen(string1)); - int len2 = static_cast(strlen(string2)); - int* row0 = new int[len2 + 1]; - int* row1 = new int[len2 + 1]; - int* row2 = new int[len2 + 1]; +int TRI_Levenshtein (std::string const& str1, + std::string const& str2) { + // for all i and j, d[i,j] will hold the Levenshtein distance between + // the first i characters of s and the first j characters of t; + // note that d has (m+1)x(n+1) values + size_t m = str1.size(); + size_t n = str2.size(); - for (int j = 0; j <= len2; j++) { - row1[j] = j * a; + int** d = new int*[m + 1]; + for (size_t i = 0; i <= m; i++) { + d[i] = new int[n + 1]; } - int i; - for (i = 0; i < len1; i++) { - int *dummy; + for (size_t i = 0; i <= m; i++) { + d[i][0] = i; // the distance of any first string to an empty second string + } - row2[0] = (i + 1) * d; - for (int j = 0; j < len2; j++) { - /* substitution */ - row2[j + 1] = row1[j] + s * (string1[i] != string2[j]); - /* swap */ - if (i > 0 && j > 0 && string1[i - 1] == string2[j] && - string1[i] == string2[j - 1] && - row2[j + 1] > row0[j - 1] + w) { - row2[j + 1] = row0[j - 1] + w; + for (size_t j = 0; j <= n; j++) { + d[0][j] = j; // the distance of any second string to an empty first string + } + + int min; + + for (size_t j = 1; j <= n; j++) { + for (size_t i = 1; i <= m; i++) { + if (str1[i - 1] == str2[j - 1]) { + d[i][j] = d[i - 1][j - 1]; // no operation required } - /* deletion */ - if (row2[j + 1] > row1[j + 1] + d) { - row2[j + 1] = row1[j + 1] + d; - 
} - /* insertion */ - if (row2[j + 1] > row2[j] + a) { - row2[j + 1] = row2[j] + a; + else { + //find a minimum + min = d[i - 1][j] + /*1*/3; // a deletion + if( (d[i][j - 1] + 1) < min) { // an insertion + min = (d[i][j - 1] + 1); + } + if( (d[i - 1][j - 1] + 1) < min) { // a substitution + min = (d[i - 1][j - 1] + /*1*/2); + } + + d[i][j] = min; } } - - dummy = row0; - row0 = row1; - row1 = row2; - row2 = dummy; } - i = row1[len2]; - delete[] row0; - delete[] row1; - delete[] row2; + int result = d[m][n]; - return i; + for(size_t i = 0; i <= m; i++) { + delete[] d[i]; + } + delete[] d; + + return result; } diff --git a/lib/Basics/levenshtein.h b/lib/Basics/levenshtein.h index 5ef4585aa5..5d05628dbc 100644 --- a/lib/Basics/levenshtein.h +++ b/lib/Basics/levenshtein.h @@ -40,12 +40,8 @@ /// @brief calculate the levenshtein distance of the two strings //////////////////////////////////////////////////////////////////////////////// -int TRI_Levenshtein (char const* string1, - char const* string2, - int swap_penalty = 0, - int substitution_penalty = 2, - int insertion_penalty = 1, - int deletion_penalty = 3); +int TRI_Levenshtein (std::string const&, + std::string const&); #endif diff --git a/lib/ProgramOptions/program-options.cpp b/lib/ProgramOptions/program-options.cpp index 2f726519fe..6e11695ae4 100644 --- a/lib/ProgramOptions/program-options.cpp +++ b/lib/ProgramOptions/program-options.cpp @@ -199,7 +199,7 @@ static void printUnrecognizedOption (TRI_program_options_t const* options, for (size_t i = 0; i < TRI_LengthVector(&options->_items); ++i) { auto item = static_cast(TRI_AtVector(&options->_items, i)); - distances.emplace(TRI_Levenshtein(option, item->_desc->_name), item->_desc->_name); + distances.emplace(TRI_Levenshtein(std::string(option), std::string(item->_desc->_name)), item->_desc->_name); } if (! 
distances.empty()) { From 4335af50e7f9606f4dabc4cb28e2750e2cd1fb56 Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Thu, 10 Dec 2015 19:06:12 +0100 Subject: [PATCH 12/13] replication improvements --- .../Books/Users/Replication/Components.mdpp | 13 ++++- .../Books/Users/Replication/ExampleSetup.mdpp | 6 ++ arangod/Replication/ContinuousSyncer.cpp | 55 ++++++++++++++++--- .../RestHandler/RestReplicationHandler.cpp | 22 ++++++++ arangod/V8Server/v8-replication.cpp | 7 +++ arangod/VocBase/replication-applier.cpp | 13 +++++ arangod/VocBase/replication-applier.h | 1 + arangod/VocBase/replication-common.cpp | 3 +- js/common/tests/replication.js | 20 ++++++- js/server/tests/replication-sync.js | 1 + 10 files changed, 127 insertions(+), 14 deletions(-) diff --git a/Documentation/Books/Users/Replication/Components.mdpp b/Documentation/Books/Users/Replication/Components.mdpp index fecfcfa2f3..5c6b401284 100644 --- a/Documentation/Books/Users/Replication/Components.mdpp +++ b/Documentation/Books/Users/Replication/Components.mdpp @@ -260,6 +260,8 @@ The result might look like this: "adaptivePolling" : true, "includeSystem" : true, "requireFromPresent" : false, + "autoResync" : false, + "autoResyncRetries" : 2, "verbose" : false } ``` @@ -342,6 +344,11 @@ Automatic re-synchronization may transfer a lot of data from the master to the s expensive. It is therefore turned off by default. When turned off, the slave will never perform an automatic re-synchronization with the master. +The *autoResyncRetries* option can be used to control the number of resynchronization retries that +will be performed in a row when automatic resynchronization is enabled and kicks in. Setting this to +*0* will effectively disable *autoResync*. Setting it to some other value will limit the number of retries +that are performed. This helps preventing endless retries in case resynchronizations always fail. + The *verbose* attribute controls the verbosity of the replication logger. 
Setting it to `true` will make the replication applier write a line to the log for every operation it performs. This should only be used for diagnosing replication problems. @@ -351,14 +358,16 @@ The following example will set most of the discussed properties for the current ```js require("org/arangodb/replication").applier.properties({ endpoint: "tcp://master.domain.org:8529", - username: "root", + username: "root", password: "secret", adaptivePolling: true, connectTimeout: 15, maxConnectRetries: 100, chunkSize: 262144, autoStart: true, - includeSystem: true + includeSystem: true, + autoResync: true, + autoResyncRetries: 2, }); ``` diff --git a/Documentation/Books/Users/Replication/ExampleSetup.mdpp b/Documentation/Books/Users/Replication/ExampleSetup.mdpp index 11d374b2f5..df864b1a7f 100644 --- a/Documentation/Books/Users/Replication/ExampleSetup.mdpp +++ b/Documentation/Books/Users/Replication/ExampleSetup.mdpp @@ -77,6 +77,7 @@ require("org/arangodb/replication").applier.properties({ password: "mypasswd", autoStart: true, autoResync: true, + autoResyncRetries: 2, adaptivePolling: true, includeSystem: false, requireFromPresent: false, @@ -114,6 +115,11 @@ Note that automatic re-synchronization (*autoResync* option set to *true*) may t data from the master to the slave and can therefore be expensive. Still it's turned on here so there's less need for manual intervention. +The *autoResyncRetries* option can be used to control the number of resynchronization retries that +will be performed in a row when automatic resynchronization is enabled and kicks in. Setting this to +*0* will effectively disable *autoResync*. Setting it to some other value will limit the number of retries +that are performed. This helps preventing endless retries in case resynchronizations always fail. 
+ Now it's time to start the replication applier on the slave using the last log tick we got before: diff --git a/arangod/Replication/ContinuousSyncer.cpp b/arangod/Replication/ContinuousSyncer.cpp index 87e709960e..305c7cee80 100644 --- a/arangod/Replication/ContinuousSyncer.cpp +++ b/arangod/Replication/ContinuousSyncer.cpp @@ -119,7 +119,10 @@ int ContinuousSyncer::run () { return TRI_ERROR_INTERNAL; } + uint64_t shortTermFailsInRow = 0; + retry: + double const start = TRI_microtime(); string errorMsg; int res = TRI_ERROR_NO_ERROR; @@ -226,10 +229,32 @@ retry: if (! _configuration._autoResync) { return res; } + + if (TRI_microtime() - start < 120.0) { + // the applier only ran for less than 2 minutes. probably auto-restarting it won't help much + shortTermFailsInRow++; + } + else { + shortTermFailsInRow = 0; + } + + // check if we've made too many retries + if (shortTermFailsInRow > _configuration._autoResyncRetries) { + if (_configuration._autoResyncRetries > 0) { + // message only makes sense if there's at least one retry + LOG_WARNING("aborting automatic resynchronization for database '%s' after %d retries", + _vocbase->_name, + (int) _configuration._autoResyncRetries); + } + + // always abort if we get here + return res; + } // do an automatic full resync - LOG_WARNING("restarting initial synchronization for database '%s' because autoResync option is set", - _vocbase->_name); + LOG_WARNING("restarting initial synchronization for database '%s' because autoResync option is set. retry #%d", + _vocbase->_name, + (int) shortTermFailsInRow); // start initial synchronization errorMsg = ""; @@ -245,7 +270,7 @@ retry: if (res == TRI_ERROR_NO_ERROR) { TRI_voc_tick_t lastLogTick = syncer.getLastLogTick(); - LOG_INFO("automatic resynchronization for database '%s' finished. restarting continous replication applier from tick %llu", + LOG_INFO("automatic resynchronization for database '%s' finished. 
restarting continuous replication applier from tick %llu", _vocbase->_name, (unsigned long long) lastLogTick); _initialTick = lastLogTick; @@ -1159,7 +1184,9 @@ int ContinuousSyncer::fetchMasterState (string& errorMsg, "&from=" + StringUtils::itoa(fromTick) + "&to=" + StringUtils::itoa(toTick); - string const progress = "fetching initial master state with from tick " + StringUtils::itoa(fromTick) + ", toTick " + StringUtils::itoa(toTick); + string const progress = "fetching initial master state with from tick " + StringUtils::itoa(fromTick) + ", to tick " + StringUtils::itoa(toTick); + + setProgress(progress); LOG_TRACE("fetching initial master state with from tick %llu, to tick %llu, url %s", (unsigned long long) fromTick, @@ -1167,8 +1194,6 @@ int ContinuousSyncer::fetchMasterState (string& errorMsg, url.c_str()); // send request - setProgress(progress); - std::unique_ptr response(_client->request(HttpRequest::HTTP_REQUEST_GET, url, nullptr, @@ -1220,7 +1245,11 @@ int ContinuousSyncer::fetchMasterState (string& errorMsg, return TRI_ERROR_REPLICATION_INVALID_RESPONSE; } - startTick = StringUtils::uint64(header); + TRI_voc_tick_t readTick = StringUtils::uint64(header); + startTick = readTick; + if (startTick == 0) { + startTick = toTick; + } StringBuffer& data = response->getBody(); std::unique_ptr json(TRI_JsonString(TRI_UNKNOWN_MEM_ZONE, data.begin())); @@ -1244,6 +1273,15 @@ int ContinuousSyncer::fetchMasterState (string& errorMsg, _ongoingTransactions.emplace(StringUtils::uint64(id->_value._string.data, id->_value._string.length - 1), nullptr); } + + { + string const progress = "fetched initial master state for from tick " + StringUtils::itoa(fromTick) + + ", to tick " + StringUtils::itoa(toTick) + + ", got start tick: " + StringUtils::itoa(readTick) + ", open transactions: " + + std::to_string(_ongoingTransactions.size()); + + setProgress(progress); + } return TRI_ERROR_NO_ERROR; } @@ -1274,7 +1312,8 @@ int ContinuousSyncer::followMasterLog (string& errorMsg, 
url.c_str()); // send request - string const progress = "fetching master log from tick " + StringUtils::itoa(fetchTick); + string const progress = "fetching master log from tick " + StringUtils::itoa(fetchTick) + + ", open transactions: " + std::to_string(_ongoingTransactions.size()); setProgress(progress); std::string body; diff --git a/arangod/RestHandler/RestReplicationHandler.cpp b/arangod/RestHandler/RestReplicationHandler.cpp index d0872c30ea..71797e1ef9 100644 --- a/arangod/RestHandler/RestReplicationHandler.cpp +++ b/arangod/RestHandler/RestReplicationHandler.cpp @@ -3750,6 +3750,13 @@ void RestReplicationHandler::handleCommandDump () { /// the master in case the master cannot serve log data requested by the slave, /// or when the replication is started and no tick value can be found. /// +/// @RESTBODYPARAM{autoResyncRetries,integer,optional,int64} +/// number of resynchronization retries that will be performed in a row when +/// automatic resynchronization is enabled and kicks in. Setting this to *0* will +/// effectively disable *autoResync*. Setting it to some other value will limit +/// the number of retries that are performed. This helps preventing endless retries +/// in case resynchronizations always fail. +/// /// @RESTBODYPARAM{initialSyncMaxWaitTime,integer,optional,int64} /// the maximum wait time (in seconds) that the initial synchronization will /// wait for a response from the master when fetching initial collection data. 
@@ -3953,6 +3960,7 @@ void RestReplicationHandler::handleCommandMakeSlave () { config._initialSyncMaxWaitTime = static_cast(JsonHelper::getNumericValue(json.get(), "initialSyncMaxWaitTime", static_cast(defaults._initialSyncMaxWaitTime) / (1000.0 * 1000.0))); config._idleMinWaitTime = static_cast(JsonHelper::getNumericValue(json.get(), "idleMinWaitTime", static_cast(defaults._idleMinWaitTime) / (1000.0 * 1000.0))); config._idleMaxWaitTime = static_cast(JsonHelper::getNumericValue(json.get(), "idleMaxWaitTime", static_cast(defaults._idleMaxWaitTime) / (1000.0 * 1000.0))); + config._autoResyncRetries = static_cast(JsonHelper::getNumericValue(json.get(), "autoResyncRetries", defaults._autoResyncRetries)); TRI_json_t* restriction = JsonHelper::getObjectElement(json.get(), "restrictCollections"); @@ -4353,6 +4361,12 @@ void RestReplicationHandler::handleCommandServerId () { /// requested by the slave, or when the replication is started and no tick value /// can be found. /// +/// - *autoResyncRetries*: number of resynchronization retries that will be performed +/// in a row when automatic resynchronization is enabled and kicks in. Setting this +/// to *0* will effectively disable *autoResync*. Setting it to some other value +/// will limit the number of retries that are performed. This helps preventing endless +/// retries in case resynchronizations always fail. +/// /// - *initialSyncMaxWaitTime*: the maximum wait time (in seconds) that the initial /// synchronization will wait for a response from the master when fetching initial /// collection data. @@ -4507,6 +4521,13 @@ void RestReplicationHandler::handleCommandApplierGetConfig () { /// with the master in case the master cannot serve log data requested by the slave, /// or when the replication is started and no tick value can be found.
/// +/// @RESTBODYPARAM{autoResyncRetries,integer,optional,int64} +/// number of resynchronization retries that will be performed in a row when +/// automatic resynchronization is enabled and kicks in. Setting this to *0* will +/// effectively disable *autoResync*. Setting it to some other value will limit +/// the number of retries that are performed. This helps preventing endless retries +/// in case resynchronizations always fail. +/// /// @RESTBODYPARAM{initialSyncMaxWaitTime,integer,optional,int64} /// the maximum wait time (in seconds) that the initial synchronization will /// wait for a response from the master when fetching initial collection data. @@ -4683,6 +4704,7 @@ void RestReplicationHandler::handleCommandApplierSetConfig () { config._initialSyncMaxWaitTime = static_cast(JsonHelper::getNumericValue(json.get(), "initialSyncMaxWaitTime", static_cast(config._initialSyncMaxWaitTime) / (1000.0 * 1000.0))); config._idleMinWaitTime = static_cast(JsonHelper::getNumericValue(json.get(), "idleMinWaitTime", static_cast(config._idleMinWaitTime) / (1000.0 * 1000.0))); config._idleMaxWaitTime = static_cast(JsonHelper::getNumericValue(json.get(), "idleMaxWaitTime", static_cast(config._idleMaxWaitTime) / (1000.0 * 1000.0))); + config._autoResyncRetries = static_cast(JsonHelper::getNumericValue(json.get(), "autoResyncRetries", config._autoResyncRetries)); value = JsonHelper::getObjectElement(json.get(), "restrictCollections"); diff --git a/arangod/V8Server/v8-replication.cpp b/arangod/V8Server/v8-replication.cpp index 865b837f8f..952cc7ee08 100644 --- a/arangod/V8Server/v8-replication.cpp +++ b/arangod/V8Server/v8-replication.cpp @@ -590,6 +590,13 @@ static void JS_ConfigureApplierReplication (const v8::FunctionCallbackInfoHas(TRI_V8_ASCII_STRING("autoResyncRetries"))) { + if (object->Get(TRI_V8_ASCII_STRING("autoResyncRetries"))->IsNumber()) { + double value = TRI_ObjectToDouble(object->Get(TRI_V8_ASCII_STRING("autoResyncRetries"))); + config._autoResyncRetries = 
static_cast(value); + } + } int res = TRI_ConfigureReplicationApplier(vocbase->_replicationApplier, &config); diff --git a/arangod/VocBase/replication-applier.cpp b/arangod/VocBase/replication-applier.cpp index bb34518d9d..857f3a4648 100644 --- a/arangod/VocBase/replication-applier.cpp +++ b/arangod/VocBase/replication-applier.cpp @@ -227,6 +227,11 @@ static TRI_json_t* JsonConfiguration (TRI_replication_applier_configuration_t co json, "idleMaxWaitTime", TRI_CreateNumberJson(TRI_CORE_MEM_ZONE, (double) config->_idleMaxWaitTime / (1000 * 1000))); + + TRI_Insert3ObjectJson(TRI_CORE_MEM_ZONE, + json, + "autoResyncRetries", + TRI_CreateNumberJson(TRI_CORE_MEM_ZONE, (double) config->_autoResyncRetries)); return json; } @@ -430,6 +435,12 @@ static int LoadConfiguration (TRI_vocbase_t* vocbase, config->_idleMaxWaitTime = (uint64_t) (value->_value._number * 1000 * 1000); } + value = TRI_LookupObjectJson(json.get(), "autoResyncRetries"); + + if (TRI_IsNumberJson(value)) { + config->_autoResyncRetries = (uint64_t) value->_value._number; + } + // read the endpoint value = TRI_LookupObjectJson(json.get(), "endpoint"); @@ -988,6 +999,7 @@ void TRI_InitConfigurationReplicationApplier (TRI_replication_applier_configurat config->_initialSyncMaxWaitTime = 300 * 1000 * 1000; config->_idleMinWaitTime = 500 * 1000; config->_idleMaxWaitTime = 5 * 500 * 1000; + config->_autoResyncRetries = 2; } //////////////////////////////////////////////////////////////////////////////// @@ -1068,6 +1080,7 @@ void TRI_CopyConfigurationReplicationApplier (TRI_replication_applier_configurat dst->_initialSyncMaxWaitTime = src->_initialSyncMaxWaitTime; dst->_idleMinWaitTime = src->_idleMinWaitTime; dst->_idleMaxWaitTime = src->_idleMaxWaitTime; + dst->_autoResyncRetries = src->_autoResyncRetries; } //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/VocBase/replication-applier.h b/arangod/VocBase/replication-applier.h index 721de4166c..3ab3ef94af 100644 
--- a/arangod/VocBase/replication-applier.h +++ b/arangod/VocBase/replication-applier.h @@ -71,6 +71,7 @@ struct TRI_replication_applier_configuration_t { uint64_t _idleMinWaitTime; // 500 * 1000 uint64_t _idleMaxWaitTime; // 5 * 500 * 1000 uint64_t _initialSyncMaxWaitTime; + uint64_t _autoResyncRetries; uint32_t _sslProtocol; bool _autoStart; bool _adaptivePolling; diff --git a/arangod/VocBase/replication-common.cpp b/arangod/VocBase/replication-common.cpp index dabdbcd101..739efeca82 100644 --- a/arangod/VocBase/replication-common.cpp +++ b/arangod/VocBase/replication-common.cpp @@ -97,7 +97,8 @@ bool TRI_ExcludeCollectionReplication (char const* name, TRI_EqualString(name, "_configuration") || TRI_EqualString(name, "_cluster_kickstarter_plans") || TRI_EqualString(name, "_jobs") || - TRI_EqualString(name, "_queues")) { + TRI_EqualString(name, "_queues") || + TRI_EqualString(name, "_sessions")) { // these system collections will always be excluded return true; } diff --git a/js/common/tests/replication.js b/js/common/tests/replication.js index c74e172106..f6945b35c5 100644 --- a/js/common/tests/replication.js +++ b/js/common/tests/replication.js @@ -2017,6 +2017,8 @@ function ReplicationApplierSuite () { assertEqual(15, properties.connectionRetryWaitTime); assertEqual(0.5, properties.idleMinWaitTime); assertEqual(2.5, properties.idleMaxWaitTime); + assertFalse(properties.autoResync); + assertEqual(2, properties.autoResyncRetries); try { replication.applier.properties({ }); @@ -2041,6 +2043,8 @@ function ReplicationApplierSuite () { assertEqual(15, properties.connectionRetryWaitTime); assertEqual(0.5, properties.idleMinWaitTime); assertEqual(2.5, properties.idleMaxWaitTime); + assertFalse(properties.autoResync); + assertEqual(2, properties.autoResyncRetries); replication.applier.properties({ endpoint: "tcp://9.9.9.9:9998", @@ -2055,7 +2059,9 @@ function ReplicationApplierSuite () { restrictCollections: [ "_users" ], connectionRetryWaitTime: 60.2, idleMinWaitTime: 
0.1, - idleMaxWaitTime: 42.44 + idleMaxWaitTime: 42.44, + autoResync: true, + autoResyncRetries: 13 }); properties = replication.applier.properties(); @@ -2072,6 +2078,8 @@ function ReplicationApplierSuite () { assertEqual(60.2, properties.connectionRetryWaitTime); assertEqual(0.1, properties.idleMinWaitTime); assertEqual(42.44, properties.idleMaxWaitTime); + assertTrue(properties.autoResync); + assertEqual(13, properties.autoResyncRetries); replication.applier.properties({ endpoint: "tcp://9.9.9.9:9998", @@ -2081,7 +2089,9 @@ function ReplicationApplierSuite () { includeSystem: true, restrictType: "exclude", restrictCollections: [ "foo", "bar", "baz" ], - idleMinWaitTime: 7 + idleMinWaitTime: 7, + autoResync: false, + autoResyncRetries: 22 }); properties = replication.applier.properties(); @@ -2098,11 +2108,14 @@ function ReplicationApplierSuite () { assertEqual(60.2, properties.connectionRetryWaitTime); assertEqual(7, properties.idleMinWaitTime); assertEqual(42.44, properties.idleMaxWaitTime); + assertFalse(properties.autoResync); + assertEqual(22, properties.autoResyncRetries); replication.applier.properties({ restrictType: "", restrictCollections: [ ], - idleMaxWaitTime: 33 + idleMaxWaitTime: 33, + autoResyncRetries: 0 }); properties = replication.applier.properties(); @@ -2111,6 +2124,7 @@ function ReplicationApplierSuite () { assertEqual(60.2, properties.connectionRetryWaitTime); assertEqual(7, properties.idleMinWaitTime); assertEqual(33, properties.idleMaxWaitTime); + assertEqual(0, properties.autoResyncRetries); }, //////////////////////////////////////////////////////////////////////////////// diff --git a/js/server/tests/replication-sync.js b/js/server/tests/replication-sync.js index 59c71ef00a..6acc326759 100644 --- a/js/server/tests/replication-sync.js +++ b/js/server/tests/replication-sync.js @@ -76,6 +76,7 @@ function ReplicationSuite () { db._flushCache(); masterFunc(state); + require("internal").wal.flush(true, true); db._flushCache(); 
connectToSlave(); From a1a95f227f6a18e1e2508af058660b3bf18a7b7d Mon Sep 17 00:00:00 2001 From: Jan Steemann Date: Thu, 10 Dec 2015 21:16:03 +0100 Subject: [PATCH 13/13] fix VS warning --- lib/Basics/levenshtein.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Basics/levenshtein.cpp b/lib/Basics/levenshtein.cpp index 1161c613b9..88de7540dd 100644 --- a/lib/Basics/levenshtein.cpp +++ b/lib/Basics/levenshtein.cpp @@ -51,11 +51,11 @@ int TRI_Levenshtein (std::string const& str1, } for (size_t i = 0; i <= m; i++) { - d[i][0] = i; // the distance of any first string to an empty second string + d[i][0] = static_cast(i); // the distance of any first string to an empty second string } for (size_t j = 0; j <= n; j++) { - d[0][j] = j; // the distance of any second string to an empty first string + d[0][j] = static_cast(j); // the distance of any second string to an empty first string } int min;