reorganization

2016-12-13 13:54:14 +01:00 · 2016-12-13 13:54:14 +01:00 · 63f87fbf52
parent 3a4a07f13d
commit 63f87fbf52
51 changed files with 4090 additions and 468 deletions
--- a/3rdParty/CMakeLists.txt
+++ b/3rdParty/CMakeLists.txt
@ -111,6 +111,12 @@ set(SKIP_INSTALL_ALL ON)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/snappy/google-snappy-d53de18)
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/rocksdb)
 ################################################################################
 ## LIBCUCKOO
 ################################################################################
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libcuckoo/)
 ################################################################################
 ## LINK_DIRECTORIES
 ################################################################################
--- a/3rdParty/libcuckoo/.gitignore
+++ b/3rdParty/libcuckoo/.gitignore
@ -0,0 +1,37 @@
 *.a
 *.in
 *.la
 *.lo
 *.log
 *.o
 *.out
 *.trs
 *~
 .DS_Store
 .deps
 .libs
 Makefile
 aclocal.m4
 autom4te.cache
 cityhash_unittest
 compile
 config.guess
 config.h
 config.log
 config.status
 config.sub
 config.sub
 configure
 depcomp
 depcomp
 examples/count_freq
 examples/hellohash
 examples/nested_table
 install-sh
 libtool
 libtool.m4
 lt*.m4
 ltmain.sh
 missing
 stamp-h1
 test-driver
--- a/3rdParty/libcuckoo/CMakeLists.txt
+++ b/3rdParty/libcuckoo/CMakeLists.txt
@ -0,0 +1,5 @@
 cmake_minimum_required (VERSION 2.8)
 include_directories("${PROJECT_BINARY_DIR}/include/")
 # The vendored headers live in the source tree, not the build tree. city.cc
 # pulls in <citycrc.h> when SSE4.2 is enabled and citycrc.h pulls in <city.h>,
 # both with angle brackets, so the cityhash directory itself must be on the
 # include path as well.
 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include/")
 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include/cityhash/")
 #target_link_libraries(LIBCUCKOO pthread)
 add_library(libcuckoo include/cityhash/city.cc)
--- a/3rdParty/libcuckoo/LICENSE
+++ b/3rdParty/libcuckoo/LICENSE
@ -0,0 +1,18 @@
 Copyright (C) 2013, Carnegie Mellon University and Intel Corporation
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ---------------------------
 CityHash (lib/city.h, lib/city.cc) is Copyright (c) Google, Inc. and
 has its own license, as detailed in the source files.
--- a/3rdParty/libcuckoo/README.md
+++ b/3rdParty/libcuckoo/README.md
@ -0,0 +1,107 @@
 Note to existing users: the iterator implementation has changed significantly
 since we introduced the `locked_table` in [this
 commit](https://github.com/efficient/libcuckoo/commit/2bedb3d0c811cd8b3adb3e78e2d2a28c66ba1d1d).
 Please see the [`locked_table`
 documentation](http://efficient.github.io/libcuckoo/classcuckoohash__map_1_1locked__table.html)
 and [examples
 directory](https://github.com/efficient/libcuckoo/tree/master/examples) for
 information and examples of how to use iterators.
 libcuckoo
 =========
 libcuckoo provides a high-performance, compact hash table that allows
 multiple concurrent reader and writer threads.
 The Doxygen-generated documentation is available at the
 [project page](http://efficient.github.io/libcuckoo/).
 Authors: Manu Goyal, Bin Fan, Xiaozhou Li, David G. Andersen, and Michael Kaminsky
 For details about this algorithm and citations, please refer to
 our papers in [NSDI 2013][1] and [EuroSys 2014][2]. However, some details of the
 hashing algorithm have been improved since that work (e.g., the previous
 algorithm in [1] serializes all writer threads, while our current
 implementation supports multiple concurrent writers), so this source code is
 now the definitive reference.
   [1]: http://www.cs.cmu.edu/~dga/papers/memc3-nsdi2013.pdf "MemC3: Compact and Concurrent Memcache with Dumber Caching and Smarter Hashing"
   [2]: http://www.cs.princeton.edu/~mfreed/docs/cuckoo-eurosys14.pdf "Algorithmic Improvements for Fast Concurrent Cuckoo Hashing"
 Requirements
 ================
 This library has been tested on Mac OSX >= 10.8 and Ubuntu >= 12.04.
 It compiles with clang++ >= 3.3 and g++ >= 4.7, however we strongly suggest
 using the latest versions of both compilers, as they have greatly improved
 support for atomic operations. Building the library requires the
 autotools. To install them on Ubuntu, run:
    $ sudo apt-get update && sudo apt-get install build-essential autoconf libtool
 Building
 ==========
    $ autoreconf -fis
    $ ./configure
    $ make
    $ make install
 Usage
 ==========
 To build a program with the hash table, include
 `libcuckoo/cuckoohash_map.hh` into your source file. If you want to
 use CityHash, which we recommend, we have provided a wrapper
 compatible with the `std::hash` type around it in the
 `libcuckoo/city_hasher.hh` file. If compiling with CityHash, add the
 `-lcityhash` flag. You must also enable C++11 features on your
 compiler. Compiling the file `examples/count_freq.cc` with g++
 might look like this:
    $ g++ -std=c++11 examples/count_freq.cc -lcityhash
 The
 [examples directory](https://github.com/efficient/libcuckoo/tree/master/examples)
 contains some simple demonstrations of some of the basic features of the hash
 table.
 Tests
 ==========
 The [tests directory](https://github.com/efficient/libcuckoo/tree/master/tests)
 contains a number of tests and benchmarks of the hash table, which
 also can serve as useful examples of how to use the table's various features.
 After running `make all`, the entire test suite can be run with the `make check`
 command. This will not run the benchmarks, which must be run individually. The
 test executables, which have the suffix `.out`, can be run individually as well.
 Issue Report
 ============
 To let us know your questions or issues, we recommend you
 [report an issue](https://github.com/efficient/libcuckoo/issues) on
 github. You can also email us at
 [libcuckoo-dev@googlegroups.com](mailto:libcuckoo-dev@googlegroups.com).
 Licence
 ===========
 Copyright (C) 2013, Carnegie Mellon University and Intel Corporation
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ---------------------------
 CityHash (lib/city.h, lib/city.cc) is Copyright (c) Google, Inc. and
 has its own license, as detailed in the source files.
--- a/3rdParty/libcuckoo/include/cityhash/COPYING
+++ b/3rdParty/libcuckoo/include/cityhash/COPYING
@ -0,0 +1,19 @@
 // Copyright (c) 2011 Google, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
--- a/3rdParty/libcuckoo/include/cityhash/city.cc
+++ b/3rdParty/libcuckoo/include/cityhash/city.cc
@ -0,0 +1,627 @@
 // Copyright (c) 2011 Google, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 //
 // CityHash, by Geoff Pike and Jyrki Alakuijala
 //
 // This file provides CityHash64() and related functions.
 //
 // It's probably possible to create even faster hash functions by
 // writing a program that systematically explores some of the space of
 // possible hash functions, by using SIMD instructions, or by
 // compromising on hash quality.
 #include "city.h"
 #include <algorithm>
 #include <string.h>  // for memcpy and memset
 using namespace std;
 // Copies sizeof(uint64) bytes starting at p into a uint64 and returns it.
 // Going through memcpy means p is never dereferenced as a uint64 pointer.
 static uint64 UNALIGNED_LOAD64(const char *p) {
  uint64 value;
  memcpy(&value, p, sizeof value);
  return value;
 }
 // Copies sizeof(uint32) bytes starting at p into a uint32 and returns it.
 // Going through memcpy means p is never dereferenced as a uint32 pointer.
 static uint32 UNALIGNED_LOAD32(const char *p) {
  uint32 value;
  memcpy(&value, p, sizeof value);
  return value;
 }
 // Provide bswap_32 / bswap_64 (byte-order reversal of 32/64-bit values) under
 // one name on every supported toolchain.
 #ifdef _MSC_VER
 // MSVC: map onto the <stdlib.h> byte-swap routines.
 #include <stdlib.h>
 #define bswap_32(x) _byteswap_ulong(x)
 #define bswap_64(x) _byteswap_uint64(x)
 #elif defined(__APPLE__)
 // Mac OS X / Darwin features
 #include <libkern/OSByteOrder.h>
 #define bswap_32(x) OSSwapInt32(x)
 #define bswap_64(x) OSSwapInt64(x)
 #elif defined(__NetBSD__)
 #include <sys/types.h>
 #include <machine/bswap.h>
 // Only alias to bswap32/bswap64 when the guard below holds; otherwise no
 // macro is defined in this branch.
 #if defined(__BSWAP_RENAME) && !defined(__bswap_32)
 #define bswap_32(x) bswap32(x)
 #define bswap_64(x) bswap64(x)
 #endif
 #else
 // Everything else: rely on <byteswap.h> to supply bswap_32 / bswap_64.
 #include <byteswap.h>
 #endif
 // Loaded words are byte-swapped only when WORDS_BIGENDIAN is defined;
 // otherwise they are used as loaded.
 #ifdef WORDS_BIGENDIAN
 #define uint32_in_expected_order(x) (bswap_32(x))
 #define uint64_in_expected_order(x) (bswap_64(x))
 #else
 #define uint32_in_expected_order(x) (x)
 #define uint64_in_expected_order(x) (x)
 #endif
 // LIKELY(x): branch-prediction hint. Falls back to a plain (x) unless
 // HAVE_BUILTIN_EXPECT is defined to a non-zero value; an existing LIKELY
 // definition is left untouched.
 #if !defined(LIKELY)
 #if HAVE_BUILTIN_EXPECT
 #define LIKELY(x) (__builtin_expect(!!(x), 1))
 #else
 #define LIKELY(x) (x)
 #endif
 #endif
 // Loads 8 bytes from p and passes them through uint64_in_expected_order.
 static uint64 Fetch64(const char *p) {
  const uint64 raw = UNALIGNED_LOAD64(p);
  return uint64_in_expected_order(raw);
 }
 // Loads 4 bytes from p and passes them through uint32_in_expected_order.
 static uint32 Fetch32(const char *p) {
  const uint32 raw = UNALIGNED_LOAD32(p);
  return uint32_in_expected_order(raw);
 }
 // Some primes between 2^63 and 2^64 for various uses.
 static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
 static const uint64 k1 = 0xb492b66fbe98f273ULL;
 static const uint64 k2 = 0x9ae16a3b2f90404fULL;
 // Magic numbers for 32-bit hashing.  Copied from Murmur3.
 static const uint32_t c1 = 0xcc9e2d51;
 static const uint32_t c2 = 0x1b873593;
 // 32-bit finalizer taken from Murmur3: alternating xor-shift and multiply
 // steps applied to h; the mixed value is returned.
 static uint32 fmix(uint32 h)
 {
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
 }
 // Rotates the 32-bit value val right by shift bits. A shift of zero is
 // returned as-is because the complementary shift would be by 32 bits, which
 // is undefined.
 static uint32 Rotate32(uint32 val, int shift) {
  if (shift == 0) {
    return val;
  }
  const uint32 low_part = val >> shift;
  const uint32 high_part = val << (32 - shift);
  return low_part | high_part;
 }
 // PERMUTE3(a, b, c): cyclically permutes three variables with two swaps.
 // Afterwards a holds the old c, b holds the old a, and c holds the old b.
 // Any earlier definition of the macro is discarded first.
 #undef PERMUTE3
 #define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0)
 // Murmur3-style combiner: scrambles a (multiply, rotate, multiply), folds it
 // into the running state h, and returns the updated state.
 static uint32 Mur(uint32 a, uint32 h) {
  a = Rotate32(a * c1, 17) * c2;
  h = Rotate32(h ^ a, 19);
  return h * 5 + 0xe6546b64;
 }
 // 32-bit hash used by CityHash32 for inputs of 13 to 24 bytes. Six 32-bit
 // words taken from around the middle, the start and the end of the input are
 // folded one after another into a state seeded with len, then finalized.
 static uint32 Hash32Len13to24(const char *s, size_t len) {
  const size_t half = len >> 1;
  uint32 state = len;
  state = Mur(Fetch32(s - 4 + half), state);
  state = Mur(Fetch32(s + 4), state);
  state = Mur(Fetch32(s + len - 8), state);
  state = Mur(Fetch32(s + half), state);
  state = Mur(Fetch32(s), state);
  state = Mur(Fetch32(s + len - 4), state);
  return fmix(state);
 }
 // 32-bit hash used by CityHash32 for inputs of 0 to 4 bytes. Each byte is
 // read as a signed char and accumulated; the accumulator, the length and a
 // running xor are then combined and finalized.
 static uint32 Hash32Len0to4(const char *s, size_t len) {
  uint32 acc = 0;
  uint32 mix = 9;
  uint32 idx = 0;
  while (idx < len) {
    const signed char byte = s[idx];
    acc = acc * c1 + byte;
    mix ^= acc;
    ++idx;
  }
  const uint32 inner = Mur(len, mix);
  return fmix(Mur(acc, inner));
 }
 // 32-bit hash used by CityHash32 for inputs of 5 to 12 bytes. Combines the
 // first word, the last word and a word at offset 0 or 4 (chosen by len) with
 // length-derived seeds.
 static uint32 Hash32Len5to12(const char *s, size_t len) {
  const uint32 len32 = static_cast<uint32>(len);
  const uint32 seed = static_cast<uint32>(len * 5);
  const uint32 head = len32 + Fetch32(s);
  const uint32 tail = seed + Fetch32(s + len - 4);
  const uint32 mid = 9 + Fetch32(s + ((len >> 1) & 4));
  return fmix(Mur(mid, Mur(tail, Mur(head, seed))));
 }
 // Returns a 32-bit hash of the len bytes starting at s.
 //
 // Inputs of at most 24 bytes are handed to the length-specific helpers.
 // Longer inputs are folded into three 32-bit accumulators (h, g, f): first
 // the last 20 bytes are pre-mixed, then the input is consumed from the front
 // in 20-byte blocks, and finally g and f are folded into h.
 uint32 CityHash32(const char *s, size_t len) {
  if (len <= 24) {
    return len <= 12 ?
        (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
        Hash32Len13to24(s, len);
  }
  // len > 24
  uint32 h = len, g = c1 * len, f = g;
  // Scramble the five 32-bit words that make up the last 20 bytes.
  uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
  uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
  uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
  uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
  uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
  // h absorbs a0 and a2, g absorbs a1 and a3, f absorbs a4.
  h ^= a0;
  h = Rotate32(h, 19);
  h = h * 5 + 0xe6546b64;
  h ^= a2;
  h = Rotate32(h, 19);
  h = h * 5 + 0xe6546b64;
  g ^= a1;
  g = Rotate32(g, 19);
  g = g * 5 + 0xe6546b64;
  g ^= a3;
  g = Rotate32(g, 19);
  g = g * 5 + 0xe6546b64;
  f += a4;
  f = Rotate32(f, 19);
  f = f * 5 + 0xe6546b64;
  // Number of 20-byte blocks read from the front; at least 1 since len > 24.
  size_t iters = (len - 1) / 20;
  do {
    // These block-local words shadow the a0..a4 declared above.
    uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
    uint32 a1 = Fetch32(s + 4);
    uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
    uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
    uint32 a4 = Fetch32(s + 16);
    h ^= a0;
    h = Rotate32(h, 18);
    h = h * 5 + 0xe6546b64;
    f += a1;
    f = Rotate32(f, 19);
    f = f * c1;
    g += a2;
    g = Rotate32(g, 18);
    g = g * 5 + 0xe6546b64;
    h ^= a3 + a1;
    h = Rotate32(h, 19);
    h = h * 5 + 0xe6546b64;
    g ^= a4;
    g = bswap_32(g) * 5;
    h += a4 * 5;
    h = bswap_32(h);
    f += a0;
    // Rotate the roles of the three accumulators for the next block.
    PERMUTE3(f, h, g);
    s += 20;
  } while (--iters != 0);
  // Final mixing: scramble g and f, then fold each into h in turn.
  g = Rotate32(g, 11) * c1;
  g = Rotate32(g, 17) * c1;
  f = Rotate32(f, 11) * c1;
  f = Rotate32(f, 17) * c1;
  h = Rotate32(h + g, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  h = Rotate32(h + f, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  return h;
 }
 // Rotates the 64-bit value val right by shift bits. A shift of zero is
 // returned as-is because the complementary shift would be by 64 bits, which
 // is undefined.
 static uint64 Rotate(uint64 val, int shift) {
  if (shift == 0) {
    return val;
  }
  const uint64 low_part = val >> shift;
  const uint64 high_part = val << (64 - shift);
  return low_part | high_part;
 }
 // Xors val with itself shifted right by 47 bits, folding the top 17 bits
 // into the low bits.
 static uint64 ShiftMix(uint64 val) {
  const uint64 folded = val >> 47;
  return val ^ folded;
 }
 // Hashes the pair (u, v) to 64 bits by packing it into a uint128 and
 // delegating to Hash128to64.
 static uint64 HashLen16(uint64 u, uint64 v) {
  const uint128 packed(u, v);
  return Hash128to64(packed);
 }
 // Murmur-inspired hash of the pair (u, v) to 64 bits, using the
 // caller-supplied multiplier mul for every multiply step.
 static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
  const uint64 first = (u ^ v) * mul;
  const uint64 first_mixed = first ^ (first >> 47);
  const uint64 second = (v ^ first_mixed) * mul;
  const uint64 second_mixed = second ^ (second >> 47);
  return second_mixed * mul;
 }
 // 64-bit hash for inputs of 0 to 16 bytes. The empty input maps to k2;
 // 1-3 byte inputs hash the first, middle and last byte; 4-7 byte inputs hash
 // the first and last 32-bit word; 8-16 byte inputs hash the first and last
 // 64-bit word.
 static uint64 HashLen0to16(const char *s, size_t len) {
  if (len == 0) {
    return k2;
  }
  if (len < 4) {
    const uint8 first = s[0];
    const uint8 middle = s[len >> 1];
    const uint8 last = s[len - 1];
    const uint32 y = static_cast<uint32>(first) + (static_cast<uint32>(middle) << 8);
    const uint32 z = len + (static_cast<uint32>(last) << 2);
    return ShiftMix(y * k2 ^ z * k0) * k2;
  }
  // Both remaining cases use the same length-dependent multiplier.
  const uint64 mul = k2 + len * 2;
  if (len < 8) {
    const uint64 head = Fetch32(s);
    return HashLen16(len + (head << 3), Fetch32(s + len - 4), mul);
  }
  const uint64 head = Fetch64(s) + k2;
  const uint64 tail = Fetch64(s + len - 8);
  const uint64 u = Rotate(tail, 37) * mul + head;
  const uint64 v = (Rotate(head, 25) + tail) * mul;
  return HashLen16(u, v, mul);
 }
 // 64-bit hash for inputs of 17 to 32 bytes, built from the first two and
 // last two 64-bit words. (It probably also works for 16-byte inputs, but
 // may be overkill there.)
 static uint64 HashLen17to32(const char *s, size_t len) {
  const uint64 mul = k2 + len * 2;
  const uint64 head0 = Fetch64(s) * k1;
  const uint64 head1 = Fetch64(s + 8);
  const uint64 tail0 = Fetch64(s + len - 8) * mul;
  const uint64 tail1 = Fetch64(s + len - 16) * k2;
  const uint64 u = Rotate(head0 + head1, 43) + Rotate(tail0, 30) + tail1;
  const uint64 v = head0 + Rotate(head1 + k2, 18) + tail0;
  return HashLen16(u, v, mul);
 }
 // Quick-and-dirty 16-byte hash of six 64-bit inputs (48 bytes): four data
 // words w, x, y, z and two seeds a, b. Callers do best to pass
 // "random-looking" seeds.
 static pair<uint64, uint64> WeakHashLen32WithSeeds(
    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
  const uint64 base = a + w;
  const uint64 total = base + x + y;
  const uint64 rotated = Rotate(b + base + z, 21) + Rotate(total, 44);
  return pair<uint64, uint64>(total + z, rotated + base);
 }
 // Quick-and-dirty 16-byte hash of the 32 bytes s[0] ... s[31] plus the seeds
 // a and b: loads four 64-bit words and forwards to the six-argument overload.
 static pair<uint64, uint64> WeakHashLen32WithSeeds(
    const char* s, uint64 a, uint64 b) {
  const uint64 w = Fetch64(s);
  const uint64 x = Fetch64(s + 8);
  const uint64 y = Fetch64(s + 16);
  const uint64 z = Fetch64(s + 24);
  return WeakHashLen32WithSeeds(w, x, y, z, a, b);
 }
 // Return an 8-byte hash for 33 to 64 bytes.
 // Reads the first 32 and the last 32 bytes of the input as eight 64-bit
 // words; for len < 64 those two regions overlap.
 static uint64 HashLen33to64(const char *s, size_t len) {
  // Length-dependent multiplier used throughout the mixing below.
  uint64 mul = k2 + len * 2;
  // a, b, e, f come from the front of the input; c, d, g, h from the back.
  uint64 a = Fetch64(s) * k2;
  uint64 b = Fetch64(s + 8);
  uint64 c = Fetch64(s + len - 24);
  uint64 d = Fetch64(s + len - 32);
  uint64 e = Fetch64(s + 16) * k2;
  uint64 f = Fetch64(s + 24) * 9;
  uint64 g = Fetch64(s + len - 8);
  uint64 h = Fetch64(s + len - 16) * mul;
  // Mixing chain; each value depends on the ones computed before it, so the
  // statement order matters.
  uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
  uint64 v = ((a + g) ^ d) + f + 1;
  uint64 w = bswap_64((u + v) * mul) + h;
  uint64 x = Rotate(e + f, 42) + c;
  uint64 y = (bswap_64((v + w) * mul) + g) * mul;
  uint64 z = e + f + c;
  // a and b are reused here as scratch for the final combination.
  a = bswap_64((x + z) * mul + y) + b;
  b = ShiftMix((z + a) * mul + d + h) * mul;
  return b + x;
 }
 // Returns a 64-bit hash of the len bytes starting at s.
 //
 // Inputs of at most 64 bytes are handed to the length-specific helpers.
 // Longer inputs seed the state from the last 64 bytes and then process the
 // input from the front in 64-byte chunks.
 uint64 CityHash64(const char *s, size_t len) {
  if (len <= 32) {
    if (len <= 16) {
      return HashLen0to16(s, len);
    } else {
      return HashLen17to32(s, len);
    }
  } else if (len <= 64) {
    return HashLen33to64(s, len);
  }
  // For strings over 64 bytes we hash the end first, and then as we
  // loop we keep 56 bytes of state: v, w, x, y, and z.
  uint64 x = Fetch64(s + len - 40);
  uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
  uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
  x = x * k1 + Fetch64(s);
  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
  // Because len > 64 here, the rounded value is at least 64, so the do/while
  // below runs at least once and reaches exactly zero.
  len = (len - 1) & ~static_cast<size_t>(63);
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    len -= 64;
  } while (len != 0);
  // Collapse the 56 bytes of state into the 64-bit result.
  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
                   HashLen16(v.second, w.second) + x);
 }
 // 64-bit hash of the len bytes at s with one caller-supplied seed. Delegates
 // to CityHash64WithSeeds, using the constant k2 as the first seed.
 uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
  const uint64 fixed_seed0 = k2;
  return CityHash64WithSeeds(s, len, fixed_seed0, seed);
 }
 // 64-bit hash of the len bytes at s with two caller-supplied seeds: seed0 is
 // subtracted from the unseeded CityHash64 value and the difference is hashed
 // together with seed1.
 uint64 CityHash64WithSeeds(const char *s, size_t len,
                           uint64 seed0, uint64 seed1) {
  const uint64 unseeded = CityHash64(s, len);
  return HashLen16(unseeded - seed0, seed1);
 }
 // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
 // of any length representable in signed long.  Based on City and Murmur.
 // In this file it is called from CityHash128WithSeed for len < 128.
 static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
  uint64 a = Uint128Low64(seed);
  uint64 b = Uint128High64(seed);
  uint64 c = 0;
  uint64 d = 0;
  // Signed on purpose: the value is meant to be <= 0 when len <= 16, which
  // selects the short-input branch below.
  signed long l = len - 16;
  if (l <= 0) {  // len <= 16
    a = ShiftMix(a * k1) * k1;
    c = b * k1 + HashLen0to16(s, len);
    // Only read a full 64-bit word when at least 8 bytes are available.
    d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
  } else {  // len > 16
    // Mix in the last 16 bytes first, then walk the input from the front in
    // 16-byte steps until l (the bytes before that tail) is used up.
    c = HashLen16(Fetch64(s + len - 8) + k1, a);
    d = HashLen16(b + len, c + Fetch64(s + len - 16));
    a += d;
    do {
      a ^= ShiftMix(Fetch64(s) * k1) * k1;
      a *= k1;
      b ^= a;
      c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
      c *= k1;
      d ^= c;
      s += 16;
      l -= 16;
    } while (l > 0);
  }
  // Combine the four state words into the two 64-bit halves of the result.
  a = HashLen16(a, c);
  b = HashLen16(d, b);
  return uint128(a ^ b, HashLen16(b, a));
 }
 // Returns a 128-bit hash of the len bytes starting at s, with seed mixed in.
 //
 // Inputs shorter than 128 bytes are delegated to CityMurmur. Longer inputs
 // are processed in 128-byte steps (two 64-byte rounds each); whatever is left
 // over is then hashed in 32-byte pieces taken from the end of the input.
 uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len < 128) {
    return CityMurmur(s, len, seed);
  }
  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
  // v, w, x, y, and z.
  pair<uint64, uint64> v, w;
  uint64 x = Uint128Low64(seed);
  uint64 y = Uint128High64(seed);
  uint64 z = len * k1;
  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
  w.first = Rotate(y + z, 35) * k1 + x;
  // The read at s + 88 is within the input because len >= 128 here.
  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
  // This is the same inner loop as CityHash64(), manually unrolled.
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    // Second 64-byte round of the unrolled pair.
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
    len -= 128;
  } while (LIKELY(len >= 128));
  x += Rotate(v.first + z, 49) * k0;
  y = y * k0 + Rotate(w.second, 37);
  z = z * k0 + Rotate(w.first, 27);
  w.first *= 9;
  v.first *= k0;
  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  // s now points just past the consumed chunks and s + len is the end of the
  // input, so the last piece may reach back into bytes that were already
  // hashed by the loop above.
  for (size_t tail_done = 0; tail_done < len; ) {
    tail_done += 32;
    y = Rotate(x + y, 42) * k0 + v.second;
    w.first += Fetch64(s + len - tail_done + 16);
    x = x * k0 + w.first;
    z += w.second + Fetch64(s + len - tail_done);
    w.second += v.first;
    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
    v.first *= k0;
  }
  // At this point our 56 bytes of state should contain more than
  // enough information for a strong 128-bit hash.  We use two
  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  x = HashLen16(x, v.first);
  y = HashLen16(y + z, w.first);
  return uint128(HashLen16(x + v.second, w.second) + y,
                 HashLen16(x + w.second, y + v.second));
 }
 // Returns a 128-bit hash of the len bytes starting at s. Inputs shorter than
 // 16 bytes are hashed with the fixed seed (k0, k1); otherwise the first 16
 // bytes become the seed and the remaining bytes are hashed with it.
 uint128 CityHash128(const char *s, size_t len) {
  if (len < 16) {
    return CityHash128WithSeed(s, len, uint128(k0, k1));
  }
  const uint128 seed(Fetch64(s), Fetch64(s + 8) + k0);
  return CityHash128WithSeed(s + 16, len - 16, seed);
 }
 #ifdef __SSE4_2__
 #include <citycrc.h>
 #include <nmmintrin.h>
 // Requires len >= 240.
 // Computes a 256-bit hash of the len bytes at s, mixed with seed, and stores
 // it in result[0] ... result[3]. The input is consumed in 40-byte CHUNK steps
 // that use the _mm_crc32_u64 intrinsic: first in groups of six (240 bytes),
 // then one chunk at a time, then one final overlapping chunk for any
 // leftover bytes.
 static void CityHashCrc256Long(const char *s, size_t len,
                               uint32 seed, uint64 *result) {
  uint64 a = Fetch64(s + 56) + k0;
  uint64 b = Fetch64(s + 96) + k0;
  // result[0] and result[1] double as scratch storage until the end.
  uint64 c = result[0] = HashLen16(b, len);
  uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
  uint64 e = Fetch64(s + 184) + seed;
  uint64 f = 0;
  uint64 g = 0;
  uint64 h = c + d;
  uint64 x = seed;
  uint64 y = 0;
  uint64 z = 0;
  // 240 bytes of input per iter.
  // iters is at least 1 given the len >= 240 precondition; afterwards len
  // holds only the bytes not covered by whole 240-byte groups.
  size_t iters = len / 240;
  len -= iters * 240;
  do {
    // CHUNK(r) absorbs 40 bytes at s into the state, rotates e by r, and
    // advances s by 40. The macro is not #undef'd after this function.
 #undef CHUNK
 #define CHUNK(r)                                \
    PERMUTE3(x, z, y);                          \
    b += Fetch64(s);                            \
    c += Fetch64(s + 8);                        \
    d += Fetch64(s + 16);                       \
    e += Fetch64(s + 24);                       \
    f += Fetch64(s + 32);                       \
    a += b;                                     \
    h += f;                                     \
    b += c;                                     \
    f += d;                                     \
    g += e;                                     \
    e += z;                                     \
    g += x;                                     \
    z = _mm_crc32_u64(z, b + g);                \
    y = _mm_crc32_u64(y, e + h);                \
    x = _mm_crc32_u64(x, f + a);                \
    e = Rotate(e, r);                           \
    c += e;                                     \
    s += 40
    CHUNK(0); PERMUTE3(a, h, c);
    CHUNK(33); PERMUTE3(a, h, f);
    CHUNK(0); PERMUTE3(b, h, f);
    CHUNK(42); PERMUTE3(b, h, d);
    CHUNK(0); PERMUTE3(b, h, e);
    CHUNK(33); PERMUTE3(a, h, e);
  } while (--iters > 0);
  // Remaining whole 40-byte chunks (fewer than six).
  while (len >= 40) {
    CHUNK(29);
    e ^= Rotate(a, 20);
    h += Rotate(b, 30);
    g ^= Rotate(c, 40);
    f += Rotate(d, 34);
    PERMUTE3(c, h, g);
    len -= 40;
  }
  if (len > 0) {
    // Fewer than 40 bytes remain: step back so the final chunk covers the
    // last 40 bytes of the input, overlapping bytes already absorbed.
    s = s + len - 40;
    CHUNK(33);
    e ^= Rotate(a, 43);
    h += Rotate(b, 42);
    g ^= Rotate(c, 41);
    f += Rotate(d, 40);
  }
  // Final mixing of the state into the four output words.
  result[0] ^= h;
  result[1] ^= g;
  g += h;
  a = HashLen16(a, g + z);
  x += y << 32;
  b += x;
  c = HashLen16(c, z) + h;
  d = HashLen16(d, e + result[0]);
  g += e;
  h += HashLen16(x, f);
  e = HashLen16(a, d) + g;
  z = HashLen16(b, c) + a;
  y = HashLen16(g, h) + c;
  result[0] = e + z + y + x;
  a = ShiftMix((a + y) * k0) * k0 + b;
  result[1] += a + result[0];
  a = ShiftMix(a * k0) * k0 + c;
  result[2] = a + result[1];
  a = ShiftMix((a + e) * k0) * k0;
  result[3] = a + result[2];
 }
 // Requires len < 240.
 static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
  char buf[240];
  memcpy(buf, s, len);
  memset(buf + len, 0, 240 - len);
  CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);
 }
 // Writes a 256-bit hash of the len bytes at s into result[0] ... result[3].
 // Inputs of at least 240 bytes go straight to the long variant (seed 0);
 // shorter inputs take the padded short path.
 void CityHashCrc256(const char *s, size_t len, uint64 *result) {
  if (LIKELY(len >= 240)) {
    CityHashCrc256Long(s, len, 0, result);
    return;
  }
  CityHashCrc256Short(s, len, result);
 }
 // Seeded 128-bit hash of the len bytes at s. Inputs of at most 900 bytes use
 // CityHash128WithSeed; longer inputs compute the 256-bit CRC-based hash and
 // fold it together with the seed into 128 bits.
 uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len <= 900) {
    return CityHash128WithSeed(s, len, seed);
  }
  uint64 crc[4];
  CityHashCrc256(s, len, crc);
  const uint64 u = Uint128High64(seed) + crc[0];
  const uint64 v = Uint128Low64(seed) + crc[1];
  const uint64 first = HashLen16(u, v + crc[2]);
  const uint64 second = HashLen16(Rotate(v, 32), u * k0 + crc[3]);
  return uint128(first, second);
 }
 // 128-bit hash of the len bytes at s. Inputs of at most 900 bytes use
 // CityHash128; longer inputs return the last two words of the 256-bit
 // CRC-based hash.
 uint128 CityHashCrc128(const char *s, size_t len) {
  if (len <= 900) {
    return CityHash128(s, len);
  }
  uint64 crc[4];
  CityHashCrc256(s, len, crc);
  return uint128(crc[2], crc[3]);
 }
 #endif
--- a/3rdParty/libcuckoo/include/cityhash/city.h
+++ b/3rdParty/libcuckoo/include/cityhash/city.h
@ -0,0 +1,112 @@
 // Copyright (c) 2011 Google, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 //
 // CityHash, by Geoff Pike and Jyrki Alakuijala
 //
 // http://code.google.com/p/cityhash/
 //
 // This file provides a few functions for hashing strings.  All of them are
 // high-quality functions in the sense that they pass standard tests such
 // as Austin Appleby's SMHasher.  They are also fast.
 //
 // For 64-bit x86 code, on short strings, we don't know of anything faster than
 // CityHash64 that is of comparable quality.  We believe our nearest competitor
 // is Murmur3.  For 64-bit x86 code, CityHash64 is an excellent choice for hash
 // tables and most other hashing (excluding cryptography).
 //
 // For 64-bit x86 code, on long strings, the picture is more complicated.
 // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
 // CityHashCrc128 appears to be faster than all competitors of comparable
 // quality.  CityHash128 is also good but not quite as fast.  We believe our
 // nearest competitor is Bob Jenkins' Spooky.  We don't have great data for
 // other 64-bit CPUs, but for long strings we know that Spooky is slightly
 // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
 // Note that CityHashCrc128 is declared in citycrc.h.
 //
 // For 32-bit x86 code, we don't know of anything faster than CityHash32 that
 // is of comparable quality.  We believe our nearest competitor is Murmur3A.
 // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
 //
 // Functions in the CityHash family are not suitable for cryptography.
 //
 // Please see CityHash's README file for more details on our performance
 // measurements and so on.
 //
 // WARNING: This code has been only lightly tested on big-endian platforms!
 // It is known to work well on little-endian platforms that have a small penalty
 // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
 // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
 // bug reports are welcome.
 //
 // By the way, for some hash functions, given strings a and b, the hash
 // of a+b is easily derived from the hashes of a and b.  This property
 // doesn't hold for any hash functions in this file.
 #ifndef CITY_HASH_H_
 #define CITY_HASH_H_
 #include <stdlib.h>  // for size_t.
 #include <stdint.h>
 #include <utility>
 // Short names for the fixed-width integer types used throughout CityHash.
 using uint8 = uint8_t;
 using uint32 = uint32_t;
 using uint64 = uint64_t;
 // A 128-bit value stored as a pair of 64-bit halves: first is the low half,
 // second is the high half.
 using uint128 = std::pair<uint64, uint64>;
 // Returns the low 64 bits of x.
 inline uint64 Uint128Low64(const uint128& x) {
  return x.first;
 }
 // Returns the high 64 bits of x.
 inline uint64 Uint128High64(const uint128& x) {
  return x.second;
 }
 // Hash function for a byte array.
 uint64 CityHash64(const char *buf, size_t len);
 // Hash function for a byte array.  For convenience, a 64-bit seed is also
 // hashed into the result.
 uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
 // Hash function for a byte array.  For convenience, two seeds are also
 // hashed into the result.
 uint64 CityHash64WithSeeds(const char *buf, size_t len,
                           uint64 seed0, uint64 seed1);
 // Hash function for a byte array.
 uint128 CityHash128(const char *s, size_t len);
 // Hash function for a byte array.  For convenience, a 128-bit seed is also
 // hashed into the result.
 uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
 // Hash function for a byte array.  Most useful in 32-bit binaries.
 uint32 CityHash32(const char *buf, size_t len);
 // Reduces a 128-bit value to a 64-bit hash with a Murmur-inspired sequence of
 // xor, multiply and shift steps. Intended as a reasonably good hash function.
 inline uint64 Hash128to64(const uint128& x) {
  const uint64 kMul = 0x9ddfea08eb382d69ULL;
  const uint64 low = Uint128Low64(x);
  const uint64 high = Uint128High64(x);
  uint64 mixed = (low ^ high) * kMul;
  mixed ^= mixed >> 47;
  uint64 folded = (high ^ mixed) * kMul;
  folded ^= folded >> 47;
  return folded * kMul;
 }
 #endif  // CITY_HASH_H_
--- a/3rdParty/libcuckoo/include/cityhash/citycrc.h
+++ b/3rdParty/libcuckoo/include/cityhash/citycrc.h
@ -0,0 +1,43 @@
 // Copyright (c) 2011 Google, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 //
 // CityHash, by Geoff Pike and Jyrki Alakuijala
 //
 // This file declares the subset of the CityHash functions that require
 // _mm_crc32_u64().  See the CityHash README for details.
 //
 // Functions in the CityHash family are not suitable for cryptography.
 #ifndef CITY_HASH_CRC_H_
 #define CITY_HASH_CRC_H_
 #include <city.h>
 // Hash function for a byte array: hashes `len` bytes starting at `s` and
 // returns a 128-bit result.
 uint128 CityHashCrc128(const char *s, size_t len);
 // Hash function for a byte array.  For convenience, a 128-bit seed is also
 // hashed into the result.
 uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
 // Hash function for a byte array.  Sets result[0] ... result[3], so `result`
 // must point to storage for at least four uint64 values.
 void CityHashCrc256(const char *s, size_t len, uint64 *result);
 #endif  // CITY_HASH_CRC_H_
--- a/3rdParty/libcuckoo/include/libcuckoo/city_hasher.hh
+++ b/3rdParty/libcuckoo/include/libcuckoo/city_hasher.hh
@ -0,0 +1,40 @@
 #ifndef _CITY_HASHER_HH
 #define _CITY_HASHER_HH
 #include <cityhash/city.h>
 #include <string>
 /*! CityHasher is a std::hash-style functor backed by CityHash. It hashes the
  *  sizeof(Key) bytes of the key object itself, so it is meant for keys whose
  *  in-memory bytes fully determine their value. We encourage using CityHasher
  *  instead of the default std::hash where possible. */
 template <class Key>
 class CityHasher {
 public:
     size_t operator()(const Key& k) const {
         const char* const bytes = reinterpret_cast<const char*>(&k);
         const size_t num_bytes = sizeof(k);
         if (sizeof(size_t) >= 8) {
             /* This branch should be optimized away on 32-bit builds, but
              * the cast is still required there to stop MSVC emitting a
              * truncation warning. */
             return static_cast<size_t>(CityHash64(bytes, num_bytes));
         }
         // size_t is narrower than 64 bits: use the 32-bit variant.
         return CityHash32(bytes, num_bytes);
     }
 };
 /*! Specialization of CityHasher for std::string: hashes the string's
  *  characters (k.size() bytes starting at k.c_str()) rather than the bytes
  *  of the std::string object. */
 template <>
 class CityHasher<std::string> {
 public:
     size_t operator()(const std::string& k) const {
         const char* const chars = k.c_str();
         const size_t length = k.size();
         if (sizeof(size_t) >= 8) {
             /* This branch should be optimized away on 32-bit builds, but
              * the cast is still required there to stop MSVC emitting a
              * truncation warning. */
             return static_cast<size_t>(CityHash64(chars, length));
         }
         // size_t is narrower than 64 bits: use the 32-bit variant.
         return CityHash32(chars, length);
     }
 };
 #endif // _CITY_HASHER_HH
--- a/3rdParty/libcuckoo/include/libcuckoo/cuckoohash_config.hh
+++ b/3rdParty/libcuckoo/include/libcuckoo/cuckoohash_config.hh
@ -0,0 +1,34 @@
 /** \file */
 #ifndef _CUCKOOHASH_CONFIG_HH
 #define _CUCKOOHASH_CONFIG_HH
 #include <cstddef>
 //! Default upper bound on the number of keys stored in a single bucket
 constexpr size_t DEFAULT_SLOT_PER_BUCKET = 4;
 //! Default element capacity of a freshly created, empty hash table
 //! (2^16 buckets' worth of slots)
 constexpr size_t DEFAULT_SIZE = DEFAULT_SLOT_PER_BUCKET * (1U << 16);
 //! Memory granularity of the locks array, on a scale of 0 to 16. 0 is the
 //! least granular: the locks form one contiguous array, which gives the best
 //! performance at the cost of the greatest memory overhead. 16 is the most
 //! granular: the least memory overhead, but worse performance.
 constexpr size_t LOCK_ARRAY_GRANULARITY = 0;
 //! Default minimum load factor the table tolerates when it expands
 //! automatically. Must lie between 0.0 and 1.0. If the load factor is below
 //! this value during an automatic expansion, the table throws
 //! libcuckoo_load_factor_too_low.
 constexpr double DEFAULT_MINIMUM_LOAD_FACTOR = 0.05;
 //! Sentinel meaning "no limit on the maximum hashpower". Setting the maximum
 //! hashpower limit to this value disables the limit. Since 0 is the only
 //! hashpower that can never occur, it should stay at 0.
 constexpr size_t NO_MAXIMUM_HASHPOWER = 0;
 //! set LIBCUCKOO_DEBUG to 1 to enable debug output
 #define LIBCUCKOO_DEBUG 0
 #endif // _CUCKOOHASH_CONFIG_HH
--- a/3rdParty/libcuckoo/include/libcuckoo/cuckoohash_map.hh
+++ b/3rdParty/libcuckoo/include/libcuckoo/cuckoohash_map.hh
--- a/3rdParty/libcuckoo/include/libcuckoo/cuckoohash_util.hh
+++ b/3rdParty/libcuckoo/include/libcuckoo/cuckoohash_util.hh
@ -0,0 +1,185 @@
 /** \file */
 #ifndef _CUCKOOHASH_UTIL_HH
 #define _CUCKOOHASH_UTIL_HH
 #include <cstdio>
 #include <exception>
 #include <functional>
 #include <memory>
 #include <thread>
 #include <type_traits>
 #include <vector>
 #include "cuckoohash_config.hh" // for LIBCUCKOO_DEBUG
 // Debug logging macro. When LIBCUCKOO_DEBUG is non-zero it prints a
 // printf-style message to stderr, prefixed with the source file, line and a
 // numeric identifier for the calling thread. Otherwise it expands to an empty
 // statement and its arguments are not evaluated.
 //
 // std::thread::id has no conversion to an integer, so the thread identifier is
 // obtained by hashing it with std::hash<std::thread::id>.
 #if LIBCUCKOO_DEBUG
 #  define LIBCUCKOO_DBG(fmt, ...)                                          \
      fprintf(stderr, "\x1b[32m""[libcuckoo:%s:%d:%lu] " fmt"" "\x1b[0m",   \
              __FILE__,__LINE__,                                            \
              static_cast<unsigned long>(std::hash<std::thread::id>()(      \
                  std::this_thread::get_id())),                             \
              __VA_ARGS__)
 #else
 #  define LIBCUCKOO_DBG(fmt, ...)  do {} while (0)
 #endif
 /**
 * LIBCUCKOO_ALIGNAS(x) requests an alignment of x for the entity it is
 * attached to.
 *
 * alignas() requires GCC >= 4.9, so we stick with the alignment attribute for
 * any compiler that defines __GNUC__ and use the standard alignas() specifier
 * everywhere else.
 */
 #ifdef __GNUC__
 #define LIBCUCKOO_ALIGNAS(x) __attribute__((aligned(x)))
 #else
 #define LIBCUCKOO_ALIGNAS(x) alignas(x)
 #endif
 /**
 * At higher warning levels, MSVC produces an annoying warning that alignment
 * may cause wasted space: "structure was padded due to __declspec(align())".
 *
 * LIBCUCKOO_SQUELCH_PADDING_WARNING expands to a pragma suppressing warning
 * 4324 when _MSC_VER is defined, and to nothing on every other compiler.
 */
 #ifdef _MSC_VER
 #define LIBCUCKOO_SQUELCH_PADDING_WARNING __pragma(warning(suppress : 4324))
 #else
 #define LIBCUCKOO_SQUELCH_PADDING_WARNING
 #endif
 /**
 * thread_local requires GCC >= 4.8 and is not supported in some clang versions,
 * so we use __thread if thread_local is not supported.
 *
 * LIBCUCKOO_THREAD_LOCAL defaults to thread_local and is redefined to
 * __thread in two cases: on clang when __has_feature(cxx_thread_local) is
 * false, and on GCC 4.x with a minor version below 8. The clang check comes
 * first so that the GCC version test only applies to non-clang compilers.
 */
 #define LIBCUCKOO_THREAD_LOCAL thread_local
 #if defined(__clang__)
 #  if !__has_feature(cxx_thread_local)
 #    undef LIBCUCKOO_THREAD_LOCAL
 #    define LIBCUCKOO_THREAD_LOCAL __thread
 #  endif
 #elif defined(__GNUC__)
 #  if __GNUC__ == 4 && __GNUC_MINOR__ < 8
 #    undef LIBCUCKOO_THREAD_LOCAL
 #    define LIBCUCKOO_THREAD_LOCAL __thread
 #  endif
 #endif
 // For enabling certain methods based on a condition. The arguments are, in
 // order: the declaration specifiers to put before the return type (preamble),
 // the compile-time condition, and the return type. Here's an example.
 // ENABLE_IF(static inline, some_cond, type) method() {
 //     ...
 // }
 // The macro turns the method into a template with a defaulted dummy parameter
 // (Bogus); sizeof(Bogus) is always non-zero and only serves to make the
 // enable_if condition depend on a template parameter.
 #define ENABLE_IF(preamble, condition, return_type)                     \
    template <class Bogus=void*>                                        \
    preamble typename std::enable_if<sizeof(Bogus) &&                   \
        condition, return_type>::type
 /**
  * Thrown when an automatic expansion is triggered, but the load factor of the
  * table is below a minimum threshold, which can be set by the \ref
  * cuckoohash_map::minimum_load_factor method. This can happen if the hash
  * function does not properly distribute keys, or for certain adversarial
  * workloads.
  */
 class libcuckoo_load_factor_too_low : public std::exception {
 public:
     /**
      * Constructor
      *
      * @param lf the load factor of the table when the exception was thrown
      */
     libcuckoo_load_factor_too_low(const double lf)
         : load_factor_(lf) {}
     /**
      * @return a fixed, human-readable description of the error
      */
     virtual const char* what() const noexcept override {
         return "Automatic expansion triggered when load factor was below "
             "minimum threshold";
     }
     /**
      * Const-qualified so it can be called on an exception caught by const
      * reference.
      *
      * @return the load factor of the table when the exception was thrown
      */
     double load_factor() const noexcept {
         return load_factor_;
     }
 private:
     const double load_factor_;
 };
 /**
  * Thrown when an expansion is triggered, but the hashpower specified is greater
  * than the maximum, which can be set with the \ref
  * cuckoohash_map::maximum_hashpower method.
  */
 class libcuckoo_maximum_hashpower_exceeded : public std::exception {
 public:
     /**
      * Constructor
      *
      * @param hp the hash power we were trying to expand to
      */
     libcuckoo_maximum_hashpower_exceeded(const size_t hp)
         : hashpower_(hp) {}
     /**
      * @return a fixed, human-readable description of the error
      */
     virtual const char* what() const noexcept override {
         return "Expansion beyond maximum hashpower";
     }
     /**
      * Const-qualified so it can be called on an exception caught by const
      * reference.
      *
      * @return the hashpower we were trying to expand to
      */
     size_t hashpower() const noexcept {
         return hashpower_;
     }
 private:
     const size_t hashpower_;
 };
 // Allocates an array of the given size and value-initializes each element with
 // the 0-argument constructor.
 //
 // T is the element type and Alloc the allocator type used to obtain and
 // construct the storage; a default-constructed Alloc is used. All allocator
 // operations go through std::allocator_traits, because the construct() and
 // destroy() members of std::allocator are deprecated in C++17 and removed in
 // C++20, and custom allocators are not required to provide them.
 //
 // Returns a pointer to the first of `size` constructed elements. If a
 // constructor throws, the elements built so far are destroyed, the storage is
 // released and the exception is rethrown. Release the result with
 // destroy_array<T, Alloc>(ptr, size).
 template <class T, class Alloc>
 T* create_array(const size_t size) {
     typedef std::allocator_traits<Alloc> alloc_traits;
     Alloc allocator;
     T* arr = alloc_traits::allocate(allocator, size);
     // Number of elements successfully constructed so far; needed for cleanup
     // if a later constructor throws.
     size_t constructed = 0;
     try {
         for (; constructed < size; ++constructed) {
             alloc_traits::construct(allocator, &arr[constructed]);
         }
     } catch (...) {
         for (size_t j = 0; j < constructed; ++j) {
             alloc_traits::destroy(allocator, &arr[j]);
         }
         alloc_traits::deallocate(allocator, arr, size);
         throw;
     }
     return arr;
 }
 // Destroys every element of an array of the given size and then deallocates the
 // memory.
 //
 // `arr` must point to `size` constructed elements whose storage came from an
 // allocator of type Alloc (for example from create_array<T, Alloc>). A
 // default-constructed Alloc is used, and all operations go through
 // std::allocator_traits because std::allocator::destroy() is deprecated in
 // C++17 and removed in C++20.
 template <class T, class Alloc>
 void destroy_array(T* arr, const size_t size) {
     typedef std::allocator_traits<Alloc> alloc_traits;
     Alloc allocator;
     for (size_t i = 0; i < size; ++i) {
         alloc_traits::destroy(allocator, &arr[i]);
     }
     alloc_traits::deallocate(allocator, arr, size);
 }
 // Executes the function over the range [start, end), split across num_threads
 // threads.
 //
 // func is invoked once per thread as func(chunk_start, chunk_end, eptr), where
 // eptr is a std::exception_ptr& slot private to that thread. The range is cut
 // into num_threads chunks of (end - start) / num_threads elements; the last
 // chunk also takes the remainder. A num_threads of 0 is treated as 1 so that
 // the chunk size computation never divides by zero.
 //
 // All threads are joined before returning. Afterwards the first non-null
 // exception_ptr stored by a thread, if any, is rethrown in the caller.
 // func is presumably expected to catch its own exceptions and store them in
 // eptr; an exception escaping func on a worker thread is not handled here.
 template <class F>
 static void parallel_exec(size_t start, size_t end,
                           size_t num_threads, F func) {
     if (num_threads == 0) {
         num_threads = 1;
     }
     const size_t work_per_thread = (end - start) / num_threads;
     std::vector<std::thread> threads;
     threads.reserve(num_threads);
     std::vector<std::exception_ptr> eptrs(num_threads, nullptr);
     try {
         for (size_t i = 0; i + 1 < num_threads; ++i) {
             threads.emplace_back(func, start, start + work_per_thread,
                                  std::ref(eptrs[i]));
             start += work_per_thread;
         }
         // The last thread covers whatever is left of the range.
         threads.emplace_back(func, start, end,
                              std::ref(eptrs[num_threads - 1]));
     } catch (...) {
         // Spawning a thread failed. Destroying a joinable std::thread calls
         // std::terminate, so wait for the ones already running first.
         for (std::thread& t : threads) {
             t.join();
         }
         throw;
     }
     for (std::thread& t : threads) {
         t.join();
     }
     for (std::exception_ptr& eptr : eptrs) {
         if (eptr) {
             std::rethrow_exception(eptr);
         }
     }
 }
 #endif // _CUCKOOHASH_UTIL_HH
--- a/3rdParty/libcuckoo/include/libcuckoo/default_hasher.hh
+++ b/3rdParty/libcuckoo/include/libcuckoo/default_hasher.hh
@ -0,0 +1,29 @@
 #ifndef _DEFAULT_HASHER_HH
 #define _DEFAULT_HASHER_HH
 #include <string>
 #include <type_traits>
 /*! DefaultHasher is the hash functor the table uses by default. Integral
  *  keys, which std::hash tends to handle poorly, are multiplied by a large
  *  odd constant; every other key type is forwarded to std::hash. */
 template <class Key>
 class DefaultHasher {
     // std::hash instance used for all non-integral key types
     std::hash<Key> std_hasher_;
 public:
     //! Selected when Key is an integral type
     template <class K = Key,
               typename std::enable_if<std::is_integral<K>::value,
                                       int>::type = 0>
     size_t operator()(const Key& k) const {
         // This constant is found in the CityHash code
         return k * 0x9ddfea08eb382d69ULL;
     }
     //! Selected for every non-integral Key; defers to std::hash
     template <class K = Key,
               typename std::enable_if<!std::is_integral<K>::value,
                                       int>::type = 0>
     size_t operator()(const Key& k) const {
         return std_hasher_(k);
     }
 };
 #endif // _DEFAULT_HASHER_HH
--- a/3rdParty/libcuckoo/include/libcuckoo/lazy_array.hh
+++ b/3rdParty/libcuckoo/include/libcuckoo/lazy_array.hh
@ -0,0 +1,119 @@
 /** \file */
 #ifndef _LAZY_ARRAY_HH
 #define _LAZY_ARRAY_HH
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <memory>
 #include "cuckoohash_util.hh"
 // lazy array. A fixed-size array, broken up into segments that are dynamically
 // allocated, only when requested. The array holds 2^(OFFSET_BITS +
 // SEGMENT_BITS) elements of type T, split into 2^SEGMENT_BITS segments of
 // 2^OFFSET_BITS elements each. Segments are created by allocate() through
 // create_array<T, Alloc> and released through destroy_array<T, Alloc>. The
 // user must make sure the necessary segments are allocated before accessing
 // the array.
 // NOTE(review): this class uses std::array, but the header does not include
 // <array> itself - confirm it is reachable through the other includes.
 template <uint8_t OFFSET_BITS, uint8_t SEGMENT_BITS,
           class T, class Alloc = std::allocator<T>
           >
 class lazy_array {
     // Strictly less-than: size() shifts a size_t by OFFSET_BITS +
     // SEGMENT_BITS, and shifting by the full width of the type is undefined.
     static_assert(SEGMENT_BITS + OFFSET_BITS < sizeof(size_t)*8,
                   "The number of segment and offset bits must be less than "
                   "the number of bits in a size_t");
 private:
     // The shifts are done on a size_t rather than an unsigned long, which is
     // only 32 bits wide on some 64-bit platforms.
     static const size_t SEGMENT_SIZE = static_cast<size_t>(1) << OFFSET_BITS;
     static const size_t NUM_SEGMENTS = static_cast<size_t>(1) << SEGMENT_BITS;
     static const size_t OFFSET_MASK = SEGMENT_SIZE - 1;
     // One pointer per segment; nullptr marks a segment that has not been
     // allocated. Declared mutable, although no const member currently
     // modifies it.
     mutable std::array<T*, NUM_SEGMENTS> segments_;
     // Releases this array's segments, then takes over the segment pointers
     // of arr, leaving arr with no allocated segments.
     void move_other_array(lazy_array&& arr) {
         clear();
         std::copy(arr.segments_.begin(), arr.segments_.end(),
                   segments_.begin());
         std::fill(arr.segments_.begin(), arr.segments_.end(), nullptr);
     }
     // Index of the segment containing element i. Static so that it can be
     // used from const member functions.
     static inline size_t get_segment(size_t i) {
         return i >> OFFSET_BITS;
     }
     // Position of element i within its segment. Static so that it can be
     // used from const member functions.
     static inline size_t get_offset(size_t i) {
         return i & OFFSET_MASK;
     }
 public:
     // Creates an array with no segments allocated.
     lazy_array(): segments_{{nullptr}} {}
     // No copying
     lazy_array(const lazy_array&) = delete;
     lazy_array& operator=(const lazy_array&) = delete;
     // Moving is allowed
     lazy_array(lazy_array&& arr) : segments_{{nullptr}} {
         move_other_array(std::move(arr));
     }
     lazy_array& operator=(lazy_array&& arr) {
         // Guard against self-assignment: move_other_array() clears this
         // array first, which would destroy the segments being moved.
         if (this != &arr) {
             move_other_array(std::move(arr));
         }
         return *this;
     }
     ~lazy_array() {
         clear();
     }
     // Destroys and deallocates every allocated segment.
     void clear() {
         for (size_t i = 0; i < segments_.size(); ++i) {
             if (segments_[i] != nullptr) {
                 destroy_array<T, Alloc>(segments_[i], SEGMENT_SIZE);
                 segments_[i] = nullptr;
             }
         }
     }
     // Element access. The segment containing i must already be allocated;
     // this is only checked by an assert.
     T& operator[](size_t i) {
         assert(segments_[get_segment(i)] != nullptr);
         return segments_[get_segment(i)][get_offset(i)];
     }
     const T& operator[](size_t i) const {
         assert(segments_[get_segment(i)] != nullptr);
         return segments_[get_segment(i)][get_offset(i)];
     }
     // Ensures that the array has enough segments to index target elements, not
     // exceeding the total size. The user must ensure that the array is properly
     // allocated before accessing a certain index. This saves having to check
     // every index operation.
     void allocate(size_t target) {
         assert(target <= size());
         if (target == 0) {
             return;
         }
         const size_t last_segment = get_segment(target - 1);
         for (size_t i = 0; i <= last_segment; ++i) {
             if (segments_[i] == nullptr) {
                 segments_[i] = create_array<T, Alloc>(SEGMENT_SIZE);
             }
         }
     }
     // Returns the number of elements in the array that can be indexed, starting
     // contiguously from the beginning.
     size_t allocated_size() const {
         size_t num_allocated_segments = 0;
         while (num_allocated_segments < NUM_SEGMENTS &&
                segments_[num_allocated_segments] != nullptr) {
             ++num_allocated_segments;
         }
         return num_allocated_segments * SEGMENT_SIZE;
     }
     // Total number of elements the array can hold once fully allocated.
     static constexpr size_t size() {
         return static_cast<size_t>(1) << (OFFSET_BITS + SEGMENT_BITS);
     }
 };
 #endif // _LAZY_ARRAY_HH
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -376,6 +376,7 @@ include_directories(${PROJECT_SOURCE_DIR}/3rdParty/linenoise-ng/include)
 include_directories(${PROJECT_SOURCE_DIR}/3rdParty/linenoise-ng/src)
 include_directories(${PROJECT_SOURCE_DIR}/3rdParty/velocypack/include)
 include_directories(${PROJECT_SOURCE_DIR}/3rdParty/rocksdb/rocksdb/include)
 include_directories(${PROJECT_SOURCE_DIR}/3rdParty/libcuckoo/include)
 include_directories(${PROJECT_BINARY_DIR})
 include_directories(${PROJECT_BINARY_DIR}/lib)
--- a/arangod/CMakeLists.txt
+++ b/arangod/CMakeLists.txt
@ -360,7 +360,7 @@ SET(ARANGOD_SOURCES
  Wal/Slot.cpp
  Wal/Slots.cpp
  Wal/SynchronizerThread.cpp
-  Pregel/AggregatorUsage.cpp
+  Pregel/AggregatorHandler.cpp
  Pregel/AlgoRegistry.cpp
  Pregel/Algos/SSSP.cpp
  Pregel/Algos/PageRank.cpp
@ -396,6 +396,7 @@ target_link_libraries(${BIN_ARANGOD}
  ${SYSTEM_LIBRARIES}
  boost_boost
  boost_system
  libcuckoo
 )
 install(
--- a/arangod/Pregel/Aggregator.h
+++ b/arangod/Pregel/Aggregator.h
@ -50,12 +50,13 @@ class Aggregator {
  // virtual void setValue(VPackSlice slice) = 0;
  virtual VPackValue vpackValue() = 0;
-  virtual void reset() {};
+  virtual void reset(){};
-  virtual bool isPermanent() {return _permanent;}
+  bool isPermanent() { return _permanent; }
 };
 class FloatMaxAggregator : public Aggregator {
  float _value, _initial;
 public:
  FloatMaxAggregator(float init) : _value(init), _initial(init) {}
@ -77,50 +78,43 @@ class FloatMaxAggregator : public Aggregator {
  void reset() override { _value = _initial; }
 };
-template<typename T>
+template <typename T>
 class ValueAggregator : public Aggregator {
  static_assert(std::is_arithmetic<T>::value, "Type must be numeric");
-  
+
  T _value;
-public:
+
 public:
  ValueAggregator(T val) : Aggregator(true), _value(val) {}
-  
+
-  void aggregate(void const* valuePtr) override {
+  void aggregate(void const* valuePtr) override { _value = *((T*)valuePtr); };
-    _value = *((T*)valuePtr);
+  void aggregate(VPackSlice slice) override { _value = slice.getNumber<T>(); }
-  };
+
  void aggregate(VPackSlice slice) override {
    _value = slice.getNumber<T>();
  }
  void const* getValue() const override { return &_value; };
  /*void setValue(VPackSlice slice) override {
   _value = (float)slice.getDouble();
   }*/
  VPackValue vpackValue() override { return VPackValue(_value); };
 };
-  
+
-template<typename T>
+template <typename T>
 class SumAggregator : public Aggregator {
  static_assert(std::is_arithmetic<T>::value, "Type must be numeric");
-  
+
  T _value;
-public:
+
 public:
  SumAggregator(T val) : Aggregator(true), _value(val) {}
-  
+
-  void aggregate(void const* valuePtr) override {
+  void aggregate(void const* valuePtr) override { _value += *((T*)valuePtr); };
-    _value += *((T*)valuePtr);
+  void aggregate(VPackSlice slice) override { _value += slice.getNumber<T>(); }
-  };
+
  void aggregate(VPackSlice slice) override {
    _value += slice.getNumber<T>();
  }
  void const* getValue() const override { return &_value; };
  /*void setValue(VPackSlice slice) override {
   _value = (float)slice.getDouble();
   }*/
  VPackValue vpackValue() override { return VPackValue(_value); };
 };
 }
 }
 #endif
--- a/arangod/Pregel/AggregatorHandler.cpp
+++ b/arangod/Pregel/AggregatorHandler.cpp
@ -20,21 +20,22 @@
 /// @author Simon Grätzer
 ////////////////////////////////////////////////////////////////////////////////
-#include "Pregel/AggregatorUsage.h"
+#include "Pregel/AggregatorHandler.h"
 #include "Pregel/Aggregator.h"
 #include "Pregel/Algorithm.h"
 using namespace arangodb;
 using namespace arangodb::pregel;
-AggregatorUsage::~AggregatorUsage() {
+AggregatorHandler::~AggregatorHandler() {
  for (auto const& it : _values) {
    delete it.second;
  }
  _values.clear();
 }
-void AggregatorUsage::aggregate(std::string const& name, const void* valuePtr) {
+void AggregatorHandler::aggregate(std::string const& name,
                                  const void* valuePtr) {
  auto it = _values.find(name);
  if (it != _values.end()) {
    it->second->aggregate(valuePtr);
@ -48,7 +49,8 @@ void AggregatorUsage::aggregate(std::string const& name, const void* valuePtr) {
  }
 }
-const void* AggregatorUsage::getAggregatedValue(std::string const& name) const {
+const void* AggregatorHandler::getAggregatedValue(
    std::string const& name) const {
  auto const& it = _values.find(name);
  if (it != _values.end()) {
    return it->second->getValue();
@ -56,7 +58,7 @@ const void* AggregatorUsage::getAggregatedValue(std::string const& name) const {
  return nullptr;
 }
-void AggregatorUsage::resetValues() {
+void AggregatorHandler::resetValues() {
  for (auto& it : _values) {
    if (!it.second->isPermanent()) {
      it.second->reset();
@ -64,7 +66,7 @@ void AggregatorUsage::resetValues() {
  }
 }
-void AggregatorUsage::aggregateValues(AggregatorUsage const& workerValues) {
+void AggregatorHandler::aggregateValues(AggregatorHandler const& workerValues) {
  for (auto const& pair : workerValues._values) {
    std::string const& name = pair.first;
    auto my = _values.find(name);
@ -81,7 +83,7 @@ void AggregatorUsage::aggregateValues(AggregatorUsage const& workerValues) {
  }
 }
-void AggregatorUsage::aggregateValues(VPackSlice workerValues) {
+void AggregatorHandler::aggregateValues(VPackSlice workerValues) {
  for (auto const& keyValue : VPackObjectIterator(workerValues)) {
    std::string name = keyValue.key.copyString();
    auto const& it = _values.find(name);
@ -98,10 +100,10 @@ void AggregatorUsage::aggregateValues(VPackSlice workerValues) {
  }
 }
-void AggregatorUsage::serializeValues(VPackBuilder& b) const {
+void AggregatorHandler::serializeValues(VPackBuilder& b) const {
  for (auto const& pair : _values) {
    b.add(pair.first, pair.second->vpackValue());
  }
 }
-size_t AggregatorUsage::size() { return _values.size(); }
+size_t AggregatorHandler::size() { return _values.size(); }
--- a/arangod/Pregel/AggregatorHandler.h
+++ b/arangod/Pregel/AggregatorHandler.h
@ -20,31 +20,32 @@
 /// @author Simon Grätzer
 ////////////////////////////////////////////////////////////////////////////////
-#ifndef ARANGODB_PREGEL_AGGRGS_USAGE_H
+#ifndef ARANGODB_PREGEL_AGGREGATOR_HANDLER_H
-#define ARANGODB_PREGEL_AGGRGS_USAGE_H 1
+#define ARANGODB_PREGEL_AGGREGATOR_HANDLER_H 1
-#include <velocypack/vpack.h>
+#include <velocypack/Builder.h>
 #include <velocypack/Slice.h>
 #include <velocypack/velocypack-aliases.h>
 #include <functional>
 #include <map>
 namespace arangodb {
 namespace pregel {
-  
+
 struct IAlgorithm;
 class Aggregator;
-class AggregatorUsage {
+class AggregatorHandler {
  const IAlgorithm* _create;
  std::map<std::string, Aggregator*> _values;
 public:
-  AggregatorUsage(const IAlgorithm* c) : _create(c) {}
+  AggregatorHandler(const IAlgorithm* c) : _create(c) {}
-  ~AggregatorUsage();
+  ~AggregatorHandler();
  void aggregate(std::string const& name, const void* valuePtr);
  const void* getAggregatedValue(std::string const& name) const;
  void resetValues();
-  void aggregateValues(AggregatorUsage const& workerValues);
+  void aggregateValues(AggregatorHandler const& workerValues);
  void aggregateValues(VPackSlice workerValues);
  void serializeValues(VPackBuilder& b) const;
  size_t size();
--- a/arangod/Pregel/AlgoRegistry.h
+++ b/arangod/Pregel/AlgoRegistry.h
@ -31,11 +31,14 @@ struct TRI_vocbase_t;
 namespace arangodb {
 namespace pregel {
 struct AlgoRegistry {
-  static IAlgorithm* createAlgorithm(std::string const& algorithm, VPackSlice userParams);
+  static IAlgorithm* createAlgorithm(std::string const& algorithm,
                                     VPackSlice userParams);
  static IWorker* createWorker(TRI_vocbase_t* vocbase, VPackSlice body);
-private:
+
 private:
  template <typename V, typename E, typename M>
-  static IWorker* createWorker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo, VPackSlice body);
+  static IWorker* createWorker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
                               VPackSlice body);
 };
 }
 }
--- a/arangod/Pregel/Algorithm.h
+++ b/arangod/Pregel/Algorithm.h
@ -30,9 +30,9 @@
 #include "Basics/Common.h"
 #include "GraphFormat.h"
 #include "MasterContext.h"
 #include "MessageCombiner.h"
 #include "MessageFormat.h"
 #include "MasterContext.h"
 #include "WorkerContext.h"
 namespace arangodb {
@ -40,7 +40,7 @@ namespace pregel {
 template <typename V, typename E, typename M>
 class VertexComputation;
-  
+
 template <typename V, typename E, typename M>
 class VertexCompensation;
@ -48,31 +48,30 @@ class Aggregator;
 struct IAlgorithm {
  virtual ~IAlgorithm() {}
-  
+
  // virtual bool isFixpointAlgorithm() const {return false;}
-  // virtual bool preserveTransactions() const { return false; }
+
-  virtual bool supportsCompensation() const {
+  virtual bool supportsAsyncMode() const { return false; }
-    return false;
+
-  }
+  virtual bool supportsCompensation() const { return false; }
-  
+
  virtual Aggregator* aggregator(std::string const& name) const {
    return nullptr;
  }
-  
+
  virtual MasterContext* masterContext(VPackSlice userParams) const {
    return nullptr;
  }
-  
+
  std::string const& name() const { return _name; }
-  
+
-protected:
+ protected:
  IAlgorithm(std::string const& name) : _name(name){};
-  
+
-private:
+ private:
  std::string _name;
 };
 // specify serialization, whatever
 template <typename V, typename E, typename M>
 struct Algorithm : IAlgorithm {
@ -87,7 +86,8 @@ struct Algorithm : IAlgorithm {
  virtual VertexCompensation<V, E, M>* createCompensation(uint64_t gss) const {
    return nullptr;
  }
-protected:
+
 protected:
  Algorithm(std::string const& name) : IAlgorithm(name){};
 };
--- a/arangod/Pregel/Algos/PageRank.h
+++ b/arangod/Pregel/Algos/PageRank.h
@ -36,6 +36,10 @@ struct PageRankAlgorithm : public SimpleAlgorithm<float, float, float> {
 public:
  PageRankAlgorithm(arangodb::velocypack::Slice params);
  bool supportsAsyncMode() const override { return true; }
  bool supportsCompensation() const override { return true; }
  MasterContext* masterContext(VPackSlice userParams) const override;
  GraphFormat<float, float>* inputFormat() const override;
  MessageFormat<float>* messageFormat() const override;
@ -44,8 +48,6 @@ struct PageRankAlgorithm : public SimpleAlgorithm<float, float, float> {
      const override;
  VertexCompensation<float, float, float>* createCompensation(uint64_t gss) const override;
  Aggregator* aggregator(std::string const& name) const override;
  MasterContext* masterContext(VPackSlice userParams) const override;
 };
 }
 }
--- a/arangod/Pregel/Conductor.cpp
+++ b/arangod/Pregel/Conductor.cpp
@ -67,7 +67,7 @@ void Conductor::start(std::string const& algoName, VPackSlice userConfig) {
  } else {
    _userParams.add(userConfig);
  }
-
+  
  _startTimeSecs = TRI_microtime();
  _globalSuperstep = 0;
  _state = ExecutionState::RUNNING;
@ -76,7 +76,11 @@ void Conductor::start(std::string const& algoName, VPackSlice userConfig) {
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
                                   "Algorithm not found");
  }
-  _aggregatorUsage.reset(new AggregatorUsage(_algorithm.get()));
+  _aggregators.reset(new AggregatorHandler(_algorithm.get()));
  // configure the async mode as optional
  VPackSlice async = _userParams.slice().get("async");
  _asyncMode = _algorithm->supportsAsyncMode();
  _asyncMode = _asyncMode && (async.isNone() || async.getBoolean());
  int res = _initializeWorkers(Utils::startExecutionPath, VPackSlice());
  if (res != TRI_ERROR_NO_ERROR) {
@ -92,15 +96,15 @@ bool Conductor::_startGlobalStep() {
  b.openObject();
  b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
  b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep));
-  if (_aggregatorUsage->size() > 0) {
+  if (_aggregators->size() > 0) {
    b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
-    _aggregatorUsage->serializeValues(b);
+    _aggregators->serializeValues(b);
    b.close();
  }
  b.close();
  // reset values which are calculated during the superstep
-  _aggregatorUsage->resetValues();
+  _aggregators->resetValues();
  _workerStats.activeCount = 0;
  // first allow all workers to run worker level operations
@ -128,11 +132,11 @@ void Conductor::finishedWorkerStartup(VPackSlice& data) {
    LOG(WARN) << "We are not in a state where we expect a response";
    return;
  }
-  _ensureCorrectness(data);
+  _ensureUniqueResponse(data);
  if (_respondedServers.size() != _dbServers.size()) {
    return;
  }
-  
+
  if (_startGlobalStep()) {
    // listens for changing primary DBServers on each collection shard
    RecoveryManager* mngr = PregelFeature::instance()->recoveryManager();
@ -142,31 +146,38 @@ void Conductor::finishedWorkerStartup(VPackSlice& data) {
  }
 }
-void Conductor::finishedGlobalStep(VPackSlice& data) {
+void Conductor::finishedWorkerStep(VPackSlice& data) {
  MUTEX_LOCKER(guard, _callbackMutex);
-
+  // this method can be called multiple times in a superstep depending on
  // whether we are in the async mode
  uint64_t gss = data.get(Utils::globalSuperstepKey).getUInt();
-  if (gss != _globalSuperstep) {
+  if (gss != _globalSuperstep ||
      !(_state == ExecutionState::RUNNING ||
        _state == ExecutionState::CANCELED)) {
    LOG(WARN) << "Conductor did received a callback from the wrong superstep";
    return;
  }
-  _ensureCorrectness(data);
+  VPackSlice slice = data.get(Utils::gssDone);
-
+  bool gssDone = slice.isBool() && slice.getBool();
-  // collect worker information
+  if (!_asyncMode || gssDone) {
-  VPackSlice workerValues = data.get(Utils::aggregatorValuesKey);
+    _ensureUniqueResponse(data);
-  if (workerValues.isObject()) {
+    
-    _aggregatorUsage->aggregateValues(workerValues);
+    // collect worker information
    slice = data.get(Utils::aggregatorValuesKey);
    if (slice.isObject()) {
      _aggregators->aggregateValues(slice);
    }
    _workerStats.accumulate(data);
  }
-  _workerStats.accumulate(data);
+  
  if (_respondedServers.size() != _dbServers.size()) {
    return;
  }
  bool proceed = true;
-  if (_masterContext) { // ask algorithm to evaluate aggregated values
+  if (_masterContext) {  // ask algorithm to evaluate aggregated values
    proceed = _masterContext->postGlobalSuperstep(_globalSuperstep);
  }
-  
+
  LOG(INFO) << "Finished gss " << _globalSuperstep;
  _globalSuperstep++;
@ -178,7 +189,7 @@ void Conductor::finishedGlobalStep(VPackSlice& data) {
  proceed = proceed && _globalSuperstep <= 100;
  if (proceed && !workersDone && _state == ExecutionState::RUNNING) {
-    _startGlobalStep();// trigger next superstep
+    _startGlobalStep();  // trigger next superstep
  } else if (_state == ExecutionState::RUNNING ||
             _state == ExecutionState::CANCELED) {
    if (_state == ExecutionState::CANCELED) {
@ -195,7 +206,7 @@ void Conductor::finishedGlobalStep(VPackSlice& data) {
    // tells workers to store / discard results
    _finalizeWorkers();
-  } else {// this prop shouldn't occur,
+  } else {  // this prop shouldn't occur,
    LOG(WARN) << "No further action taken after receiving all responses";
  }
 }
@ -206,12 +217,11 @@ void Conductor::finishedRecovery(VPackSlice& data) {
    LOG(WARN) << "We are not in a state where we expect a recovery response";
    return;
  }
-  _ensureCorrectness(data);
+  _ensureUniqueResponse(data);
  if (_respondedServers.size() != _dbServers.size()) {
    return;
  }
-  
+
  if (_algorithm->supportsCompensation()) {
    bool proceed = false;
    if (_masterContext) {
@ -222,15 +232,15 @@ void Conductor::finishedRecovery(VPackSlice& data) {
      b.openObject();
      b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
      b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep));
-      if (_aggregatorUsage->size() > 0) {
+      if (_aggregators->size() > 0) {
        b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
-        _aggregatorUsage->serializeValues(b);
+        _aggregators->serializeValues(b);
        b.close();
      }
      b.close();
      // reset values which are calculated during the superstep
-      _aggregatorUsage->resetValues();
+      _aggregators->resetValues();
      _workerStats.activeCount = 0;
      // first allow all workers to run worker level operations
@ -248,8 +258,10 @@ void Conductor::finishedRecovery(VPackSlice& data) {
 }
 void Conductor::cancel() {
-  
+  if (_state == ExecutionState::RUNNING ||
-  if (_state == ExecutionState::RUNNING || _state == ExecutionState::RECOVERING) {
+      _state == ExecutionState::RECOVERING) {
    _state = ExecutionState::CANCELED;
    VPackBuilder b;
    b.openObject();
    b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
@ -257,7 +269,6 @@ void Conductor::cancel() {
    b.close();
    _sendToAllDBServers(Utils::cancelGSSPath, b.slice());
  }
  _state = ExecutionState::CANCELED;
  // stop monitoring shards
  RecoveryManager* mngr = PregelFeature::instance()->recoveryManager();
@ -296,7 +307,7 @@ void Conductor::startRecovery() {
      cancel();
      return;
    }
-    
+
    VPackBuilder b;
    b.openObject();
    b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
@ -304,8 +315,9 @@ void Conductor::startRecovery() {
    b.close();
    _dbServers = goodServers;
    _sendToAllDBServers(Utils::cancelGSSPath, b.slice());
    usleep(5 * 1000000);// workers may need a little bit
-    
+    // Let's try recovery
    if (_algorithm->supportsCompensation()) {
      if (_masterContext) {
        _masterContext->preCompensation(_globalSuperstep);
@ -314,13 +326,13 @@ void Conductor::startRecovery() {
      VPackBuilder b;
      b.openObject();
      b.add(Utils::recoveryMethodKey, VPackValue(Utils::compensate));
-      if (_aggregatorUsage->size() > 0) {
+      if (_aggregators->size() > 0) {
        b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
-        _aggregatorUsage->serializeValues(b);
+        _aggregators->serializeValues(b);
        b.close();
      }
      b.close();
-      _aggregatorUsage->resetValues();
+      _aggregators->resetValues();
      _workerStats.activeCount = 0;
      // initialize workers will reconfigure the workers and set the
@ -396,7 +408,7 @@ int Conductor::_initializeWorkers(std::string const& suffix,
  if (_masterContext && _masterContext->_vertexCount == 0) {
    _masterContext->_vertexCount = vertexCount;
    _masterContext->_edgeCount = edgeCount;
-    _masterContext->_aggregators = _aggregatorUsage.get();
+    _masterContext->_aggregators = _aggregators.get();
    _masterContext->preApplication();
  }
@ -517,7 +529,7 @@ int Conductor::_sendToAllDBServers(std::string const& suffix,
  return nrGood == requests.size() ? TRI_ERROR_NO_ERROR : TRI_ERROR_FAILED;
 }
-void Conductor::_ensureCorrectness(VPackSlice body) {
+void Conductor::_ensureUniqueResponse(VPackSlice body) {
  // check if this is the only time we received this
  ServerID sender = body.get(Utils::senderKey).copyString();
  if (_respondedServers.find(sender) != _respondedServers.end()) {
--- a/arangod/Pregel/Conductor.h
+++ b/arangod/Pregel/Conductor.h
@ -35,55 +35,61 @@ namespace arangodb {
 class RestPregelHandler;
 namespace pregel {
-enum ExecutionState { DEFAULT,// before calling start
+enum ExecutionState {
-  RUNNING,// during normal operation
+  DEFAULT,    // before calling start
-  DONE,// after everyting is done
+  RUNNING,    // during normal operation
-  CANCELED,// after an error or manual canceling
+  DONE,       // after everyting is done
-  RECOVERING// during recovery
+  CANCELED,   // after an error or manual canceling
  RECOVERING  // during recovery
 };
-  
+
 class MasterContext;
-class AggregatorUsage;
+class AggregatorHandler;
 struct IAlgorithm;
 class Conductor {
  friend class arangodb::RestPregelHandler;
-  
+
  ExecutionState _state = ExecutionState::DEFAULT;
  const VocbaseGuard _vocbaseGuard;
  const uint64_t _executionNumber;
  std::unique_ptr<IAlgorithm> _algorithm;
  VPackBuilder _userParams;
  Mutex _callbackMutex;  // prevents concurrent calls to finishedGlobalStep
-  
+
  std::vector<std::shared_ptr<LogicalCollection>> _vertexCollections;
  std::vector<std::shared_ptr<LogicalCollection>> _edgeCollections;
  std::vector<ServerID> _dbServers;
  // initialized on startup
-  std::unique_ptr<AggregatorUsage> _aggregatorUsage;
+  std::unique_ptr<AggregatorHandler> _aggregators;
  std::unique_ptr<MasterContext> _masterContext;
-
+  /// some tracking info
  double _startTimeSecs = 0, _endTimeSecs = 0;
  uint64_t _globalSuperstep = 0;
  /// tracks the servers which responded, only used for stages where we expect a
  /// unique response, not necessarily during the async mode
  std::set<ServerID> _respondedServers;
-  
+  bool _asyncMode = false;
  /// persistent tracking of active vertices, sent messages, runtimes
  WorkerStats _workerStats;
  bool _startGlobalStep();
  int _initializeWorkers(std::string const& suffix, VPackSlice additional);
  int _finalizeWorkers();
  int _sendToAllDBServers(std::string const& suffix, VPackSlice const& message);
-  void _ensureCorrectness(VPackSlice body);
+  void _ensureUniqueResponse(VPackSlice body);
  // === REST callbacks ===
  void finishedWorkerStartup(VPackSlice& data);
-  void finishedGlobalStep(VPackSlice& data);
+  void finishedWorkerStep(VPackSlice& data);
  void finishedRecovery(VPackSlice& data);
-  
+
 public:
-  Conductor(uint64_t executionNumber, TRI_vocbase_t* vocbase,
+  Conductor(
-            std::vector<std::shared_ptr<LogicalCollection>> const& vertexCollections,
+      uint64_t executionNumber, TRI_vocbase_t* vocbase,
-            std::vector<std::shared_ptr<LogicalCollection>> const& edgeCollections);
+      std::vector<std::shared_ptr<LogicalCollection>> const& vertexCollections,
      std::vector<std::shared_ptr<LogicalCollection>> const& edgeCollections);
  ~Conductor();
  void start(std::string const& algoName, VPackSlice userConfig);
@ -91,10 +97,11 @@ class Conductor {
  void startRecovery();
  ExecutionState getState() const { return _state; }
-  WorkerStats workerStats() const {return _workerStats;}
+  WorkerStats workerStats() const { return _workerStats; }
-  uint64_t globalSuperstep() const {return _globalSuperstep;}
+  uint64_t globalSuperstep() const { return _globalSuperstep; }
  double totalRuntimeSecs() {
-    return _endTimeSecs == 0 ? TRI_microtime() - _startTimeSecs : _endTimeSecs - _startTimeSecs;
+    return _endTimeSecs == 0 ? TRI_microtime() - _startTimeSecs
                             : _endTimeSecs - _startTimeSecs;
  }
 };
 }
--- a/arangod/Pregel/Graph.h
+++ b/arangod/Pregel/Graph.h
@ -27,7 +27,7 @@ namespace arangodb {
 namespace pregel {
 typedef uint16_t prgl_shard_t;
-  
+
 /// @brief header entry for the edge file
 template <typename E>
 class Edge {
@ -35,15 +35,11 @@ class Edge {
  prgl_shard_t _targetShard;
  std::string _toKey;
  E _data;
-  
+
 public:
  // EdgeEntry() : _nextEntryOffset(0), _dataSize(0), _vertexIDSize(0) {}
-  Edge(prgl_shard_t source,
+  Edge(prgl_shard_t source, prgl_shard_t target, std::string const& key)
-       prgl_shard_t target,
+      : _sourceShard(source), _targetShard(target), _toKey(key) {}
       std::string const& key)
  : _sourceShard(source),
    _targetShard(target),
    _toKey(key) {}
  // size_t getSize() { return sizeof(EdgeEntry) + _vertexIDSize + _dataSize; }
  std::string const& toKey() const { return _toKey; }
@ -51,19 +47,15 @@ class Edge {
  inline E* data() {
    return &_data;  // static_cast<E>(this + sizeof(EdgeEntry) + _vertexIDSize);
  }
-  inline prgl_shard_t sourceShard() const {
+  inline prgl_shard_t sourceShard() const { return _sourceShard; }
-    return _sourceShard;
+  inline prgl_shard_t targetShard() const { return _targetShard; }
  }
  inline prgl_shard_t targetShard() const {
    return _targetShard;
  }
 };
 class VertexEntry {
  template <typename V, typename E>
  friend class GraphStore;
-  const prgl_shard_t _shard;// TODO optimize and remove
+  const prgl_shard_t _shard;  // TODO optimize and remove
  const std::string _key;
  size_t _vertexDataOffset;  // size_t vertexID length
  size_t _edgeDataOffset;
@ -85,14 +77,14 @@ class VertexEntry {
  inline size_t getSize() { return sizeof(VertexEntry); }
  inline bool active() const { return _active; }
  inline void setActive(bool bb) { _active = bb; }
-  
+
-  inline prgl_shard_t shard() const {return _shard;}
+  inline prgl_shard_t shard() const { return _shard; }
  inline std::string const& key() const { return _key; };
  /*std::string const& key() const {
    return std::string(_key, _keySize);
  };*/
 };
-  
+
 // unused right now
 /*class LinkedListIterator {
 private:
--- a/arangod/Pregel/GraphFormat.h
+++ b/arangod/Pregel/GraphFormat.h
@ -70,7 +70,7 @@ class IntegerGraphFormat : public GraphFormat<int64_t, int64_t> {
  int64_t readVertexData(const void* ptr) override { return *((int64_t*)ptr); }
  int64_t readEdgeData(const void* ptr) override { return *((int64_t*)ptr); }
-  size_t copyVertexData(arangodb::velocypack::Slice document,  void* targetPtr,
+  size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr,
                        size_t maxSize) override {
    arangodb::velocypack::Slice val = document.get(_sourceField);
    *((int64_t*)targetPtr) = val.isInteger() ? val.getInt() : _vDefault;
@ -84,19 +84,19 @@ class IntegerGraphFormat : public GraphFormat<int64_t, int64_t> {
    return sizeof(int64_t);
  }
-  void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
+  void buildVertexDocument(arangodb::velocypack::Builder& b,
-                           size_t size) override {
+                           const void* targetPtr, size_t size) override {
    b.add(_resultField, VPackValue(readVertexData(targetPtr)));
  }
-  void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
+  void buildEdgeDocument(arangodb::velocypack::Builder& b,
-                         size_t size) override {
+                         const void* targetPtr, size_t size) override {
    b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
  }
 };
 class FloatGraphFormat : public GraphFormat<float, float> {
-protected:
+ protected:
  const std::string _sourceField, _resultField;
  const float _vDefault, _eDefault;
@ -125,13 +125,13 @@ protected:
    return sizeof(float);
  }
-  void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
+  void buildVertexDocument(arangodb::velocypack::Builder& b,
-                           size_t size) override {
+                           const void* targetPtr, size_t size) override {
    b.add(_resultField, VPackValue(readVertexData(targetPtr)));
  }
-  void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
+  void buildEdgeDocument(arangodb::velocypack::Builder& b,
-                         size_t size) override {
+                         const void* targetPtr, size_t size) override {
    b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
  }
 };
@ -145,7 +145,7 @@ protected:
  const std::string _sourceField, _resultField;
  const V _vDefault;
  const E _eDefault;
-  
+
 public:
  NumberGraphFormat(std::string const& source, std::string const& result,
                   V vertexNull, E edgeNull)
@ -153,10 +153,10 @@ public:
  _resultField(result),
  _vDefault(vertexNull),
  _eDefault(edgeNull) {}
-  
+
  V readVertexData(void* ptr) override { return *((V*)ptr); }
  E readEdgeData(void* ptr) override { return *((E*)ptr); }
-  
+
  size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr,
                        size_t maxSize) override {
    arangodb::velocypack::Slice val = document.get(_sourceField);
@ -171,7 +171,7 @@ public:
    }
    return sizeof(V);
  }
-  
+
  size_t copyEdgeData(arangodb::velocypack::Slice document, void* targetPtr,
                      size_t maxSize) override {
    arangodb::velocypack::Slice val = document.get(_sourceField);
@ -186,13 +186,15 @@ public:
    }
    return sizeof(E);
  }
-  
+
-  void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
+  void buildVertexDocument(arangodb::velocypack::Builder& b, const void*
 targetPtr,
                           size_t size) override {
    b.add(_resultField, VPackValue(readVertexData(targetPtr)));
  }
-  
+
-  void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
+  void buildEdgeDocument(arangodb::velocypack::Builder& b, const void*
 targetPtr,
                         size_t size) override {
    b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
  }
--- a/arangod/Pregel/GraphStore.cpp
+++ b/arangod/Pregel/GraphStore.cpp
@ -43,16 +43,8 @@ using namespace arangodb;
 using namespace arangodb::pregel;
 template <typename V, typename E>
-GraphStore<V, E>::GraphStore(TRI_vocbase_t* vb, WorkerState const& state,
+GraphStore<V, E>::GraphStore(TRI_vocbase_t* vb, GraphFormat<V, E>* graphFormat)
-                             GraphFormat<V, E>* graphFormat)
+    : _vocbaseGuard(vb), _graphFormat(graphFormat) {}
    : _vocbaseGuard(vb), _graphFormat(graphFormat) {
  //  _edgeCollection = ClusterInfo::instance()->getCollection(
  //      vb->name(), state->edgeCollectionPlanId());
  loadShards(state);
  LOG(INFO) << "Loaded " << _index.size() << "vertices and " << _edges.size()
            << " edges";
 }
 template <typename V, typename E>
 GraphStore<V, E>::~GraphStore() {
@ -61,22 +53,7 @@ GraphStore<V, E>::~GraphStore() {
 template <typename V, typename E>
 void GraphStore<V, E>::loadShards(WorkerState const& state) {
-  std::vector<std::string> readColls, writeColls;
+  _createReadTransaction(state);
  for (auto shard : state.localVertexShardIDs()) {
    readColls.push_back(shard);
  }
  for (auto shard : state.localEdgeShardIDs()) {
    readColls.push_back(shard);
  }
  double lockTimeout =
      (double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);
  _transaction = new ExplicitTransaction(
      StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
      writeColls, lockTimeout, false, false);
  int res = _transaction->begin();
  if (res != TRI_ERROR_NO_ERROR) {
    THROW_ARANGO_EXCEPTION(res);
  }
  std::map<CollectionID, std::vector<ShardID>> const& vertexMap =
      state.vertexCollectionShards();
@ -108,6 +85,50 @@ void GraphStore<V, E>::loadShards(WorkerState const& state) {
  _cleanupTransactions();
 }
 /// @brief placeholder for loading a single document, identified by `_key`,
 /// from `shard` into the store.
 ///
 /// The function currently does nothing: its entire body is commented out.
 /// Judging by the disabled code, the plan is to look the document up through
 /// the read transaction, copy its vertex data via the graph format, load its
 /// edges and append the resulting VertexEntry to `_index`.
 ///
 /// NOTE(review): the disabled code passes `edgeShard` to `_loadEdges`, but
 /// `edgeShard` is neither a parameter nor a local of this function, so the
 /// code would need adjusting before it can be re-enabled.
 template <typename V, typename E>
 void GraphStore<V, E>::loadDocument(WorkerState const& state,
                                    ShardID const& shard,
                                    std::string const& _key) {
  /*if (_readTrx == nullptr) {
   _createReadTransaction(state);
  }
  prgl_shard_t sourceShard = (prgl_shard_t)state.shardId(shard);
  bool storeData = _graphFormat->storesVertexData();
  VPackBuilder builder;
  builder.openObject();
  builder.add(StaticStrings::KeyString, VPackValue(_key));
  builder.close();
  OperationOptions options;
  options.ignoreRevs = false;
  TRI_voc_cid_t cid = _readTrx->addCollectionAtRuntime(shard);
  _readTrx->orderDitch(cid);  // will throw when it fails
  OperationResult opResult = _readTrx->document(shard, builder.slice(),
  options);
  if (!opResult.successful()) {
    _cleanupTransactions();
    THROW_ARANGO_EXCEPTION(opResult.code);
  }
  VertexEntry entry(sourceShard, _key);
  if (storeData) {
    V vertexData;
    size_t size =
    _graphFormat->copyVertexData(opResult.slice(), &vertexData, sizeof(V));
    if (size > 0) {
      entry._vertexDataOffset = _vertexData.size();
      _vertexData.push_back(vertexData);
    }
  }
  std::string documentId = _readTrx->extractIdString(opResult.slice());
  _loadEdges(state, edgeShard, entry, documentId);
  _index.push_back(entry);*/
 }
 template <typename V, typename E>
 RangeIterator<VertexEntry> GraphStore<V, E>::vertexIterator() {
  return vertexIterator(0, _index.size());
@ -144,16 +165,36 @@ RangeIterator<Edge<E>> GraphStore<V, E>::edgeIterator(
  return RangeIterator<Edge<E>>(_edges, entry->_edgeDataOffset, end);
 }
 /// @brief creates and begins the transaction stored in `_readTrx`.
 ///
 /// All local vertex and edge shards reported by `state` are registered as
 /// read collections; the list of write collections stays empty.
 /// Throws an arango exception carrying the error code if the transaction
 /// cannot be started.
 template <typename V, typename E>
 void GraphStore<V, E>::_createReadTransaction(WorkerState const& state) {
  std::vector<std::string> readColls;
  std::vector<std::string> writeColls;  // intentionally left empty

  for (auto const& vertexShard : state.localVertexShardIDs()) {
    readColls.push_back(vertexShard);
  }
  for (auto const& edgeShard : state.localEdgeShardIDs()) {
    readColls.push_back(edgeShard);
  }

  // the division is carried out on integers before the conversion to double
  double const lockTimeout =
      (double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);

  _readTrx = new ExplicitTransaction(
      StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
      writeColls, lockTimeout, false, false);

  int const beginResult = _readTrx->begin();
  if (beginResult != TRI_ERROR_NO_ERROR) {
    THROW_ARANGO_EXCEPTION(beginResult);
  }
 }
 template <typename V, typename E>
 void GraphStore<V, E>::_cleanupTransactions() {
-  if (_transaction) {
+  if (_readTrx) {
-    if (_transaction->getStatus() == TRI_TRANSACTION_RUNNING) {
+    if (_readTrx->getStatus() == TRI_TRANSACTION_RUNNING) {
-      if (_transaction->commit() != TRI_ERROR_NO_ERROR) {
+      if (_readTrx->commit() != TRI_ERROR_NO_ERROR) {
        LOG(WARN) << "Pregel worker: Failed to commit on a read transaction";
      }
    }
-    delete _transaction;
+    delete _readTrx;
-    _transaction = nullptr;
+    _readTrx = nullptr;
  }
 }
@ -164,18 +205,18 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
  //_graphFormat->willUseCollection(vocbase, vertexShard, false);
  bool storeData = _graphFormat->storesVertexData();
-  TRI_voc_cid_t cid = _transaction->addCollectionAtRuntime(vertexShard);
+  TRI_voc_cid_t cid = _readTrx->addCollectionAtRuntime(vertexShard);
-  _transaction->orderDitch(cid);  // will throw when it fails
+  _readTrx->orderDitch(cid);  // will throw when it fails
  prgl_shard_t sourceShard = (prgl_shard_t)state.shardId(vertexShard);
-  /*int res = _transaction->lockRead();
+  /*int res = _readTrx->lockRead();
  if (res != TRI_ERROR_NO_ERROR) {
    THROW_ARANGO_EXCEPTION_FORMAT(res, "while looking up vertices '%s'",
                                  vertexShard.c_str());
  }*/
-  ManagedDocumentResult mmdr(_transaction);
+  ManagedDocumentResult mmdr(_readTrx);
-  std::unique_ptr<OperationCursor> cursor = _transaction->indexScan(
+  std::unique_ptr<OperationCursor> cursor = _readTrx->indexScan(
      vertexShard, Transaction::CursorType::ALL, Transaction::IndexHandle(), {},
      &mmdr, 0, UINT64_MAX, 1000, false);
@ -192,13 +233,13 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
    cursor->getMoreMptr(result, 1000);
    for (auto const& element : result) {
      TRI_voc_rid_t revisionId = element.revisionId();
-      if (collection->readRevision(_transaction, mmdr, revisionId)) {
+      if (collection->readRevision(_readTrx, mmdr, revisionId)) {
        VPackSlice document(mmdr.vpack());
        if (document.isExternal()) {
          document = document.resolveExternal();
        }
-        //LOG(INFO) << "Loaded Vertex: " << document.toJson();
+        // LOG(INFO) << "Loaded Vertex: " << document.toJson();
        std::string key = document.get(StaticStrings::KeyString).copyString();
        VertexEntry entry(sourceShard, key);
@ -214,7 +255,7 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
          }
        }
-        std::string documentId = _transaction->extractIdString(document);
+        std::string documentId = _readTrx->extractIdString(document);
        _loadEdges(state, edgeShard, entry, documentId);
        _index.push_back(entry);
      }
@ -230,10 +271,10 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
  const bool storeData = _graphFormat->storesEdgeData();
  // Transaction* trx = readTransaction(shard);
-  traverser::EdgeCollectionInfo info(_transaction, edgeShard, TRI_EDGE_OUT,
+  traverser::EdgeCollectionInfo info(_readTrx, edgeShard, TRI_EDGE_OUT,
                                     StaticStrings::FromString, 0);
-  ManagedDocumentResult mmdr(_transaction);
+  ManagedDocumentResult mmdr(_readTrx);
  auto cursor = info.getEdges(documentID, &mmdr);
  if (cursor->failed()) {
    THROW_ARANGO_EXCEPTION_FORMAT(cursor->code,
@ -253,7 +294,7 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
    cursor->getMoreMptr(result, 1000);
    for (auto const& element : result) {
      TRI_voc_rid_t revisionId = element.revisionId();
-      if (collection->readRevision(_transaction, mmdr, revisionId)) {
+      if (collection->readRevision(_readTrx, mmdr, revisionId)) {
        VPackSlice document(mmdr.vpack());
        if (document.isExternal()) {
          document = document.resolveExternal();
@ -262,7 +303,7 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
        // ====== actual loading ======
        vertexEntry._edgeCount += 1;
-        //LOG(INFO) << "Loaded Edge: " << document.toJson();
+        // LOG(INFO) << "Loaded Edge: " << document.toJson();
        std::string toValue =
            document.get(StaticStrings::ToString).copyString();
@ -301,28 +342,6 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
  }*/
 }
 /*template <typename V, typename E>
 SingleCollectionTransaction* GraphStore<V, E>::writeTransaction(ShardID const&
 shard) {
  auto it = _transactions.find(shard);
  if (it != _transactions.end()) {
    return it->second;
  } else {
    auto trx = std::make_unique<SingleCollectionTransaction>(
                                                             StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()),
                                                             shard,
                                                             TRI_TRANSACTION_WRITE);
    int res = trx->begin();
    if (res != TRI_ERROR_NO_ERROR) {
      THROW_ARANGO_EXCEPTION_FORMAT(res, "during transaction of shard '%s'",
                                    shard.c_str());
    }
    _transactions[shard] = trx.get();
    return trx.release();
  }
 }*/
 template <typename V, typename E>
 void GraphStore<V, E>::storeResults(WorkerState const& state) {
  std::vector<std::string> readColls, writeColls;
@ -337,10 +356,10 @@ void GraphStore<V, E>::storeResults(WorkerState const& state) {
  //}
  double lockTimeout =
      (double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);
-  _transaction = new ExplicitTransaction(
+  ExplicitTransaction writeTrx(
      StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
      writeColls, lockTimeout, false, false);
-  int res = _transaction->begin();
+  int res = writeTrx.begin();
  if (res != TRI_ERROR_NO_ERROR) {
    THROW_ARANGO_EXCEPTION(res);
  }
@ -356,14 +375,17 @@ void GraphStore<V, E>::storeResults(WorkerState const& state) {
    _graphFormat->buildVertexDocument(b, data, sizeof(V));
    b.close();
-    OperationResult result = _transaction->update(shard, b.slice(), options);
+    OperationResult result = writeTrx.update(shard, b.slice(), options);
    if (result.code != TRI_ERROR_NO_ERROR) {
      THROW_ARANGO_EXCEPTION(result.code);
    }
    // TODO loop over edges
  }
-  _cleanupTransactions();
+  res = writeTrx.finish(res);
  if (res != TRI_ERROR_NO_ERROR) {
    THROW_ARANGO_EXCEPTION(res);
  }
 }
 template class arangodb::pregel::GraphStore<int64_t, int64_t>;
--- a/arangod/Pregel/GraphStore.h
+++ b/arangod/Pregel/GraphStore.h
@ -37,7 +37,7 @@ namespace arangodb {
 class Transaction;
 class LogicalCollection;
 namespace pregel {
-  
+
 class WorkerState;
 template <typename V, typename E>
 struct GraphFormat;
@ -47,11 +47,10 @@ struct GraphFormat;
 ////////////////////////////////////////////////////////////////////////////////
 template <typename V, typename E>
 class GraphStore {
  VocbaseGuard _vocbaseGuard;
  const std::unique_ptr<GraphFormat<V, E>> _graphFormat;
-  Transaction *_transaction;// temporary transaction
+  Transaction* _readTrx;  // temporary transaction
-  
+
  // int _indexFd, _vertexFd, _edgeFd;
  // void *_indexMapping, *_vertexMapping, *_edgeMapping;
  // size_t _indexSize, _vertexSize, _edgeSize;
@ -65,25 +64,23 @@ class GraphStore {
  std::set<ShardID> _loadedShards;
  size_t _localVerticeCount;
  size_t _localEdgeCount;
-  
+
  void _createReadTransaction(WorkerState const& state);
  void _cleanupTransactions();
-  void _loadVertices(WorkerState const& state,
+  void _loadVertices(WorkerState const& state, ShardID const& vertexShard,
                     ShardID const& vertexShard,
                     ShardID const& edgeShard);
-  void _loadEdges(WorkerState const& state,
+  void _loadEdges(WorkerState const& state, ShardID const& shard,
-                  ShardID const& shard,
+                  VertexEntry& vertexEntry, std::string const& documentID);
                  VertexEntry& vertexEntry,
                  std::string const& documentID);
 public:
-  GraphStore(TRI_vocbase_t* vocbase, WorkerState const& state,
+  GraphStore(TRI_vocbase_t* vocbase, GraphFormat<V, E>* graphFormat);
             GraphFormat<V, E>* graphFormat);
  ~GraphStore();
  void loadShards(WorkerState const& state);
-  inline size_t vertexCount() {
+  void loadDocument(WorkerState const& state, ShardID const& shard,
-    return _index.size();
+                    std::string const& _key);
-  }
+
  inline size_t vertexCount() { return _index.size(); }
  RangeIterator<VertexEntry> vertexIterator();
  RangeIterator<VertexEntry> vertexIterator(size_t start, size_t count);
  RangeIterator<Edge<E>> edgeIterator(VertexEntry const* entry);
@ -91,7 +88,7 @@ class GraphStore {
  void* mutableVertexData(VertexEntry const* entry);
  V copyVertexData(VertexEntry const* entry);
  void replaceVertexData(VertexEntry const* entry, void* data, size_t size);
-  
+
  /// Write results to database
  void storeResults(WorkerState const& state);
 };
--- a/arangod/Pregel/IncomingCache.cpp
+++ b/arangod/Pregel/IncomingCache.cpp
@ -30,6 +30,9 @@
 #include <velocypack/Iterator.h>
 #include <velocypack/velocypack-aliases.h>
 //#include <libcuckoo/city_hasher.hh>
 //#include <libcuckoo/cuckoohash_map.hh>
 using namespace arangodb;
 using namespace arangodb::pregel;
@ -87,7 +90,7 @@ void ArrayInCache<M>::mergeCache(InCache<M> const* otherCache) {
  // cannot call setDirect since it locks
  for (auto const& pair : other->_shardMap) {
-    HMap &vertexMap = _shardMap[pair.first];
+    HMap& vertexMap = _shardMap[pair.first];
    for (auto& vertexMessage : pair.second) {
      std::vector<M>& a = vertexMap[vertexMessage.first];
      std::vector<M> const& b = vertexMessage.second;
@ -119,7 +122,6 @@ void ArrayInCache<M>::clear() {
  _shardMap.clear();
 }
 template <typename M>
 void ArrayInCache<M>::erase(prgl_shard_t shard, std::string const& key) {
  MUTEX_LOCKER(guard, this->_writeLock);
@ -134,6 +136,19 @@ void CombiningInCache<M>::setDirect(prgl_shard_t shard, std::string const& key,
                                    M const& newValue) {
  MUTEX_LOCKER(guard, this->_writeLock);
  /*cuckoohash_map<int, std::string, CityHasher<int>> Table;
  for (int i = 0; i < 100; i++) {
    Table[i] = "hello"+std::to_string(i);
  }
  for (int i = 0; i < 101; i++) {
    std::string out;
    if (Table.find(i, out)) {
      LOG(INFO) << i << "  " << out;
    } else {
      LOG(INFO) << i << "  NOT FOUND";
    }
  }*/
  this->_receivedMessageCount++;
  HMap& vertexMap = _shardMap[shard];
  auto vmsg = vertexMap.find(key);
--- a/arangod/Pregel/IncomingCache.h
+++ b/arangod/Pregel/IncomingCache.h
@ -44,7 +44,6 @@ processing */
 template <typename M>
 class InCache {
 protected:
  mutable Mutex _writeLock;
  size_t _receivedMessageCount = 0;
  MessageFormat<M> const* _format;
@ -53,12 +52,12 @@ class InCache {
      : _receivedMessageCount(0), _format(format) {}
 public:
-  virtual ~InCache() {};
+  virtual ~InCache(){};
-  
+
-  MessageFormat<M> const* format() const {return _format;}
+  MessageFormat<M> const* format() const { return _format; }
  void parseMessages(VPackSlice messages);
  size_t receivedMessageCount() const { return _receivedMessageCount; }
-  
+
  /// @brief internal method to direclty set the messages for a vertex. Only
  /// valid with already combined messages
  virtual void setDirect(prgl_shard_t shard, std::string const& vertexId,
@ -66,7 +65,8 @@ class InCache {
  virtual void mergeCache(InCache<M> const* otherCache) = 0;
  /// @brief get messages for vertex id. (Don't use keys from _from or _to
  /// directly, they contain the collection name)
-  virtual MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) = 0;
+  virtual MessageIterator<M> getMessages(prgl_shard_t shard,
                                         std::string const& key) = 0;
  /// clear cache
  virtual void clear() = 0;
  virtual void erase(prgl_shard_t shard, std::string const& key) = 0;
@ -83,7 +83,8 @@ class ArrayInCache : public InCache<M> {
  void setDirect(prgl_shard_t shard, std::string const& vertexId,
                 M const& data) override;
  void mergeCache(InCache<M> const* otherCache) override;
-  MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) override;
+  MessageIterator<M> getMessages(prgl_shard_t shard,
                                 std::string const& key) override;
  void clear() override;
  void erase(prgl_shard_t shard, std::string const& key) override;
 };
@ -91,21 +92,22 @@ class ArrayInCache : public InCache<M> {
 template <typename M>
 class CombiningInCache : public InCache<M> {
  typedef std::unordered_map<std::string, M> HMap;
-  
+
  MessageCombiner<M> const* _combiner;
  std::map<prgl_shard_t, HMap> _shardMap;
 public:
  CombiningInCache(MessageFormat<M> const* format,
                   MessageCombiner<M> const* combiner)
-  : InCache<M>(format), _combiner(combiner) {}
+      : InCache<M>(format), _combiner(combiner) {}
-  
+
-  MessageCombiner<M> const* combiner() const {return _combiner;}
+  MessageCombiner<M> const* combiner() const { return _combiner; }
  void setDirect(prgl_shard_t shard, std::string const& vertexId,
                 M const& data) override;
  void mergeCache(InCache<M> const* otherCache) override;
-  MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) override;
+  MessageIterator<M> getMessages(prgl_shard_t shard,
                                 std::string const& key) override;
  void clear() override;
  void erase(prgl_shard_t shard, std::string const& key) override;
 };
--- a/arangod/Pregel/Iterators.h
+++ b/arangod/Pregel/Iterators.h
@ -31,7 +31,7 @@ class MessageIterator {
  M const* _data;
  size_t _current = 0;
  const size_t _size = 1;
-  
+
 public:
  MessageIterator() : _data(nullptr), _current(0), _size(0) {}
@ -74,22 +74,21 @@ class MessageIterator {
  size_t size() const { return _size; }
 };
-  
+
 template <typename T>
 class RangeIterator {
-private:
+ private:
  // void *_begin, *_end, *_current;
  std::vector<T>& _vector;
  size_t _begin, _end, _current;
-  
+
-public:
+ public:
  typedef RangeIterator<T> iterator;
  typedef const RangeIterator<T> const_iterator;
-  
+
  RangeIterator(std::vector<T>& v, size_t begin, size_t end)
-  : _vector(v), _begin(begin), _end(end), _current(begin) {}
+      : _vector(v), _begin(begin), _end(end), _current(begin) {}
-  
+
  iterator begin() { return RangeIterator(_vector, _begin, _end); }
  const_iterator begin() const { return RangeIterator(_vector, _begin, _end); }
  iterator end() {
@ -102,31 +101,31 @@ public:
    it._current = it._end;
    return it;
  }
-  
+
  /// prefix ++: moves the current position one element forward and hands
  /// back this very iterator.
  RangeIterator& operator++() {
    ++_current;
    return *this;
  }
-  
+
  /// postfix ++: advances this iterator and returns a copy of the state it
  /// had before the increment.
  ///
  /// The snapshot is a local object, so it is returned by value; returning a
  /// reference to it (as was done before) leaves the caller with a dangling
  /// reference.
  RangeIterator<T> operator++(int) {
    RangeIterator<T> previous(*this);
    ++(*this);
    return previous;
  }
-  
+
  /// Dereference: yields a pointer to the element of the underlying vector at
  /// the current position, or nullptr once the current position has reached
  /// the end of the range.
  T* operator*() const {
    if (_current == _end) {
      return nullptr;
    }
    return _vector.data() + _current;
  }
-  
+
  /// Two iterators are unequal when their current positions differ; neither
  /// the underlying vector nor the range bounds take part in the comparison.
  bool operator!=(RangeIterator<T> const& other) const {
    return !(_current == other._current);
  }
-  
+
  size_t size() const { return _end - _begin; }
-  
+
  /*EdgeIterator(void* beginPtr, void* endPtr)
   : _begin(beginPtr), _end(endPtr), _current(_begin) {}
   iterator begin() { return EdgeIterator(_begin, _end); }
@ -141,19 +140,18 @@ public:
   it._current = it._end;
   return it;
   }
-   
+
   // prefix ++
   EdgeIterator<E>& operator++() {
   EdgeEntry<E>* entry = static_cast<EdgeEntry<E>>(_current);
   _current += entry->getSize();
   return *this;
   }
-   
+
   EdgeEntry<E>* operator*() const {
   return _current != _end ? static_cast<EdgeEntry<E>>(_current) : nullptr;
   }*/
 };
 }
 }
 #endif
--- a/arangod/Pregel/MasterContext.h
+++ b/arangod/Pregel/MasterContext.h
@ -25,9 +25,9 @@
 #include <velocypack/Slice.h>
 #include <velocypack/velocypack-aliases.h>
 #include "AggregatorUsage.h"
 #include "Basics/Common.h"
-#include "Utils.h"
+#include "Pregel/AggregatorHandler.h"
 #include "Pregel/Utils.h"
 namespace arangodb {
 namespace pregel {
@ -36,7 +36,7 @@ class MasterContext {
  friend class Conductor;
  uint64_t _vertexCount, _edgeCount;
-  AggregatorUsage* _aggregators;
+  AggregatorHandler* _aggregators;
 protected:
  template <typename T>
@ -50,21 +50,21 @@ class MasterContext {
  }
  virtual void preApplication(){};
-  
+
  /// @brief called before supersteps
  /// @return true to continue the computation
-  virtual bool preGlobalSuperstep(uint64_t gss) {return true;};
+  virtual bool preGlobalSuperstep(uint64_t gss) { return true; };
  /// @brief called after supersteps
  /// @return true to continue the computation
-  virtual bool postGlobalSuperstep(uint64_t gss) {return true;};
+  virtual bool postGlobalSuperstep(uint64_t gss) { return true; };
  virtual void postApplication(){};
  /// should indicate if compensation is supposed to start by returning true
-  virtual bool preCompensation(uint64_t gss) {return true;}
+  virtual bool preCompensation(uint64_t gss) { return true; }
  /// should indicate if compensation is finished, by returning false.
  /// otherwise workers will be called again with the aggregated values
-  virtual bool postCompensation(uint64_t gss) {return false;}
+  virtual bool postCompensation(uint64_t gss) { return false; }
-  
+
 public:
  MasterContext(VPackSlice params){};
--- a/arangod/Pregel/MessageCombiner.h
+++ b/arangod/Pregel/MessageCombiner.h
@ -36,8 +36,7 @@ struct MessageCombiner {
 struct IntegerMinCombiner : public MessageCombiner<int64_t> {
  IntegerMinCombiner() {}
-  void combine(int64_t& firstValue,
+  void combine(int64_t& firstValue, int64_t const& secondValue) const override {
                  int64_t const& secondValue) const override {
    if (firstValue > secondValue) {
      firstValue = secondValue;
    }
--- a/arangod/Pregel/MessageFormat.h
+++ b/arangod/Pregel/MessageFormat.h
@ -38,7 +38,7 @@ struct MessageFormat {
  virtual void unwrapValue(VPackSlice body, M& value) const = 0;
  virtual void addValue(VPackBuilder& arrayBuilder, M const& val) const = 0;
 };
-  
+
 struct IntegerMessageFormat : public MessageFormat<int64_t> {
  IntegerMessageFormat() {}
  void unwrapValue(VPackSlice s, int64_t& value) const override {
@ -58,7 +58,7 @@ struct FloatMessageFormat : public MessageFormat<float> {
    arrayBuilder.add(VPackValue(val));
  }
 };
-  
+
 /*
 template <typename M>
 struct NumberMessageFormat : public MessageFormat<M> {
--- a/arangod/Pregel/OutgoingCache.cpp
+++ b/arangod/Pregel/OutgoingCache.cpp
@ -60,7 +60,7 @@ void ArrayOutCache<M>::appendMessage(prgl_shard_t shard, std::string const& key,
                                     M const& data) {
  if (this->_state->isLocalVertexShard(shard)) {
    this->_localCache->setDirect(shard, key, data);
-    //LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
+    // LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
    this->_sendMessages++;
  } else {
    _shardMap[shard][key].push_back(data);
@ -149,17 +149,17 @@ void CombiningOutCache<M>::appendMessage(prgl_shard_t shard,
                                         M const& data) {
  if (this->_state->isLocalVertexShard(shard)) {
    this->_localCache->setDirect(shard, key, data);
-    //LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
+    // LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
    this->_sendMessages++;
  } else {
    std::unordered_map<std::string, M>& vertexMap = _shardMap[shard];
    auto it = vertexMap.find(key);
    if (it != vertexMap.end()) {  // more than one message
-     _combiner->combine(vertexMap[key], data);
+      _combiner->combine(vertexMap[key], data);
    } else {  // first message for this vertex
      vertexMap.emplace(key, data);
    }
-    
+
    if (this->_containedMessages++ > this->_batchSize) {
      flushMessages();
    }
@ -181,7 +181,7 @@ void CombiningOutCache<M>::flushMessages() {
    VPackOptions options = VPackOptions::Defaults;
    options.buildUnindexedArrays = true;
    options.buildUnindexedObjects = true;
-    
+
    VPackBuilder package(&options);
    package.openObject();
    package.add(Utils::messagesKey, VPackValue(VPackValueType::Array));
--- a/arangod/Pregel/OutgoingCache.h
+++ b/arangod/Pregel/OutgoingCache.h
@ -27,10 +27,10 @@
 #include "Cluster/ClusterInfo.h"
 #include "VocBase/voc-types.h"
 #include "Pregel/GraphStore.h"
 #include "Pregel/MessageCombiner.h"
 #include "Pregel/MessageFormat.h"
 #include "Pregel/WorkerState.h"
 #include "Pregel/GraphStore.h"
 namespace arangodb {
 namespace pregel {
@ -45,66 +45,70 @@ class CombiningInCache;
 template <typename M>
 class ArrayInCache;
-  
+
 template <typename M>
 class OutCache {
-protected:
+ protected:
  WorkerState const* _state;
  MessageFormat<M> const* _format;
  InCache<M>* _localCache;
  std::string _baseUrl;
  uint32_t _batchSize = 1000;
-  
+
  /// @brief current number of vertices stored
  size_t _containedMessages = 0;
  size_t _sendMessages = 0;
  bool shouldFlushCache();
-  
+
 public:
  OutCache(WorkerState* state, InCache<M>* cache);
-  virtual ~OutCache() {};
+  virtual ~OutCache(){};
-  
+
  size_t sendMessageCount() const { return _sendMessages; }
-  uint32_t batchSize() const {return _batchSize;}
+  uint32_t batchSize() const { return _batchSize; }
-  void setBatchSize(uint32_t bs) {_batchSize = bs;}
+  void setBatchSize(uint32_t bs) { _batchSize = bs; }
  virtual void clear() = 0;
-  virtual void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) = 0;
+  virtual void appendMessage(prgl_shard_t shard, std::string const& key,
                             M const& data) = 0;
  virtual void flushMessages() = 0;
 };
-  
+
 template <typename M>
 class ArrayOutCache : public OutCache<M> {
  /// @brief two stage map: shard -> vertex -> message
  std::unordered_map<prgl_shard_t,
-  std::unordered_map<std::string, std::vector<M>>> _shardMap;
+                     std::unordered_map<std::string, std::vector<M>>>
-  
+      _shardMap;
-public:
+
 public:
  ArrayOutCache(WorkerState* state, InCache<M>* cache)
-  : OutCache<M>(state, cache) {}
+      : OutCache<M>(state, cache) {}
  ~ArrayOutCache();
-  
+
  void clear() override;
-  void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) override;
+  void appendMessage(prgl_shard_t shard, std::string const& key,
                     M const& data) override;
  void flushMessages() override;
 };
-  
+
 template <typename M>
 class CombiningOutCache : public OutCache<M> {
  MessageCombiner<M> const* _combiner;
-  
+
  /// @brief two stage map: shard -> vertex -> message
-  std::unordered_map<prgl_shard_t, std::unordered_map<std::string, M>> _shardMap;
+  std::unordered_map<prgl_shard_t, std::unordered_map<std::string, M>>
-  
+      _shardMap;
-public:
+
 public:
  CombiningOutCache(WorkerState* state, CombiningInCache<M>* cache);
  ~CombiningOutCache();
-  
+
  void clear() override;
-  void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) override;
+  void appendMessage(prgl_shard_t shard, std::string const& key,
                     M const& data) override;
  void flushMessages() override;
 };
 }
 }
 #endif
--- a/arangod/Pregel/PregelFeature.h
+++ b/arangod/Pregel/PregelFeature.h
@ -57,10 +57,10 @@ class PregelFeature final : public application_features::ApplicationFeature {
  void cleanup(uint64_t executionNumber);
  void cleanupAll();
-  
+
-  basics::ThreadPool* threadPool() {return _threadPool.get();}
+  basics::ThreadPool* threadPool() { return _threadPool.get(); }
-  RecoveryManager* recoveryManager() {return _recoveryManager.get();}
+  RecoveryManager* recoveryManager() { return _recoveryManager.get(); }
-  
+
 private:
  Mutex _mutex;
  std::unique_ptr<RecoveryManager> _recoveryManager;
--- a/arangod/Pregel/Recovery.cpp
+++ b/arangod/Pregel/Recovery.cpp
@ -88,27 +88,26 @@ void RecoveryManager::_monitorShard(CollectionID const& cid,
                                    ShardID const& shard) {
  std::function<bool(VPackSlice const& result)> listener =
      [this, shard](VPackSlice const& result) {
-        MUTEX_LOCKER(guard, _lock);// we are editing _primaryServers
+        MUTEX_LOCKER(guard, _lock);  // we are editing _primaryServers
-        
+
        auto const& conductors = _listeners.find(shard);
        if (conductors == _listeners.end()) {
          return false;
        }
        if (result.isArray()) {
          if (result.length() > 0) {
            ServerID nextPrimary = result.at(0).copyString();
            auto const& currentPrimary = _primaryServers.find(shard);
-            if (currentPrimary != _primaryServers.end()
+            if (currentPrimary != _primaryServers.end() &&
-                && currentPrimary->second != nextPrimary) {
+                currentPrimary->second != nextPrimary) {
              _primaryServers[shard] = nextPrimary;
-              for (Conductor *cc : conductors->second) {
+              for (Conductor* cc : conductors->second) {
                cc->startRecovery();
              }
            }
          } else {
-            for (Conductor *cc : conductors->second) {
+            for (Conductor* cc : conductors->second) {
              cc->cancel();
            }
          }
--- a/arangod/Pregel/Recovery.h
+++ b/arangod/Pregel/Recovery.h
@ -23,56 +23,57 @@
 #ifndef ARANGODB_PREGEL_RECOVERY_H
 #define ARANGODB_PREGEL_RECOVERY_H 1
 #include "Basics/Mutex.h"
 #include "Cluster/ClusterInfo.h"
 #include "Agency/AgencyComm.h"
 #include "Agency/AgencyCallbackRegistry.h"
 #include <velocypack/velocypack-aliases.h>
 #include <velocypack/vpack.h>
-
+#include "Agency/AgencyCallbackRegistry.h"
 #include "Agency/AgencyComm.h"
 #include "Basics/Mutex.h"
 #include "Cluster/ClusterInfo.h"
 namespace arangodb {
 namespace pregel {
-template<typename V, typename E>
+template <typename V, typename E>
 class GraphStore;
 class Conductor;
-  
+
 class RecoveryManager {
  Mutex _lock;
  AgencyComm _agency;
-  AgencyCallbackRegistry *_agencyCallbackRegistry;//weak
+  AgencyCallbackRegistry* _agencyCallbackRegistry;  // weak
-  
+
  std::map<ShardID, std::set<Conductor*>> _listeners;
  std::map<ShardID, ServerID> _primaryServers;
  std::map<ShardID, std::shared_ptr<AgencyCallback>> _agencyCallbacks;
-  
+
  void _monitorShard(CollectionID const& cid, ShardID const& shard);
-  
+
 public:
-  RecoveryManager(AgencyCallbackRegistry *registry);
+  RecoveryManager(AgencyCallbackRegistry* registry);
  ~RecoveryManager();
-  void monitorCollections(std::vector<std::shared_ptr<LogicalCollection>> const& collections, Conductor*);
+  void monitorCollections(
      std::vector<std::shared_ptr<LogicalCollection>> const& collections,
      Conductor*);
  void stopMonitoring(Conductor*);
-  int filterGoodServers(std::vector<ServerID> const& servers, std::vector<ServerID> &goodServers);
+  int filterGoodServers(std::vector<ServerID> const& servers,
-  //bool allServersAvailable(std::vector<ServerID> const& dbServers);
+                        std::vector<ServerID>& goodServers);
  // bool allServersAvailable(std::vector<ServerID> const& dbServers);
 };
-  
+
 class RecoveryWorker {
  friend class RestPregelHandler;
-  
+
  std::map<ShardID, ServerID> _secondaries;
-  ServerID const* secondaryForShard(ShardID const& shard) {return nullptr;}
+  ServerID const* secondaryForShard(ShardID const& shard) { return nullptr; }
-  
+
-  //receivedBackupData(VPackSlice slice);
+  // receivedBackupData(VPackSlice slice);
-  
+
-public:
+ public:
-  template<typename V, typename E>
+  template <typename V, typename E>
-  void replicateGraphData(GraphStore<V,E> *graphStore) {}
+  void replicateGraphData(GraphStore<V, E>* graphStore) {}
-  
+
-  void reloadPlanData() {_secondaries.clear();}
+  void reloadPlanData() { _secondaries.clear(); }
 };
 }
 }
--- a/arangod/Pregel/Statistics.h
+++ b/arangod/Pregel/Statistics.h
@ -23,23 +23,23 @@
 #ifndef ARANGODB_PREGEL_STATISTICS_H
 #define ARANGODB_PREGEL_STATISTICS_H 1
 #include <velocypack/Slice.h>
 #include <velocypack/Builder.h>
 #include <velocypack/Slice.h>
 #include <velocypack/velocypack-aliases.h>
 #include "Pregel/Utils.h"
 namespace arangodb {
 namespace pregel {
 struct WorkerStats {
  size_t activeCount = 0;
  size_t sendCount = 0;
  size_t receivedCount = 0;
-  double superstepRuntimeSecs= 0;
+  double superstepRuntimeSecs = 0;
  WorkerStats() {}
-  WorkerStats(size_t a, size_t s, size_t r) : activeCount(a), sendCount(s), receivedCount(r) {}
+  WorkerStats(size_t a, size_t s, size_t r)
      : activeCount(a), sendCount(s), receivedCount(r) {}
  void accumulate(WorkerStats const& other) {
    activeCount += other.activeCount;
@ -47,7 +47,7 @@ struct WorkerStats {
    receivedCount += other.receivedCount;
    superstepRuntimeSecs += other.superstepRuntimeSecs;
  }
-  
+
  void accumulate(VPackSlice statValues) {
    VPackSlice p = statValues.get(Utils::activeCountKey);
    if (p.isInteger()) {
@ -73,13 +73,17 @@ struct WorkerStats {
    b.add(Utils::receivedCountKey, VPackValue(receivedCount));
    b.add(Utils::superstepRuntimeKey, VPackValue(superstepRuntimeSecs));
  }
-  
+
  /// Sets all three counters and the accumulated superstep runtime back to
  /// zero.
  void reset() {
    superstepRuntimeSecs = 0;
    activeCount = sendCount = receivedCount = 0;
  }
  bool isDone() {
    return activeCount == 0 && sendCount == receivedCount;
  }
 };
 }
 }
--- a/arangod/Pregel/Utils.cpp
+++ b/arangod/Pregel/Utils.cpp
@ -40,7 +40,7 @@ std::string const Utils::startExecutionPath = "startExecution";
 std::string const Utils::finishedStartupPath = "finishedStartup";
 std::string const Utils::prepareGSSPath = "prepareGSS";
 std::string const Utils::startGSSPath = "startGSS";
-std::string const Utils::finishedGSSPath = "finishedGSS";
+std::string const Utils::finishedWorkerStepPath = "finishedStep";
 std::string const Utils::cancelGSSPath = "cancelGSS";
 std::string const Utils::messagesPath = "messages";
 std::string const Utils::finalizeExecutionPath = "finalizeExecution";
@ -56,6 +56,7 @@ std::string const Utils::globalShardListKey = "globalShardList";
 std::string const Utils::totalVertexCount = "vertexCount";
 std::string const Utils::totalEdgeCount = "edgeCount";
 std::string const Utils::asyncMode = "async";
 std::string const Utils::gssDone = "gssDone";
 std::string const Utils::coordinatorIdKey = "coordinatorId";
 std::string const Utils::algorithmKey = "algorithm";
@ -73,7 +74,6 @@ std::string const Utils::receivedCountKey = "receivedCount";
 std::string const Utils::sendCountKey = "sendCount";
 std::string const Utils::superstepRuntimeKey = "superstepRuntime";
 std::string const Utils::userParametersKey = "userparams";
 std::string Utils::baseUrl(std::string dbName) {
--- a/arangod/Pregel/Utils.h
+++ b/arangod/Pregel/Utils.h
@ -45,7 +45,7 @@ class Utils {
  static std::string const finishedStartupPath;
  static std::string const prepareGSSPath;
  static std::string const startGSSPath;
-  static std::string const finishedGSSPath;
+  static std::string const finishedWorkerStepPath;
  static std::string const cancelGSSPath;
  static std::string const messagesPath;
  static std::string const finalizeExecutionPath;
@ -63,6 +63,7 @@ class Utils {
  static std::string const totalVertexCount;
  static std::string const totalEdgeCount;
  static std::string const asyncMode;
  static std::string const gssDone;
  static std::string const globalSuperstepKey;
  static std::string const messagesKey;
@ -70,14 +71,13 @@ class Utils {
  static std::string const recoveryMethodKey;
  static std::string const compensate;
  static std::string const rollback;
-  
+
  static std::string const storeResultsKey;
  static std::string const aggregatorValuesKey;
  static std::string const activeCountKey;
  static std::string const receivedCountKey;
  static std::string const sendCountKey;
  static std::string const superstepRuntimeKey;
  // User parameters
  static std::string const userParametersKey;
@ -88,11 +88,9 @@ class Utils {
  static int64_t countDocuments(TRI_vocbase_t* vocbase,
                                std::string const& collection);
  static std::shared_ptr<LogicalCollection> resolveCollection(
-                                                              std::string const& database,
+      std::string const& database, std::string const& collectionName,
                                                              std::string const& collectionName,
      std::map<std::string, std::string> const& collectionPlanIdMap);
-  static void resolveShard(LogicalCollection* info,
+  static void resolveShard(LogicalCollection* info, std::string const& shardKey,
                           std::string const& shardKey,
                           std::string const& vertexKey,
                           std::string& responsibleShard);
 };
--- a/arangod/Pregel/VertexComputation.h
+++ b/arangod/Pregel/VertexComputation.h
@ -35,7 +35,7 @@ namespace pregel {
 template <typename V, typename E, typename M>
 class Worker;
 class Aggregator;
-  
+
 template <typename V, typename E, typename M>
 class VertexContext {
  friend class Worker<V, E, M>;
@ -43,17 +43,16 @@ class VertexContext {
  uint64_t _gss = 0;
  WorkerContext* _context;
  GraphStore<V, E>* _graphStore;
-  const AggregatorUsage* _conductorAggregators;
+  const AggregatorHandler* _conductorAggregators;
-  AggregatorUsage* _workerAggregators;
+  AggregatorHandler* _workerAggregators;
  VertexEntry* _vertexEntry;
 public:
  /// Fetches the value stored under `name` from the conductor-side
  /// aggregators and returns it cast to `const T*`.
  /// NOTE(review): the C-style cast is unchecked; presumably T must match the
  /// aggregator's real value type -- confirm against AggregatorHandler.
  template <typename T>
  inline const T* getAggregatedValue(std::string const& name) {
    return (const T*)_conductorAggregators->getAggregatedValue(name);
  }
-  
+
  template <typename T>
  inline void aggregate(std::string const& name, const T* valuePtr) {
    _workerAggregators->aggregate(name, valuePtr);
@ -61,47 +60,48 @@ class VertexContext {
  inline WorkerContext const* context() { return _context; }
-  template<typename T>
+  template <typename T>
  T* mutableVertexData() {
-    return (T*) _graphStore->mutableVertexData(_vertexEntry);
+    return (T*)_graphStore->mutableVertexData(_vertexEntry);
  }
  V vertexData() { return _graphStore->copyVertexData(_vertexEntry); }
-  RangeIterator<Edge<E>> getEdges() { return _graphStore->edgeIterator(_vertexEntry); }
+  RangeIterator<Edge<E>> getEdges() {
    return _graphStore->edgeIterator(_vertexEntry);
  }
  /// Passes `ptr` and `size` to the graph store as the replacement data for
  /// the current vertex entry. Storing will potentially move the data around.
  void setVertexData(void const* ptr, size_t size) {
    // the graph store is called with a non-const pointer, hence the cast
    void* raw = const_cast<void*>(ptr);
    _graphStore->replaceVertexData(_vertexEntry, raw, size);
  }
-  
+
-  void voteHalt() {_vertexEntry->setActive(false); }
+  void voteHalt() { _vertexEntry->setActive(false); }
-  void voteActive() {_vertexEntry->setActive(true);}
+  void voteActive() { _vertexEntry->setActive(true); }
  inline uint64_t globalSuperstep() const { return _gss; }
 };
-  
+
 template <typename V, typename E, typename M>
 class VertexComputation : public VertexContext<V, E, M> {
  friend class Worker<V, E, M>;
  OutCache<M>* _outgoing;
-public:
+
-  
+ public:
  /// Appends `data` to the outgoing cache, addressed by the target shard and
  /// target key of `edge`.
  void sendMessage(Edge<E> const* edge, M const& data) {
    auto const& targetKey = edge->toKey();
    _outgoing->appendMessage(edge->targetShard(), targetKey, data);
  }
-  
+
  virtual void compute(MessageIterator<M> const& messages) = 0;
 };
-  
+
 template <typename V, typename E, typename M>
 class VertexCompensation : public VertexContext<V, E, M> {
  friend class Worker<V, E, M>;
-  
+
-public:
+ public:
  virtual void compensate(bool inLostPartition) = 0;
 };
 }
 }
 #endif
--- a/arangod/Pregel/Worker.cpp
+++ b/arangod/Pregel/Worker.cpp
@ -47,12 +47,15 @@ template <typename V, typename E, typename M>
 Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
                        VPackSlice initConfig)
    : _running(true), _state(vocbase->name(), initConfig), _algorithm(algo) {
  VPackSlice userParams = initConfig.get(Utils::userParametersKey);
  _workerContext.reset(algo->workerContext(userParams));
  _messageFormat.reset(algo->messageFormat());
  _messageCombiner.reset(algo->messageCombiner());
-  _conductorAggregators.reset(new AggregatorUsage(algo));
+  _conductorAggregators.reset(new AggregatorHandler(algo));
-  _workerAggregators.reset(new AggregatorUsage(algo));
+  _workerAggregators.reset(new AggregatorHandler(algo));
  _graphStore.reset(new GraphStore<V, E>(vocbase, _algorithm->inputFormat()));
  if (_messageCombiner) {
    _readCache.reset(
        new CombiningInCache<M>(_messageFormat.get(), _messageCombiner.get()));
@ -70,24 +73,24 @@ Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
  // of time. Therefore this is performed asynchronous
  ThreadPool* pool = PregelFeature::instance()->threadPool();
  pool->enqueue([this, vocbase, vc, ec] {
-    _graphStore.reset(
+    _graphStore->loadShards(this->_state);
-        new GraphStore<V, E>(vocbase, _state, _algorithm->inputFormat()));
+
    // execute the user defined startup code
    if (_workerContext) {
      _workerContext->_conductorAggregators = _conductorAggregators.get();
      _workerContext->_workerAggregators = _workerAggregators.get();
      _workerContext->_vertexCount = vc;
      _workerContext->_edgeCount = ec;
      _workerContext->preApplication();
      VPackBuilder package;
      package.openObject();
      package.add(Utils::senderKey,
                  VPackValue(ServerState::instance()->getId()));
      package.add(Utils::executionNumberKey,
                  VPackValue(_state.executionNumber()));
      package.close();
      _callConductor(Utils::finishedStartupPath, package.slice());
    }
    VPackBuilder package;
    package.openObject();
    package.add(Utils::senderKey, VPackValue(ServerState::instance()->getId()));
    package.add(Utils::executionNumberKey,
                VPackValue(_state.executionNumber()));
    package.close();
    _callConductor(Utils::finishedStartupPath, package.slice());
  });
 }
@ -120,13 +123,13 @@ void Worker<V, E, M>::prepareGlobalStep(VPackSlice data) {
  // clean up message caches, initialize gss
  _state._globalSuperstep = gss;
  _swapIncomingCaches();  // write cache becomes the readable cache
-  // parse aggregated values from conductor
+  _workerAggregators->resetValues();
  _conductorAggregators->resetValues();
  // parse aggregated values from conductor
  VPackSlice aggValues = data.get(Utils::aggregatorValuesKey);
  if (aggValues.isObject()) {
    _conductorAggregators->aggregateValues(aggValues);
  }
  _workerAggregators->resetValues();
  _superstepStats.reset();  // don't forget to reset before the superstep
  // execute context
  if (_workerContext != nullptr) {
@ -136,7 +139,7 @@ void Worker<V, E, M>::prepareGlobalStep(VPackSlice data) {
 template <typename V, typename E, typename M>
 void Worker<V, E, M>::receivedMessages(VPackSlice data) {
-  //LOG(INFO) << "Worker received some messages: " << data.toJson();
+  // LOG(INFO) << "Worker received some messages: " << data.toJson();
  VPackSlice gssSlice = data.get(Utils::globalSuperstepKey);
  VPackSlice messageSlice = data.get(Utils::messagesKey);
@ -231,7 +234,7 @@ void Worker<V, E, M>::_executeGlobalStep(
    outCache.reset(new ArrayOutCache<M>(&_state, inCache.get()));
  }
-  AggregatorUsage workerAggregator(_algorithm.get());
+  AggregatorHandler workerAggregator(_algorithm.get());
  // TODO look if we can avoid instantiating this
  std::unique_ptr<VertexComputation<V, E, M>> vertexComputation(
@ -250,9 +253,9 @@ void Worker<V, E, M>::_executeGlobalStep(
      vertexComputation->compute(messages);
      if (vertexEntry->active()) {
        activeCount++;
-      }/* else {
+      } /* else {
-        LOG(INFO) << vertexEntry->key() << " vertex has halted";
+         LOG(INFO) << vertexEntry->key() << " vertex has halted";
-      }*/
+       }*/
    }
    // TODO delete read messages immediately
    // technically messages to non-existing vertices trigger
@ -280,7 +283,7 @@ void Worker<V, E, M>::_executeGlobalStep(
 // called at the end of a worker thread, needs mutex
 template <typename V, typename E, typename M>
-void Worker<V, E, M>::_workerThreadDone(AggregatorUsage* threadAggregators,
+void Worker<V, E, M>::_workerThreadDone(AggregatorHandler* threadAggregators,
                                        WorkerStats const& threadStats) {
  MUTEX_LOCKER(guard, _threadMutex);  // only one thread at a time
@ -314,18 +317,22 @@ void Worker<V, E, M>::_workerThreadDone(AggregatorUsage* threadAggregators,
    _workerAggregators->serializeValues(package);
    package.close();
  }
-  _superstepStats.serializeValues(package);  // add stats
+  if (_superstepStats.isDone()) {
    _superstepStats.serializeValues(package);  // add stats
    package.add(Utils::gssDone, VPackValue(true));
  }
  package.close();
-
+  _workerAggregators->resetValues();
  // TODO ask how to implement message sending without waiting for a response
  // ============ Call Coordinator ============
-  _callConductor(Utils::finishedGSSPath, package.slice());
+  _callConductor(Utils::finishedWorkerStepPath, package.slice());
 }
 template <typename V, typename E, typename M>
 void Worker<V, E, M>::finalizeExecution(VPackSlice body) {
  // Only expect serial calls from the conductor.
-  //Lock to prevent malicous activity
+  // Lock to prevent malicous activity
  MUTEX_LOCKER(guard, _conductorMutex);
  _running = false;
@ -356,7 +363,7 @@ void Worker<V, E, M>::finalizeExecution(VPackSlice body) {
 template <typename V, typename E, typename M>
 void Worker<V, E, M>::startRecovery(VPackSlice data) {
  MUTEX_LOCKER(guard, _conductorMutex);
-  
+
  _running = true;
  VPackSlice method = data.get(Utils::recoveryMethodKey);
  if (method.compareString(Utils::compensate) == 0) {
@ -372,8 +379,8 @@ void Worker<V, E, M>::startRecovery(VPackSlice data) {
 template <typename V, typename E, typename M>
 void Worker<V, E, M>::compensateStep(VPackSlice data) {
-   MUTEX_LOCKER(guard, _conductorMutex);
+  MUTEX_LOCKER(guard, _conductorMutex);
-  
+
  _conductorAggregators->resetValues();
  VPackSlice aggValues = data.get(Utils::aggregatorValuesKey);
  if (aggValues.isObject()) {
--- a/arangod/Pregel/Worker.h
+++ b/arangod/Pregel/Worker.h
@ -25,11 +25,11 @@
 #include "Basics/Common.h"
 #include "Basics/Mutex.h"
-#include "Pregel/AggregatorUsage.h"
+#include "Pregel/AggregatorHandler.h"
 #include "Pregel/Algorithm.h"
 #include "Pregel/Statistics.h"
 #include "Pregel/WorkerContext.h"
 #include "Pregel/WorkerState.h"
 #include "Pregel/Statistics.h"
 struct TRI_vocbase_t;
 namespace arangodb {
@ -40,7 +40,7 @@ class IWorker {
 public:
  virtual ~IWorker(){};
  virtual void prepareGlobalStep(VPackSlice data) = 0;
-  virtual void startGlobalStep(VPackSlice data) = 0;  // called by coordinator
+  virtual void startGlobalStep(VPackSlice data) = 0;   // called by coordinator
  virtual void cancelGlobalStep(VPackSlice data) = 0;  // called by coordinator
  virtual void receivedMessages(VPackSlice data) = 0;
  virtual void finalizeExecution(VPackSlice data) = 0;
@ -53,58 +53,62 @@ class GraphStore;
 template <typename M>
 class InCache;
-  
+
 template <typename T>
 class RangeIterator;
 class VertexEntry;
-  
+
 template <typename V, typename E, typename M>
 class VertexContext;
 template <typename V, typename E, typename M>
 class Worker : public IWorker {
-  //friend class arangodb::RestPregelHandler;
+  // friend class arangodb::RestPregelHandler;
-  
+
  bool _running = true;
  WorkerState _state;
  WorkerStats _workerStats;
  uint64_t _expectedGSS = 0;
  std::unique_ptr<Algorithm<V, E, M>> _algorithm;
  std::unique_ptr<WorkerContext> _workerContext;
-  Mutex _conductorMutex;// locks callbak methods
+  Mutex _conductorMutex;       // locks callbak methods
-  mutable Mutex _threadMutex;// locks _workerThreadDone
+  mutable Mutex _threadMutex;  // locks _workerThreadDone
-  
+
  // only valid while recovering to determine the offset
  // where new vertices were inserted
  size_t _preRecoveryTotal;
- 
+
  std::unique_ptr<AggregatorHandler> _conductorAggregators;
  std::unique_ptr<AggregatorHandler> _workerAggregators;
  std::unique_ptr<GraphStore<V, E>> _graphStore;
  std::unique_ptr<InCache<M>> _readCache, _writeCache, _nextPhase;
  std::unique_ptr<AggregatorUsage> _conductorAggregators;
  std::unique_ptr<AggregatorUsage> _workerAggregators;
  std::unique_ptr<MessageFormat<M>> _messageFormat;
  std::unique_ptr<MessageCombiner<M>> _messageCombiner;
-  
+  // from previous or current superstep
  std::unique_ptr<InCache<M>> _readCache;
  // for the current or next superstep
  std::unique_ptr<InCache<M>> _writeCache;
  // intended for the next superstep phase
  std::unique_ptr<InCache<M>> _nextPhase;
  WorkerStats _superstepStats;
  size_t _runningThreads;
-  
+
  /// Exchanges the read and the write cache, then clears the cache that is
  /// the write cache after the exchange (i.e. the former read cache).
  void _swapIncomingCaches() {
    _readCache.swap(_writeCache);
    // order matters: clear only after the swap, so the messages that were
    // just moved into _readCache are kept
    _writeCache->clear();
  }
-  
+
-  void _initializeVertexContext(VertexContext<V, E, M> *ctx);
+  void _initializeVertexContext(VertexContext<V, E, M>* ctx);
-  void _executeGlobalStep(RangeIterator<VertexEntry> &vertexIterator);
+  void _executeGlobalStep(RangeIterator<VertexEntry>& vertexIterator);
-  void _workerThreadDone(AggregatorUsage *threadAggregators,
+  void _workerThreadDone(AggregatorHandler* threadAggregators,
                         WorkerStats const& threadStats);
  void _callConductor(std::string path, VPackSlice message);
 public:
  Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algorithm,
         VPackSlice params);
  ~Worker();
-  
+
  // ====== called by rest handler =====
  void prepareGlobalStep(VPackSlice data) override;
  void startGlobalStep(VPackSlice data) override;
--- a/arangod/Pregel/WorkerContext.h
+++ b/arangod/Pregel/WorkerContext.h
@ -25,8 +25,8 @@
 #include <velocypack/Slice.h>
 #include <velocypack/velocypack-aliases.h>
 #include "Pregel/AggregatorUsage.h"
 #include "Basics/Common.h"
 #include "Pregel/AggregatorHandler.h"
 #include "Pregel/Utils.h"
 namespace arangodb {
@ -37,8 +37,8 @@ class WorkerContext {
  friend class Worker;
  uint64_t _vertexCount, _edgeCount;
-  const AggregatorUsage* _conductorAggregators;
+  const AggregatorHandler* _conductorAggregators;
-  AggregatorUsage* _workerAggregators;
+  AggregatorHandler* _workerAggregators;
 protected:
  template <typename T>
@ -55,9 +55,9 @@ class WorkerContext {
  virtual void preGlobalSuperstep(uint64_t gss){};
  virtual void postGlobalSuperstep(uint64_t gss){};
  virtual void postApplication(){};
-  
+
 public:
-  WorkerContext(VPackSlice params) {};
+  WorkerContext(VPackSlice params){};
  inline uint64_t vertexCount() const { return _vertexCount; }
--- a/arangod/Pregel/WorkerState.cpp
+++ b/arangod/Pregel/WorkerState.cpp
@ -35,6 +35,7 @@ WorkerState::WorkerState(DatabaseID dbname, VPackSlice params)
  VPackSlice execNum = params.get(Utils::executionNumberKey);
  VPackSlice collectionPlanIdMap = params.get(Utils::collectionPlanIdMapKey);
  VPackSlice globalShards = params.get(Utils::globalShardListKey);
  //VPackSlice userParams = params.get(Utils::userParametersKey);
  if (!coordID.isString() || !edgeShardMap.isObject() ||
      !vertexShardMap.isObject() || !execNum.isInteger() ||
      !collectionPlanIdMap.isObject() || !globalShards.isArray()) {
--- a/arangod/Pregel/WorkerState.h
+++ b/arangod/Pregel/WorkerState.h
@ -23,8 +23,8 @@
 #ifndef ARANGODB_PREGEL_WORKER_STATE_H
 #define ARANGODB_PREGEL_WORKER_STATE_H 1
 #include <algorithm>
 #include <velocypack/velocypack-aliases.h>
 #include <algorithm>
 #include "Basics/Common.h"
 #include "Cluster/ClusterInfo.h"
@ -48,25 +48,30 @@ class WorkerState {
  inline uint64_t executionNumber() const { return _executionNumber; }
  inline uint64_t globalSuperstep() const { return _globalSuperstep; }
-  
+
-  inline bool asynchronousMode() const {return _asynchronousMode;}
+  inline uint64_t localSuperstep() const { return _localSuperstep; }
  inline bool asynchronousMode() const { return _asynchronousMode; }
  inline std::string const& coordinatorId() const { return _coordinatorId; }
  inline std::string const& database() const { return _database; }
-  inline std::map<CollectionID, std::vector<ShardID>> const& vertexCollectionShards() const {
+  inline std::map<CollectionID, std::vector<ShardID>> const&
  vertexCollectionShards() const {
    return _vertexCollectionShards;
  }
-  inline std::map<CollectionID, std::vector<ShardID>> const& edgeCollectionShards() const {
+  inline std::map<CollectionID, std::vector<ShardID>> const&
  edgeCollectionShards() const {
    return _edgeCollectionShards;
  }
-  inline std::map<CollectionID, std::string> const& collectionPlanIdMap() const {
+  inline std::map<CollectionID, std::string> const& collectionPlanIdMap()
      const {
    return _collectionPlanIdMap;
  };
-  
+
  // same content on every worker, has to stay equal!!!!
  inline std::vector<ShardID> const& globalShardIDs() const {
    return _globalShardIDs;
@ -83,30 +88,33 @@ class WorkerState {
    return _localEdgeShardIDs;
  };
  inline size_t shardId(ShardID const& responsibleShard) const {
-    auto it = std::find(_globalShardIDs.begin(), _globalShardIDs.end(), responsibleShard);
+    auto it = std::find(_globalShardIDs.begin(), _globalShardIDs.end(),
-    return it != _globalShardIDs.end() ? it - _globalShardIDs.begin() : (uint16_t)-1;
+                        responsibleShard);
    return it != _globalShardIDs.end() ? it - _globalShardIDs.begin()
                                       : (uint16_t)-1;
  }
  // index in globalShardIDs
  inline bool isLocalVertexShard(size_t shardIndex) const {
    // TODO cache this? prob small
    ShardID const& shard = _globalShardIDs[shardIndex];
-    return std::find(_localVertexShardIDs.begin(), _localVertexShardIDs.end(), shard)
+    return std::find(_localVertexShardIDs.begin(), _localVertexShardIDs.end(),
-            != _localVertexShardIDs.end();
+                     shard) != _localVertexShardIDs.end();
  }
-  
+
 private:
  uint64_t _executionNumber = 0;
  uint64_t _globalSuperstep = 0;
  uint64_t _localSuperstep = 0;
  bool _asynchronousMode = false;
  // uint64_t _numWorkerThreads = 1;
  std::string _coordinatorId;
  std::string _database;
-  
+
  std::vector<ShardID> _globalShardIDs;
  std::vector<ShardID> _localVertexShardIDs, _localEdgeShardIDs;
-  
+
-  std::map<CollectionID, std::vector<ShardID>> _vertexCollectionShards, _edgeCollectionShards;
+  std::map<CollectionID, std::vector<ShardID>> _vertexCollectionShards,
      _edgeCollectionShards;
  std::map<std::string, std::string> _collectionPlanIdMap;
 };
 }
--- a/arangod/Pregel/examples/list-importer.js
+++ b/arangod/Pregel/examples/list-importer.js
@ -51,7 +51,7 @@ module.exports = function (gname, filename) {
      graph[eColl].save(vColl+"/"+parts[0],
                        vColl+"/"+parts[1],
-                        {_vertex:parts[0], value:-1});
+                        {_vertex:parts[0]});
    }
  });
 };
--- a/arangod/RestHandler/RestPregelHandler.cpp
+++ b/arangod/RestHandler/RestPregelHandler.cpp
@ -118,10 +118,10 @@ RestStatus RestPregelHandler::execute() {
      if (exe) {
        exe->receivedMessages(body);
      }
-    } else if (suffix[0] == Utils::finishedGSSPath) {
+    } else if (suffix[0] == Utils::finishedWorkerStepPath) {
      Conductor *exe = PregelFeature::instance()->conductor(executionNumber);
      if (exe) {
-        exe->finishedGlobalStep(body);
+        exe->finishedWorkerStep(body);
      } else {
        LOG(ERR) << "Conductor not found: " << executionNumber;
      }