1
0
Fork 0

reorganization

This commit is contained in:
Simon Grätzer 2016-12-13 13:54:14 +01:00
parent 3a4a07f13d
commit 63f87fbf52
51 changed files with 4090 additions and 468 deletions

View File

@ -111,6 +111,12 @@ set(SKIP_INSTALL_ALL ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/snappy/google-snappy-d53de18) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/snappy/google-snappy-d53de18)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/rocksdb) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/rocksdb)
################################################################################
## LIBCUCKOO
################################################################################
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libcuckoo/)
################################################################################ ################################################################################
## LINK_DIRECTORIES ## LINK_DIRECTORIES
################################################################################ ################################################################################

37
3rdParty/libcuckoo/.gitignore vendored Executable file
View File

@ -0,0 +1,37 @@
*.a
*.in
*.la
*.lo
*.log
*.o
*.out
*.trs
*~
.DS_Store
.deps
.libs
Makefile
aclocal.m4
autom4te.cache
cityhash_unittest
compile
config.guess
config.h
config.log
config.status
config.sub
configure
depcomp
examples/count_freq
examples/hellohash
examples/nested_table
install-sh
libtool
libtool.m4
lt*.m4
ltmain.sh
missing
stamp-h1
test-driver

5
3rdParty/libcuckoo/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,5 @@
cmake_minimum_required (VERSION 2.8)
# Header search paths for the target defined in this directory:
#  - ${PROJECT_BINARY_DIR}/include/ is kept as it was,
#  - include/ lets <cityhash/city.h> (used by city_hasher.hh) resolve,
#  - include/cityhash/ lets the bare <city.h> / <citycrc.h> includes resolve,
#    which city.cc and citycrc.h use when SSE4.2 is enabled.
include_directories("${PROJECT_BINARY_DIR}/include/")
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include/")
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include/cityhash/")
#target_link_libraries(LIBCUCKOO pthread)
# The library target only compiles the bundled CityHash implementation.
add_library(libcuckoo include/cityhash/city.cc)

18
3rdParty/libcuckoo/LICENSE vendored Executable file
View File

@ -0,0 +1,18 @@
Copyright (C) 2013, Carnegie Mellon University and Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---------------------------
CityHash (lib/city.h, lib/city.cc) is Copyright (c) Google, Inc. and
has its own license, as detailed in the source files.

107
3rdParty/libcuckoo/README.md vendored Executable file
View File

@ -0,0 +1,107 @@
Note to existing users: the iterator implementation has changed significantly
since we introduced the `locked_table` in [this
commit](https://github.com/efficient/libcuckoo/commit/2bedb3d0c811cd8b3adb3e78e2d2a28c66ba1d1d).
Please see the [`locked_table`
documentation](http://efficient.github.io/libcuckoo/classcuckoohash__map_1_1locked__table.html)
and [examples
directory](https://github.com/efficient/libcuckoo/tree/master/examples) for
information and examples of how to use iterators.
libcuckoo
=========
libcuckoo provides a high-performance, compact hash table that allows
multiple concurrent reader and writer threads.
The Doxygen-generated documentation is available at the
[project page](http://efficient.github.io/libcuckoo/).
Authors: Manu Goyal, Bin Fan, Xiaozhou Li, David G. Andersen, and Michael Kaminsky
For details about this algorithm and citations, please refer to
our papers in [NSDI 2013][1] and [EuroSys 2014][2]. However, some of the details of the
hashing algorithm have been improved since that work (e.g., the previous algorithm
in [1] serializes all writer threads, while our current
implementation supports multiple concurrent writers), and this source
code is now the definitive reference.
[1]: http://www.cs.cmu.edu/~dga/papers/memc3-nsdi2013.pdf "MemC3: Compact and Concurrent Memcache with Dumber Caching and Smarter Hashing"
[2]: http://www.cs.princeton.edu/~mfreed/docs/cuckoo-eurosys14.pdf "Algorithmic Improvements for Fast Concurrent Cuckoo Hashing"
Requirements
================
This library has been tested on Mac OSX >= 10.8 and Ubuntu >= 12.04.
It compiles with clang++ >= 3.3 and g++ >= 4.7; however, we strongly suggest
using the latest versions of both compilers, as they have greatly improved
support for atomic operations. Building the library requires the
autotools. Install them on Ubuntu with:
$ sudo apt-get update && sudo apt-get install build-essential autoconf libtool
Building
==========
$ autoreconf -fis
$ ./configure
$ make
$ make install
Usage
==========
To build a program with the hash table, include
`libcuckoo/cuckoohash_map.hh` into your source file. If you want to
use CityHash, which we recommend, we have provided a wrapper
compatible with the `std::hash` type around it in the
`libcuckoo/city_hasher.hh` file. If compiling with CityHash, add the
`-lcityhash` flag. You must also enable C++11 features on your
compiler. Compiling the file `examples/count_freq.cc` with g++
might look like this:
$ g++ -std=c++11 examples/count_freq.cc -lcityhash
The
[examples directory](https://github.com/efficient/libcuckoo/tree/master/examples)
contains some simple demonstrations of some of the basic features of the hash
table.
Tests
==========
The [tests directory](https://github.com/efficient/libcuckoo/tree/master/tests)
contains a number of tests and benchmarks of the hash table, which
also can serve as useful examples of how to use the table's various features.
After running `make all`, the entire test suite can be run with the `make check`
command. This will not run the benchmarks, which must be run individually. The
test executables, which have the suffix `.out`, can be run individually as well.
Issue Report
============
To let us know your questions or issues, we recommend you
[report an issue](https://github.com/efficient/libcuckoo/issues) on
GitHub. You can also email us at
[libcuckoo-dev@googlegroups.com](mailto:libcuckoo-dev@googlegroups.com).
License
===========
Copyright (C) 2013, Carnegie Mellon University and Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---------------------------
CityHash (lib/city.h, lib/city.cc) is Copyright (c) Google, Inc. and
has its own license, as detailed in the source files.

19
3rdParty/libcuckoo/include/cityhash/COPYING vendored Executable file
View File

@ -0,0 +1,19 @@
// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

627
3rdParty/libcuckoo/include/cityhash/city.cc vendored Executable file
View File

@ -0,0 +1,627 @@
// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// CityHash, by Geoff Pike and Jyrki Alakuijala
//
// This file provides CityHash64() and related functions.
//
// It's probably possible to create even faster hash functions by
// writing a program that systematically explores some of the space of
// possible hash functions, by using SIMD instructions, or by
// compromising on hash quality.
#include "city.h"
#include <algorithm>
#include <string.h> // for memcpy and memset
using namespace std;
// Loads a uint64 from the 8 bytes at p. The copy goes through memcpy, so p
// does not have to be aligned for a 64-bit access. Bytes are taken in memory
// order; no byte swapping happens here.
static uint64 UNALIGNED_LOAD64(const char *p) {
  uint64 word;
  memcpy(&word, p, sizeof(uint64));
  return word;
}
// Loads a uint32 from the 4 bytes at p. The copy goes through memcpy, so p
// does not have to be aligned for a 32-bit access. Bytes are taken in memory
// order; no byte swapping happens here.
static uint32 UNALIGNED_LOAD32(const char *p) {
  uint32 word;
  memcpy(&word, p, sizeof(uint32));
  return word;
}
// Select a byte-swap primitive for the current platform and expose it under
// the names bswap_32 / bswap_64, which the rest of this file uses.
// Platforms without a dedicated branch fall back to <byteswap.h>.
#ifdef _MSC_VER
#include <stdlib.h>
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)
#elif defined(__APPLE__)
// Mac OS X / Darwin features
#include <libkern/OSByteOrder.h>
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)
#elif defined(__sun) || defined(sun)
// Solaris / illumos: no <byteswap.h>; use the BSWAP_* macros instead.
#include <sys/byteorder.h>
#define bswap_32(x) BSWAP_32(x)
#define bswap_64(x) BSWAP_64(x)
#elif defined(__FreeBSD__)
// FreeBSD: no <byteswap.h>; the swap helpers live in <sys/endian.h>.
#include <sys/endian.h>
#define bswap_32(x) bswap32(x)
#define bswap_64(x) bswap64(x)
#elif defined(__OpenBSD__)
// OpenBSD: no <byteswap.h>; the helpers are called swap32 / swap64.
#include <sys/types.h>
#define bswap_32(x) swap32(x)
#define bswap_64(x) swap64(x)
#elif defined(__NetBSD__)
#include <sys/types.h>
#include <machine/bswap.h>
#if defined(__BSWAP_RENAME) && !defined(__bswap_32)
#define bswap_32(x) bswap32(x)
#define bswap_64(x) bswap64(x)
#endif
#else
#include <byteswap.h>
#endif
// Conversion applied to every word loaded from the input: the value is used
// as loaded unless WORDS_BIGENDIAN is defined, in which case it is byte-swapped.
#ifndef WORDS_BIGENDIAN
#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)
#else
#define uint32_in_expected_order(x) (bswap_32(x))
#define uint64_in_expected_order(x) (bswap_64(x))
#endif
// LIKELY(x) evaluates to the truth value of x and, where the compiler offers
// __builtin_expect, tells it that x is expected to be true.
// HAVE_BUILTIN_EXPECT is normally supplied by an autotools config.h, which
// this file does not include; GCC-compatible compilers are therefore detected
// directly so the hint is not silently dropped.
#if !defined(LIKELY)
#if HAVE_BUILTIN_EXPECT || defined(__GNUC__) || defined(__clang__)
#define LIKELY(x) (__builtin_expect(!!(x), 1))
#else
#define LIKELY(x) (x)
#endif
#endif
// Reads 8 bytes at p (no alignment requirement) and passes the value through
// uint64_in_expected_order.
static uint64 Fetch64(const char *p) {
  const uint64 raw = UNALIGNED_LOAD64(p);
  return uint64_in_expected_order(raw);
}
// Reads 4 bytes at p (no alignment requirement) and passes the value through
// uint32_in_expected_order.
static uint32 Fetch32(const char *p) {
  const uint32 raw = UNALIGNED_LOAD32(p);
  return uint32_in_expected_order(raw);
}
// Some primes between 2^63 and 2^64 for various uses.
// They serve as multipliers and additive constants in the 64-bit and 128-bit
// hash routines below.
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
static const uint64 k1 = 0xb492b66fbe98f273ULL;
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
// Magic numbers for 32-bit hashing. Copied from Murmur3.
// Used as multipliers by Mur(), Hash32Len0to4() and CityHash32().
static const uint32_t c1 = 0xcc9e2d51;
static const uint32_t c2 = 0x1b873593;
// 32-bit to 32-bit integer mixer taken from Murmur3: three xor-shift steps
// (by 16, 13 and 16 bits) with a multiplication after each of the first two.
static uint32 fmix(uint32 h)
{
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
}
// Rotates val right by shift bits. A shift of 0 returns val unchanged; it is
// special-cased because the general formula would shift left by 32, which is
// undefined.
static uint32 Rotate32(uint32 val, int shift) {
  if (shift == 0) {
    return val;
  }
  return (val >> shift) | (val << (32 - shift));
}
// Cyclically permutes three variables in place: afterwards a holds the old c,
// b holds the old a, and c holds the old b.
#undef PERMUTE3
#define PERMUTE3(a, b, c) \
  do {                    \
    std::swap(a, b);      \
    std::swap(a, c);      \
  } while (0)
// Murmur3-style combiner for two 32-bit values: scrambles a with c1, a rotate
// by 17 and c2, xors the result into h, rotates h by 19 and finishes with
// h * 5 + 0xe6546b64.
static uint32 Mur(uint32 a, uint32 h) {
  a = Rotate32(a * c1, 17) * c2;
  h = Rotate32(h ^ a, 19);
  return h * 5 + 0xe6546b64;
}
// 32-bit hash for inputs of 13 to 24 bytes (the range CityHash32 routes here).
// Six overlapping 32-bit words taken from the start, the middle and the end
// of the input are folded one after another into an accumulator that starts
// out as len.
static uint32 Hash32Len13to24(const char *s, size_t len) {
  const size_t half = len >> 1;
  uint32 h = len;
  h = Mur(Fetch32(s + half - 4), h);
  h = Mur(Fetch32(s + 4), h);
  h = Mur(Fetch32(s + len - 8), h);
  h = Mur(Fetch32(s + half), h);
  h = Mur(Fetch32(s), h);
  h = Mur(Fetch32(s + len - 4), h);
  return fmix(h);
}
// 32-bit hash for inputs of 0 to 4 bytes (the range CityHash32 routes here).
// Each byte is read as a signed char and accumulated into b; c collects the
// running xor of b. The length is mixed in at the end.
static uint32 Hash32Len0to4(const char *s, size_t len) {
  uint32 b = 0;
  uint32 c = 9;
  for (size_t pos = 0; pos < len; ++pos) {
    const signed char byte = s[pos];
    b = b * c1 + byte;
    c ^= b;
  }
  const uint32 inner = Mur(len, c);
  return fmix(Mur(b, inner));
}
// 32-bit hash for inputs of 5 to 12 bytes (the range CityHash32 routes here).
// Combines the first word, the last word and a word at offset 0 or 4
// (chosen by (len >> 1) & 4) with values derived from the length.
static uint32 Hash32Len5to12(const char *s, size_t len) {
  const uint32 len32 = static_cast<uint32>(len);
  const uint32 d = len32 * 5;
  const uint32 a = len32 + Fetch32(s);
  const uint32 b = d + Fetch32(s + len - 4);
  const uint32 c = 9 + Fetch32(s + ((len >> 1) & 4));
  return fmix(Mur(c, Mur(b, Mur(a, d))));
}
// Computes a 32-bit hash of the len bytes starting at s.
// Inputs of at most 24 bytes are handed to one of the three short-input
// helpers. Longer inputs seed three accumulators (h, g, f) from the last 20
// bytes, then consume (len - 1) / 20 blocks of 20 bytes from the front, and
// finally fold g and f into h.
uint32 CityHash32(const char *s, size_t len) {
if (len <= 24) {
return len <= 12 ?
(len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
Hash32Len13to24(s, len);
}
// len > 24
uint32 h = len, g = c1 * len, f = g;
// Pre-mix the five 32-bit words that make up the final 20 bytes of the input.
uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
// Fold those words into the accumulators: a0 and a2 into h, a1 and a3 into g,
// a4 into f.
h ^= a0;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
h ^= a2;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
g ^= a1;
g = Rotate32(g, 19);
g = g * 5 + 0xe6546b64;
g ^= a3;
g = Rotate32(g, 19);
g = g * 5 + 0xe6546b64;
f += a4;
f = Rotate32(f, 19);
f = f * 5 + 0xe6546b64;
// Number of 20-byte blocks read from the front; at least 1 because len > 24.
size_t iters = (len - 1) / 20;
do {
// These locals shadow the tail words above; they hold the current block.
uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
uint32 a1 = Fetch32(s + 4);
uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
uint32 a4 = Fetch32(s + 16);
h ^= a0;
h = Rotate32(h, 18);
h = h * 5 + 0xe6546b64;
f += a1;
f = Rotate32(f, 19);
f = f * c1;
g += a2;
g = Rotate32(g, 18);
g = g * 5 + 0xe6546b64;
h ^= a3 + a1;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
g ^= a4;
g = bswap_32(g) * 5;
h += a4 * 5;
h = bswap_32(h);
f += a0;
// Rotate the roles of the three accumulators before the next block.
PERMUTE3(f, h, g);
s += 20;
} while (--iters != 0);
// Final avalanche: scramble g and f, then fold both into h.
g = Rotate32(g, 11) * c1;
g = Rotate32(g, 17) * c1;
f = Rotate32(f, 11) * c1;
f = Rotate32(f, 17) * c1;
h = Rotate32(h + g, 19);
h = h * 5 + 0xe6546b64;
h = Rotate32(h, 17) * c1;
h = Rotate32(h + f, 19);
h = h * 5 + 0xe6546b64;
h = Rotate32(h, 17) * c1;
return h;
}
// Rotates a 64-bit value right by shift bits. A shift of 0 returns val
// unchanged; it is special-cased because the general formula would shift
// left by 64, which is undefined.
static uint64 Rotate(uint64 val, int shift) {
  if (shift == 0) {
    return val;
  }
  return (val >> shift) | (val << (64 - shift));
}
// Xors val with itself shifted right by 47 bits, folding the high bits into
// the low ones.
static uint64 ShiftMix(uint64 val) {
  const uint64 high_part = val >> 47;
  return val ^ high_part;
}
// Hashes two 64-bit values down to one by packing them into a uint128
// (u as the low half, v as the high half) and applying Hash128to64.
static uint64 HashLen16(uint64 u, uint64 v) {
  const uint128 packed(u, v);
  return Hash128to64(packed);
}
// Murmur-inspired reduction of two 64-bit values to one, using the
// caller-supplied multiplier mul: two multiply/xor-shift-by-47 rounds
// followed by a final multiplication.
static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
  const uint64 a = ShiftMix((u ^ v) * mul);
  const uint64 b = ShiftMix((v ^ a) * mul);
  return b * mul;
}
// 64-bit hash for short inputs. The strategy depends on the length:
//   len == 0     -> the constant k2
//   1 <= len < 4 -> mixes the first, middle and last byte with the length
//   4 <= len < 8 -> combines the first and the last 32-bit word
//   len >= 8     -> combines the first and the last 64-bit word
static uint64 HashLen0to16(const char *s, size_t len) {
  if (len == 0) {
    return k2;
  }
  if (len < 4) {
    const uint8 first = s[0];
    const uint8 middle = s[len >> 1];
    const uint8 last = s[len - 1];
    const uint32 y = static_cast<uint32>(first) + (static_cast<uint32>(middle) << 8);
    const uint32 z = len + (static_cast<uint32>(last) << 2);
    return ShiftMix(y * k2 ^ z * k0) * k2;
  }
  const uint64 mul = k2 + len * 2;
  if (len < 8) {
    const uint64 head = Fetch32(s);
    return HashLen16(len + (head << 3), Fetch32(s + len - 4), mul);
  }
  const uint64 head = Fetch64(s) + k2;
  const uint64 tail = Fetch64(s + len - 8);
  const uint64 mixed_tail = Rotate(tail, 37) * mul + head;
  const uint64 mixed_head = (Rotate(head, 25) + tail) * mul;
  return HashLen16(mixed_tail, mixed_head, mul);
}
// 64-bit hash for inputs of 17 to 32 bytes: combines the first 16 and the
// last 16 bytes (read as four 64-bit words) with a length-dependent
// multiplier. This probably works well for 16-byte strings as well, but it
// may be overkill in that case.
static uint64 HashLen17to32(const char *s, size_t len) {
  const uint64 mul = k2 + len * 2;
  const uint64 head0 = Fetch64(s) * k1;
  const uint64 head1 = Fetch64(s + 8);
  const uint64 tail1 = Fetch64(s + len - 8) * mul;
  const uint64 tail0 = Fetch64(s + len - 16) * k2;
  const uint64 u = Rotate(head0 + head1, 43) + Rotate(tail1, 30) + tail0;
  const uint64 v = head0 + Rotate(head1 + k2, 18) + tail1;
  return HashLen16(u, v, mul);
}
// Quick-and-dirty 16-byte hash of 48 bytes of input: the four words w, x, y, z
// plus the two seeds a and b. Callers do best to pass "random-looking" values
// for a and b.
static pair<uint64, uint64> WeakHashLen32WithSeeds(
    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
  const uint64 seeded = a + w;
  const uint64 total = seeded + x + y;
  const uint64 mixed = Rotate(b + seeded + z, 21) + Rotate(total, 44);
  return make_pair(total + z, mixed + seeded);
}
// Quick-and-dirty 16-byte hash of s[0] ... s[31] together with the seeds a
// and b: loads the 32 bytes as four 64-bit words and forwards to the
// word-based overload.
static pair<uint64, uint64> WeakHashLen32WithSeeds(
    const char* s, uint64 a, uint64 b) {
  const uint64 w = Fetch64(s);
  const uint64 x = Fetch64(s + 8);
  const uint64 y = Fetch64(s + 16);
  const uint64 z = Fetch64(s + 24);
  return WeakHashLen32WithSeeds(w, x, y, z, a, b);
}
// Return an 8-byte hash for 33 to 64 bytes.
// Reads the first 32 and the last 32 bytes of the input as eight 64-bit
// words (the two ranges overlap when len < 64) and mixes them with a
// length-dependent multiplier.
static uint64 HashLen33to64(const char *s, size_t len) {
// The multiplier depends on the length.
uint64 mul = k2 + len * 2;
// a, b, e, f come from the first 32 bytes; d, c, h, g from the last 32 bytes.
uint64 a = Fetch64(s) * k2;
uint64 b = Fetch64(s + 8);
uint64 c = Fetch64(s + len - 24);
uint64 d = Fetch64(s + len - 32);
uint64 e = Fetch64(s + 16) * k2;
uint64 f = Fetch64(s + 24) * 9;
uint64 g = Fetch64(s + len - 8);
uint64 h = Fetch64(s + len - 16) * mul;
// Mixing rounds; bswap_64 moves the well-mixed high bits produced by the
// multiplications into the low bytes.
uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
uint64 v = ((a + g) ^ d) + f + 1;
uint64 w = bswap_64((u + v) * mul) + h;
uint64 x = Rotate(e + f, 42) + c;
uint64 y = (bswap_64((v + w) * mul) + g) * mul;
uint64 z = e + f + c;
a = bswap_64((x + z) * mul + y) + b;
b = ShiftMix((z + a) * mul + d + h) * mul;
return b + x;
}
// Computes a 64-bit hash of the len bytes starting at s.
// Inputs of up to 64 bytes are dispatched to a length-specific helper; longer
// inputs are processed in 64-byte chunks after the state has been seeded from
// the last 64 bytes.
uint64 CityHash64(const char *s, size_t len) {
if (len <= 32) {
if (len <= 16) {
return HashLen0to16(s, len);
} else {
return HashLen17to32(s, len);
}
} else if (len <= 64) {
return HashLen33to64(s, len);
}
// For strings over 64 bytes we hash the end first, and then as we
// loop we keep 56 bytes of state: v, w, x, y, and z.
uint64 x = Fetch64(s + len - 40);
uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
x = x * k1 + Fetch64(s);
// Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
// (len - 1) is rounded down, so the result is at least 64 here and the loop
// below never reads past the original end of the input.
len = (len - 1) & ~static_cast<size_t>(63);
do {
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
len -= 64;
} while (len != 0);
// Collapse the 56 bytes of state into the 64-bit result.
return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
HashLen16(v.second, w.second) + x);
}
// Seeded 64-bit hash: forwards to CityHash64WithSeeds with k2 as the first
// seed and the caller's seed as the second.
uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
  const uint64 first_seed = k2;
  return CityHash64WithSeeds(s, len, first_seed, seed);
}
// Doubly seeded 64-bit hash: computes the unseeded CityHash64 of the input,
// subtracts seed0 from it and combines the difference with seed1 via
// HashLen16.
uint64 CityHash64WithSeeds(const char *s, size_t len,
                           uint64 seed0, uint64 seed1) {
  const uint64 unseeded = CityHash64(s, len);
  return HashLen16(unseeded - seed0, seed1);
}
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Murmur.
// The two halves of seed initialise the state words a and b.
static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
uint64 a = Uint128Low64(seed);
uint64 b = Uint128High64(seed);
uint64 c = 0;
uint64 d = 0;
// Signed on purpose: for len <= 16 the difference is <= 0, which selects the
// short-input branch and also serves as the loop counter below.
signed long l = len - 16;
if (l <= 0) { // len <= 16
a = ShiftMix(a * k1) * k1;
c = b * k1 + HashLen0to16(s, len);
d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
} else { // len > 16
// Seed c and d from the last 16 bytes, then walk the input from the front,
// consuming 16 bytes per iteration.
c = HashLen16(Fetch64(s + len - 8) + k1, a);
d = HashLen16(b + len, c + Fetch64(s + len - 16));
a += d;
do {
a ^= ShiftMix(Fetch64(s) * k1) * k1;
a *= k1;
b ^= a;
c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
c *= k1;
d ^= c;
s += 16;
l -= 16;
} while (l > 0);
}
// Reduce the four state words to the two 64-bit halves of the result.
a = HashLen16(a, c);
b = HashLen16(d, b);
return uint128(a ^ b, HashLen16(b, a));
}
// Computes a 128-bit hash of the len bytes starting at s, with seed mixed in.
// Inputs shorter than 128 bytes are delegated to CityMurmur. Longer inputs
// are consumed in 128-byte steps; whatever remains (fewer than 128 bytes) is
// then folded in as 32-byte chunks taken from the end of the input.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
if (len < 128) {
return CityMurmur(s, len, seed);
}
// We expect len >= 128 to be the common case. Keep 56 bytes of state:
// v, w, x, y, and z.
pair<uint64, uint64> v, w;
uint64 x = Uint128Low64(seed);
uint64 y = Uint128High64(seed);
uint64 z = len * k1;
v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
w.first = Rotate(y + z, 35) * k1 + x;
w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
// This is the same inner loop as CityHash64(), manually unrolled.
// Each iteration runs two 64-byte rounds, i.e. consumes 128 bytes.
do {
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
len -= 128;
} while (LIKELY(len >= 128));
// From here on len is the number of bytes left after s (fewer than 128).
x += Rotate(v.first + z, 49) * k0;
y = y * k0 + Rotate(w.second, 37);
z = z * k0 + Rotate(w.first, 27);
w.first *= 9;
v.first *= k0;
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
// Chunks are addressed backwards from s + len, so the last one may start
// before s and overlap bytes the main loop has already consumed.
for (size_t tail_done = 0; tail_done < len; ) {
tail_done += 32;
y = Rotate(x + y, 42) * k0 + v.second;
w.first += Fetch64(s + len - tail_done + 16);
x = x * k0 + w.first;
z += w.second + Fetch64(s + len - tail_done);
w.second += v.first;
v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
v.first *= k0;
}
// At this point our 56 bytes of state should contain more than
// enough information for a strong 128-bit hash. We use two
// different 56-byte-to-8-byte hashes to get a 16-byte final result.
x = HashLen16(x, v.first);
y = HashLen16(y + z, w.first);
return uint128(HashLen16(x + v.second, w.second) + y,
HashLen16(x + w.second, y + v.second));
}
// Unseeded 128-bit hash. For inputs of at least 16 bytes the first 16 bytes
// become the seed (the second word offset by k0) and the rest is hashed;
// shorter inputs are hashed whole with the fixed seed (k0, k1).
uint128 CityHash128(const char *s, size_t len) {
  if (len < 16) {
    return CityHash128WithSeed(s, len, uint128(k0, k1));
  }
  const uint128 seed_from_prefix(Fetch64(s), Fetch64(s + 8) + k0);
  return CityHash128WithSeed(s + 16, len - 16, seed_from_prefix);
}
#ifdef __SSE4_2__
#include <citycrc.h>
#include <nmmintrin.h>
// Requires len >= 240.
// Computes a 256-bit hash of the len bytes at s using the CRC32 instruction
// (_mm_crc32_u64) and writes it to result[0] ... result[3]. seed is mixed
// into the initial state. result[0] and result[1] are also used as scratch
// storage while the hash is being computed.
static void CityHashCrc256Long(const char *s, size_t len,
uint32 seed, uint64 *result) {
uint64 a = Fetch64(s + 56) + k0;
uint64 b = Fetch64(s + 96) + k0;
uint64 c = result[0] = HashLen16(b, len);
uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
uint64 e = Fetch64(s + 184) + seed;
uint64 f = 0;
uint64 g = 0;
uint64 h = c + d;
uint64 x = seed;
uint64 y = 0;
uint64 z = 0;
// 240 bytes of input per iter.
// After these two lines len holds the number of leftover bytes (< 240).
size_t iters = len / 240;
len -= iters * 240;
do {
// CHUNK(r) consumes 40 bytes at s: it adds five 64-bit words into b..f,
// updates the three CRC accumulators x, y, z, rotates e by r and advances s.
#undef CHUNK
#define CHUNK(r) \
PERMUTE3(x, z, y); \
b += Fetch64(s); \
c += Fetch64(s + 8); \
d += Fetch64(s + 16); \
e += Fetch64(s + 24); \
f += Fetch64(s + 32); \
a += b; \
h += f; \
b += c; \
f += d; \
g += e; \
e += z; \
g += x; \
z = _mm_crc32_u64(z, b + g); \
y = _mm_crc32_u64(y, e + h); \
x = _mm_crc32_u64(x, f + a); \
e = Rotate(e, r); \
c += e; \
s += 40
// Six chunks of 40 bytes each = 240 bytes per iteration.
CHUNK(0); PERMUTE3(a, h, c);
CHUNK(33); PERMUTE3(a, h, f);
CHUNK(0); PERMUTE3(b, h, f);
CHUNK(42); PERMUTE3(b, h, d);
CHUNK(0); PERMUTE3(b, h, e);
CHUNK(33); PERMUTE3(a, h, e);
} while (--iters > 0);
// Whole 40-byte chunks that remain after the 240-byte blocks.
while (len >= 40) {
CHUNK(29);
e ^= Rotate(a, 20);
h += Rotate(b, 30);
g ^= Rotate(c, 40);
f += Rotate(d, 34);
PERMUTE3(c, h, g);
len -= 40;
}
// A final partial chunk: step back so that the last 40 bytes of the input
// are hashed (re-reading some bytes that were already consumed).
if (len > 0) {
s = s + len - 40;
CHUNK(33);
e ^= Rotate(a, 43);
h += Rotate(b, 42);
g ^= Rotate(c, 41);
f += Rotate(d, 40);
}
// Finalisation: combine all state words into the four output words.
result[0] ^= h;
result[1] ^= g;
g += h;
a = HashLen16(a, g + z);
x += y << 32;
b += x;
c = HashLen16(c, z) + h;
d = HashLen16(d, e + result[0]);
g += e;
h += HashLen16(x, f);
e = HashLen16(a, d) + g;
z = HashLen16(b, c) + a;
y = HashLen16(g, h) + c;
result[0] = e + z + y + x;
a = ShiftMix((a + y) * k0) * k0 + b;
result[1] += a + result[0];
a = ShiftMix(a * k0) * k0 + c;
result[2] = a + result[1];
a = ShiftMix((a + e) * k0) * k0;
result[3] = a + result[2];
}
// Requires len < 240.
static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
char buf[240];
memcpy(buf, s, len);
memset(buf + len, 0, 240 - len);
CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);
}
// 256-bit CRC-assisted hash of the len bytes at s; writes result[0] ...
// result[3]. Inputs shorter than 240 bytes take the zero-padding path,
// everything else goes straight to the long-input routine with seed 0.
void CityHashCrc256(const char *s, size_t len, uint64 *result) {
  if (!LIKELY(len >= 240)) {
    CityHashCrc256Short(s, len, result);
    return;
  }
  CityHashCrc256Long(s, len, 0, result);
}
// Seeded 128-bit hash. Inputs of up to 900 bytes use CityHash128WithSeed;
// longer inputs are hashed with CityHashCrc256 and the four result words are
// combined with the two halves of seed.
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len > 900) {
    uint64 result[4];
    CityHashCrc256(s, len, result);
    const uint64 u = Uint128High64(seed) + result[0];
    const uint64 v = Uint128Low64(seed) + result[1];
    return uint128(HashLen16(u, v + result[2]),
                   HashLen16(Rotate(v, 32), u * k0 + result[3]));
  }
  return CityHash128WithSeed(s, len, seed);
}
// Unseeded 128-bit hash. Inputs of up to 900 bytes use CityHash128; longer
// inputs are hashed with CityHashCrc256, of which the last two result words
// are returned.
uint128 CityHashCrc128(const char *s, size_t len) {
  if (len > 900) {
    uint64 result[4];
    CityHashCrc256(s, len, result);
    return uint128(result[2], result[3]);
  }
  return CityHash128(s, len);
}
#endif

112
3rdParty/libcuckoo/include/cityhash/city.h vendored Executable file
View File

@ -0,0 +1,112 @@
// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// CityHash, by Geoff Pike and Jyrki Alakuijala
//
// http://code.google.com/p/cityhash/
//
// This file provides a few functions for hashing strings. All of them are
// high-quality functions in the sense that they pass standard tests such
// as Austin Appleby's SMHasher. They are also fast.
//
// For 64-bit x86 code, on short strings, we don't know of anything faster than
// CityHash64 that is of comparable quality. We believe our nearest competitor
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
// tables and most other hashing (excluding cryptography).
//
// For 64-bit x86 code, on long strings, the picture is more complicated.
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
// CityHashCrc128 appears to be faster than all competitors of comparable
// quality. CityHash128 is also good but not quite as fast. We believe our
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
// Note that CityHashCrc128 is declared in citycrc.h.
//
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
// is of comparable quality. We believe our nearest competitor is Murmur3A.
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
//
// Functions in the CityHash family are not suitable for cryptography.
//
// Please see CityHash's README file for more details on our performance
// measurements and so on.
//
// WARNING: This code has been only lightly tested on big-endian platforms!
// It is known to work well on little-endian platforms that have a small penalty
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
// bug reports are welcome.
//
// By the way, for some hash functions, given strings a and b, the hash
// of a+b is easily derived from the hashes of a and b. This property
// doesn't hold for any hash functions in this file.
#ifndef CITY_HASH_H_
#define CITY_HASH_H_
#include <stdlib.h> // for size_t.
#include <stdint.h>
#include <utility>
// Short aliases for the fixed-width integer types used by the CityHash code.
typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;
// A 128-bit value held as two 64-bit halves: first is the low half, second
// the high half (see the accessors below).
typedef std::pair<uint64, uint64> uint128;

// Returns the low 64 bits of x (the pair's first member).
inline uint64 Uint128Low64(const uint128& x) {
  return x.first;
}

// Returns the high 64 bits of x (the pair's second member).
inline uint64 Uint128High64(const uint128& x) {
  return x.second;
}
// Hash function for a byte array.
// Returns a 64-bit hash of the len bytes starting at buf.
uint64 CityHash64(const char *buf, size_t len);
// Hash function for a byte array. For convenience, a 64-bit seed is also
// hashed into the result.
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
// Hash function for a byte array. For convenience, two seeds are also
// hashed into the result.
uint64 CityHash64WithSeeds(const char *buf, size_t len,
uint64 seed0, uint64 seed1);
// Hash function for a byte array.
// Returns a 128-bit hash (see uint128) of the len bytes starting at s.
uint128 CityHash128(const char *s, size_t len);
// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
// Hash function for a byte array. Most useful in 32-bit binaries.
// Returns a 32-bit hash of the len bytes starting at buf.
uint32 CityHash32(const char *buf, size_t len);
// Reduces a 128-bit value to 64 bits with a Murmur-inspired mix: two
// multiply/xor-shift-by-47 rounds over the low and high halves followed by a
// final multiplication. Intended to be a reasonably good hash function.
inline uint64 Hash128to64(const uint128& x) {
  const uint64 kMul = 0x9ddfea08eb382d69ULL;
  const uint64 low = Uint128Low64(x);
  const uint64 high = Uint128High64(x);
  uint64 a = (low ^ high) * kMul;
  a ^= (a >> 47);
  uint64 b = (high ^ a) * kMul;
  b ^= (b >> 47);
  return b * kMul;
}
#endif // CITY_HASH_H_

43
3rdParty/libcuckoo/include/cityhash/citycrc.h vendored Executable file
View File

@ -0,0 +1,43 @@
// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// CityHash, by Geoff Pike and Jyrki Alakuijala
//
// This file declares the subset of the CityHash functions that require
// _mm_crc32_u64(). See the CityHash README for details.
//
// Functions in the CityHash family are not suitable for cryptography.
#ifndef CITY_HASH_CRC_H_
#define CITY_HASH_CRC_H_
#include <city.h>
// Hash function for a byte array.
// Returns a 128-bit hash (see uint128 in city.h) of the len bytes at s.
uint128 CityHashCrc128(const char *s, size_t len);
// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
// Hash function for a byte array. Sets result[0] ... result[3].
// result must therefore point to at least four writable uint64 values.
void CityHashCrc256(const char *s, size_t len, uint64 *result);
#endif // CITY_HASH_CRC_H_

View File

@ -0,0 +1,40 @@
#ifndef _CITY_HASHER_HH
#define _CITY_HASHER_HH
#include <cityhash/city.h>
#include <string>
/*! CityHasher is a functor with the same call shape as std::hash that
 * delegates to CityHash. Prefer it over the default std::hash where
 * possible.
 *
 * The generic version hashes the in-memory bytes of the key: all sizeof(k)
 * bytes starting at its address are fed to CityHash, so whatever those bytes
 * contain (including any padding) is part of the hash input. */
template <class Key>
class CityHasher {
public:
    /*! Hash \p k. CityHash32 is used when size_t is narrower than 8 bytes,
     * CityHash64 otherwise. */
    size_t operator()(const Key& k) const {
        const char* const raw = reinterpret_cast<const char*>(&k);
        const bool narrow_size_t = sizeof(size_t) < 8;
        /* The 64-bit branch is dead code on 32-bit builds, but the explicit
         * cast is kept so that MSVC does not warn about truncation. */
        return narrow_size_t
            ? static_cast<size_t>(CityHash32(raw, sizeof(k)))
            : static_cast<size_t>(CityHash64(raw, sizeof(k)));
    }
};
/*! Specialization of CityHasher for std::string keys: the hash is computed
 * over the string's characters (k.c_str(), k.size()) instead of over the bytes
 * of the std::string object itself. */
template <>
class CityHasher<std::string> {
public:
    /*! Hash the contents of \p k. CityHash32 is used when size_t is narrower
     * than 8 bytes, CityHash64 otherwise. */
    size_t operator()(const std::string& k) const {
        const char* const chars = k.c_str();
        const size_t length = k.size();
        const bool narrow_size_t = sizeof(size_t) < 8;
        /* The 64-bit branch is dead code on 32-bit builds, but the explicit
         * cast is kept so that MSVC does not warn about truncation. */
        return narrow_size_t
            ? static_cast<size_t>(CityHash32(chars, length))
            : static_cast<size_t>(CityHash64(chars, length));
    }
};
#endif // _CITY_HASHER_HH

View File

@ -0,0 +1,34 @@
/** \file */
#ifndef _CUCKOOHASH_CONFIG_HH
#define _CUCKOOHASH_CONFIG_HH
#include <cstddef>
//! Default bucket capacity: the maximum number of keys one bucket holds
constexpr size_t DEFAULT_SLOT_PER_BUCKET = 4;

//! Default number of elements in an empty hash table
constexpr size_t DEFAULT_SIZE = (1U << 16) * DEFAULT_SLOT_PER_BUCKET;

//! Memory granularity of the locks array, on a scale from 0 to 16. At 0 (the
//! least granular setting) the locks array is one contiguous array, which
//! gives the best performance and the largest memory overhead. At 16 (the
//! most granular setting) memory overhead is lowest and performance is worse.
constexpr size_t LOCK_ARRAY_GRANULARITY = 0;

//! Default minimum load factor the table tolerates when it expands
//! automatically. Must lie between 0.0 and 1.0. If the load factor is below
//! this value during an automatic expansion, the table throws
//! libcuckoo_load_factor_too_low.
constexpr double DEFAULT_MINIMUM_LOAD_FACTOR = 0.05;

//! Sentinel meaning "no maximum hashpower". Setting the maximum hashpower
//! limit to this value disables the limit. It should stay at 0, because 0 is
//! the only hashpower that can never occur.
constexpr size_t NO_MAXIMUM_HASHPOWER = 0;

//! set LIBCUCKOO_DEBUG to 1 to enable debug output
#define LIBCUCKOO_DEBUG 0
#endif // _CUCKOOHASH_CONFIG_HH

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,185 @@
/** \file */
#ifndef _CUCKOOHASH_UTIL_HH
#define _CUCKOOHASH_UTIL_HH
#include <cstddef>
#include <cstdio>
#include <exception>
#include <functional>
#include <memory>
#include <thread>
#include <type_traits>
#include <vector>
#include "cuckoohash_config.hh" // for LIBCUCKOO_DEBUG
// Debug logging. When LIBCUCKOO_DEBUG is non-zero, LIBCUCKOO_DBG(fmt, ...)
// writes a printf-style message to stderr, prefixed with
// "[libcuckoo:<file>:<line>:<thread>] " and wrapped in the escape sequences
// \x1b[32m ... \x1b[0m. Otherwise it expands to an empty statement.
//
// std::thread::id cannot be cast to an integer, so the id is run through
// std::hash to get a number that identifies the calling thread.
// At least one argument must follow fmt.
#if LIBCUCKOO_DEBUG
#  define LIBCUCKOO_DBG(fmt, ...)                                             \
     fprintf(stderr, "\x1b[32m""[libcuckoo:%s:%d:%lu] " fmt"" "\x1b[0m",        \
             __FILE__,__LINE__,                                               \
             static_cast<unsigned long>(                                      \
                 std::hash<std::thread::id>()(std::this_thread::get_id())),   \
             __VA_ARGS__)
#else
#  define LIBCUCKOO_DBG(fmt, ...)  do {} while (0)
#endif
/**
 * LIBCUCKOO_ALIGNAS(x) requests x-byte alignment. Since alignas() requires
 * GCC >= 4.9, the aligned attribute is used whenever __GNUC__ is defined and
 * the standard alignas() specifier everywhere else.
 */
#if defined(__GNUC__)
#  define LIBCUCKOO_ALIGNAS(x) __attribute__((aligned(x)))
#else
#  define LIBCUCKOO_ALIGNAS(x) alignas(x)
#endif
/**
 * MSVC, at higher warning levels, complains that alignment may waste space:
 * "structure was padded due to __declspec(align())" (warning 4324).
 * LIBCUCKOO_SQUELCH_PADDING_WARNING suppresses that warning under MSVC and
 * expands to nothing for every other compiler.
 */
#if defined(_MSC_VER)
#  define LIBCUCKOO_SQUELCH_PADDING_WARNING __pragma(warning(suppress : 4324))
#else
#  define LIBCUCKOO_SQUELCH_PADDING_WARNING
#endif
/**
 * LIBCUCKOO_THREAD_LOCAL is the thread-local storage specifier to use.
 * thread_local requires GCC >= 4.8 and is missing from some clang versions;
 * in those cases __thread is used, otherwise the standard keyword.
 */
#if defined(__clang__)
#  if __has_feature(cxx_thread_local)
#    define LIBCUCKOO_THREAD_LOCAL thread_local
#  else
#    define LIBCUCKOO_THREAD_LOCAL __thread
#  endif
#elif defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 8
#  define LIBCUCKOO_THREAD_LOCAL __thread
#else
#  define LIBCUCKOO_THREAD_LOCAL thread_local
#endif
// For enabling certain methods based on a condition. The macro expands to a
// template header with a defaulted dummy parameter (Bogus), followed by
// `preamble` (specifiers such as static or inline) and a std::enable_if type
// that names `return_type` when `condition` is true. sizeof(Bogus) is always
// non-zero; it is presumably there to make the enable_if expression depend on
// the template parameter. Arguments are given in the order
// (preamble, condition, return_type). Here's an example.
// ENABLE_IF(static inline, some_cond, type) method() {
// ...
// }
#define ENABLE_IF(preamble, condition, return_type) \
template <class Bogus=void*> \
preamble typename std::enable_if<sizeof(Bogus) && \
condition, return_type>::type
/**
 * Thrown when an automatic expansion is triggered, but the load factor of the
 * table is below a minimum threshold, which can be set by the \ref
 * cuckoohash_map::minimum_load_factor method. This can happen if the hash
 * function does not properly distribute keys, or for certain adversarial
 * workloads.
 */
class libcuckoo_load_factor_too_low : public std::exception {
public:
    /**
     * Constructor
     *
     * @param lf the load factor of the table when the exception was thrown
     */
    libcuckoo_load_factor_too_low(const double lf)
        : load_factor_(lf) {}

    /**
     * @return a fixed, static description of the error
     */
    virtual const char* what() const noexcept override {
        return "Automatic expansion triggered when load factor was below "
            "minimum threshold";
    }

    /**
     * Const-qualified so that it can be called on an exception caught by
     * const reference.
     *
     * @return the load factor of the table when the exception was thrown
     */
    double load_factor() const {
        return load_factor_;
    }
private:
    const double load_factor_;
};
/**
 * Thrown when an expansion is triggered, but the hashpower specified is greater
 * than the maximum, which can be set with the \ref
 * cuckoohash_map::maximum_hashpower method.
 */
class libcuckoo_maximum_hashpower_exceeded : public std::exception {
public:
    /**
     * Constructor
     *
     * @param hp the hash power we were trying to expand to
     */
    libcuckoo_maximum_hashpower_exceeded(const size_t hp)
        : hashpower_(hp) {}

    /**
     * @return a fixed, static description of the error
     */
    virtual const char* what() const noexcept override {
        return "Expansion beyond maximum hashpower";
    }

    /**
     * Const-qualified so that it can be called on an exception caught by
     * const reference.
     *
     * @return the hashpower we were trying to expand to
     */
    size_t hashpower() const {
        return hashpower_;
    }
private:
    const size_t hashpower_;
};
// Allocates an array of `size` elements of type T using a default-constructed
// Alloc, and value-initializes each element with the 0-argument constructor.
//
// All allocator operations go through std::allocator_traits, so Alloc only has
// to provide value_type, allocate and deallocate; construct/destroy are taken
// from the allocator when it has them and defaulted otherwise.
//
// If an element constructor throws, the elements built so far are destroyed,
// the storage is released and the exception is rethrown. The returned array
// must be released with destroy_array<T, Alloc>(arr, size).
template <class T, class Alloc>
T* create_array(const size_t size) {
    typedef std::allocator_traits<Alloc> traits;
    Alloc allocator;
    T* arr = traits::allocate(allocator, size);
    // Number of elements successfully constructed so far; read by the handler
    // below to know how much to tear down.
    size_t constructed = 0;
    try {
        for (; constructed < size; ++constructed) {
            traits::construct(allocator, &arr[constructed]);
        }
    } catch (...) {
        for (size_t j = 0; j < constructed; ++j) {
            traits::destroy(allocator, &arr[j]);
        }
        traits::deallocate(allocator, arr, size);
        throw;
    }
    return arr;
}
// Destroys every element of an array of the given size and then deallocates the
// memory, using a default-constructed Alloc. `arr` and `size` must describe
// an array obtained from create_array<T, Alloc>(size).
//
// Destruction and deallocation go through std::allocator_traits, so Alloc does
// not need to provide its own destroy() member.
template <class T, class Alloc>
void destroy_array(T* arr, const size_t size) {
    typedef std::allocator_traits<Alloc> traits;
    Alloc allocator;
    for (size_t i = 0; i < size; ++i) {
        traits::destroy(allocator, &arr[i]);
    }
    traits::deallocate(allocator, arr, size);
}
// Executes the function over the range [start, end), split over num_threads
// threads.
//
// Each thread calls func(chunk_start, chunk_end, eptr), where eptr is a
// reference to that thread's own std::exception_ptr slot. func reports
// failure by storing into eptr. After all threads have been joined, the first
// non-null slot (in thread order) is rethrown in the caller.
//
// The range is cut into (end - start) / num_threads sized chunks; the last
// thread also takes the remainder. A num_threads of 0 is treated as 1, so the
// call never divides by zero. The caller must ensure start <= end.
//
// If launching a thread fails, the threads already started are joined before
// the launch error propagates, so no joinable std::thread is ever destroyed.
template <class F>
static void parallel_exec(size_t start, size_t end,
                          size_t num_threads, F func) {
    if (num_threads == 0) {
        num_threads = 1;
    }
    const size_t work_per_thread = (end - start) / num_threads;
    std::vector<std::thread> threads(num_threads);
    std::vector<std::exception_ptr> eptrs(num_threads, nullptr);
    try {
        for (size_t i = 0; i < num_threads - 1; ++i) {
            threads[i] = std::thread(func, start, start + work_per_thread,
                                     std::ref(eptrs[i]));
            start += work_per_thread;
        }
        // The last thread runs up to `end` and so absorbs any remainder.
        threads[num_threads - 1] = std::thread(
            func, start, end, std::ref(eptrs[num_threads - 1]));
    } catch (...) {
        // Running workers hold references into eptrs; wait for them before
        // unwinding.
        for (std::thread& t : threads) {
            if (t.joinable()) {
                t.join();
            }
        }
        throw;
    }
    for (std::thread& t : threads) {
        t.join();
    }
    for (std::exception_ptr& eptr : eptrs) {
        if (eptr) {
            std::rethrow_exception(eptr);
        }
    }
}
#endif // _CUCKOOHASH_UTIL_HH

View File

@ -0,0 +1,29 @@
#ifndef _DEFAULT_HASHER_HH
#define _DEFAULT_HASHER_HH
#include <string>
#include <type_traits>
/*! DefaultHasher is the hash functor the table uses by default. std::hash
 * does badly on a few key types (namely integers), so those get their own
 * overload; every other key type is forwarded to std::hash. */
template <class Key>
class DefaultHasher {
    std::hash<Key> std_hasher_;
public:
    /*! Selected when Key is not an integral type: returns std::hash<Key>'s
     * result for \p k. */
    template <class T = Key>
    typename std::enable_if<!std::is_integral<T>::value, size_t>::type
    operator()(const Key& k) const {
        return std_hasher_(k);
    }

    /*! Selected when Key is an integral type: multiplies \p k by a fixed
     * 64-bit constant. */
    template <class T = Key>
    typename std::enable_if<std::is_integral<T>::value, size_t>::type
    operator()(const Key& k) const {
        // This constant is found in the CityHash code
        const unsigned long long multiplier = 0x9ddfea08eb382d69ULL;
        return k * multiplier;
    }
};
#endif // _DEFAULT_HASHER_HH

View File

@ -0,0 +1,119 @@
/** \file */
#ifndef _LAZY_ARRAY_HH
#define _LAZY_ARRAY_HH
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include "cuckoohash_util.hh"
// lazy array. A fixed-size array, broken up into segments that are dynamically
// allocated, only when requested. The array size and segment size are
// pre-defined, and are powers of two: there are 2^SEGMENT_BITS segments of
// 2^OFFSET_BITS elements each. The user must make sure the necessary
// segments are allocated (see allocate()) before accessing the array.
//
// Segments are created with create_array<T, Alloc> and released with
// destroy_array<T, Alloc>. The class is movable but not copyable.
template <uint8_t OFFSET_BITS, uint8_t SEGMENT_BITS,
          class T, class Alloc = std::allocator<T>
          >
class lazy_array {
    static_assert(SEGMENT_BITS + OFFSET_BITS <= sizeof(size_t)*8,
                  "The number of segment and offset bits cannot exceed "
                  " the number of bits in a size_t");
private:
    // The shifts are done in size_t, not unsigned long: unsigned long can be
    // narrower than size_t, which would truncate large sizes.
    static const size_t SEGMENT_SIZE = static_cast<size_t>(1) << OFFSET_BITS;
    static const size_t NUM_SEGMENTS = static_cast<size_t>(1) << SEGMENT_BITS;
    static const size_t OFFSET_MASK = (static_cast<size_t>(1) << OFFSET_BITS) - 1;

    // One pointer per segment; nullptr means "not allocated yet". Declared
    // mutable so that const members may touch it, although none of the const
    // members below currently modifies it.
    mutable std::array<T*, NUM_SEGMENTS> segments_;

    // Takes over the segments of arr, releasing our own first. arr is left
    // with no allocated segments. Moving an array into itself is a no-op.
    void move_other_array(lazy_array&& arr) {
        if (this == &arr) {
            return;
        }
        clear();
        std::copy(arr.segments_.begin(), arr.segments_.end(),
                  segments_.begin());
        std::fill(arr.segments_.begin(), arr.segments_.end(), nullptr);
    }

    // Index of the segment that holds element i. Static so that both the
    // const and the non-const subscript operators can call it.
    static inline size_t get_segment(size_t i) {
        return i >> OFFSET_BITS;
    }

    // Position of element i inside its segment.
    static inline size_t get_offset(size_t i) {
        return i & OFFSET_MASK;
    }

public:
    // Creates an array with no segments allocated.
    lazy_array(): segments_{{nullptr}} {}

    // No copying
    lazy_array(const lazy_array&) = delete;
    lazy_array& operator=(const lazy_array&) = delete;

    // Moving is allowed
    lazy_array(lazy_array&& arr) : segments_{{nullptr}} {
        move_other_array(std::move(arr));
    }

    lazy_array& operator=(lazy_array&& arr) {
        move_other_array(std::move(arr));
        return *this;
    }

    ~lazy_array() {
        clear();
    }

    // Destroys and deallocates every allocated segment.
    void clear() {
        for (size_t i = 0; i < segments_.size(); ++i) {
            if (segments_[i] != nullptr) {
                destroy_array<T, Alloc>(segments_[i], SEGMENT_SIZE);
                segments_[i] = nullptr;
            }
        }
    }

    // Element access. The segment containing i must already be allocated;
    // this is only checked by an assert.
    T& operator[](size_t i) {
        assert(segments_[get_segment(i)] != nullptr);
        return segments_[get_segment(i)][get_offset(i)];
    }

    const T& operator[](size_t i) const {
        assert(segments_[get_segment(i)] != nullptr);
        return segments_[get_segment(i)][get_offset(i)];
    }

    // Ensures that the array has enough segments to index target elements, not
    // exceeding the total size. The user must ensure that the array is properly
    // allocated before accessing a certain index. This saves having to check
    // every index operation.
    void allocate(size_t target) {
        assert(target <= size());
        if (target == 0) {
            return;
        }
        // Every segment up to and including the one that holds the last
        // requested element (index target - 1) is needed.
        const size_t last_segment = get_segment(target - 1);
        for (size_t i = 0; i <= last_segment; ++i) {
            if (segments_[i] == nullptr) {
                segments_[i] = create_array<T, Alloc>(SEGMENT_SIZE);
            }
        }
    }

    // Returns the number of elements in the array that can be indexed, starting
    // contiguously from the beginning.
    size_t allocated_size() const {
        size_t num_allocated_segments = 0;
        while (num_allocated_segments < NUM_SEGMENTS &&
               segments_[num_allocated_segments] != nullptr) {
            ++num_allocated_segments;
        }
        return num_allocated_segments * SEGMENT_SIZE;
    }

    // Total capacity of the array in elements, i.e.
    // 2^(OFFSET_BITS + SEGMENT_BITS).
    static constexpr size_t size() {
        return static_cast<size_t>(1) << (OFFSET_BITS + SEGMENT_BITS);
    }
};
#endif // _LAZY_ARRAY_HH

View File

@ -376,6 +376,7 @@ include_directories(${PROJECT_SOURCE_DIR}/3rdParty/linenoise-ng/include)
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/linenoise-ng/src) include_directories(${PROJECT_SOURCE_DIR}/3rdParty/linenoise-ng/src)
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/velocypack/include) include_directories(${PROJECT_SOURCE_DIR}/3rdParty/velocypack/include)
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/rocksdb/rocksdb/include) include_directories(${PROJECT_SOURCE_DIR}/3rdParty/rocksdb/rocksdb/include)
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/libcuckoo/include)
include_directories(${PROJECT_BINARY_DIR}) include_directories(${PROJECT_BINARY_DIR})
include_directories(${PROJECT_BINARY_DIR}/lib) include_directories(${PROJECT_BINARY_DIR}/lib)

View File

@ -360,7 +360,7 @@ SET(ARANGOD_SOURCES
Wal/Slot.cpp Wal/Slot.cpp
Wal/Slots.cpp Wal/Slots.cpp
Wal/SynchronizerThread.cpp Wal/SynchronizerThread.cpp
Pregel/AggregatorUsage.cpp Pregel/AggregatorHandler.cpp
Pregel/AlgoRegistry.cpp Pregel/AlgoRegistry.cpp
Pregel/Algos/SSSP.cpp Pregel/Algos/SSSP.cpp
Pregel/Algos/PageRank.cpp Pregel/Algos/PageRank.cpp
@ -396,6 +396,7 @@ target_link_libraries(${BIN_ARANGOD}
${SYSTEM_LIBRARIES} ${SYSTEM_LIBRARIES}
boost_boost boost_boost
boost_system boost_system
libcuckoo
) )
install( install(

View File

@ -50,12 +50,13 @@ class Aggregator {
// virtual void setValue(VPackSlice slice) = 0; // virtual void setValue(VPackSlice slice) = 0;
virtual VPackValue vpackValue() = 0; virtual VPackValue vpackValue() = 0;
virtual void reset() {}; virtual void reset(){};
virtual bool isPermanent() {return _permanent;} bool isPermanent() { return _permanent; }
}; };
class FloatMaxAggregator : public Aggregator { class FloatMaxAggregator : public Aggregator {
float _value, _initial; float _value, _initial;
public: public:
FloatMaxAggregator(float init) : _value(init), _initial(init) {} FloatMaxAggregator(float init) : _value(init), _initial(init) {}
@ -77,50 +78,43 @@ class FloatMaxAggregator : public Aggregator {
void reset() override { _value = _initial; } void reset() override { _value = _initial; }
}; };
template<typename T> template <typename T>
class ValueAggregator : public Aggregator { class ValueAggregator : public Aggregator {
static_assert(std::is_arithmetic<T>::value, "Type must be numeric"); static_assert(std::is_arithmetic<T>::value, "Type must be numeric");
T _value; T _value;
public:
public:
ValueAggregator(T val) : Aggregator(true), _value(val) {} ValueAggregator(T val) : Aggregator(true), _value(val) {}
void aggregate(void const* valuePtr) override { void aggregate(void const* valuePtr) override { _value = *((T*)valuePtr); };
_value = *((T*)valuePtr); void aggregate(VPackSlice slice) override { _value = slice.getNumber<T>(); }
};
void aggregate(VPackSlice slice) override {
_value = slice.getNumber<T>();
}
void const* getValue() const override { return &_value; }; void const* getValue() const override { return &_value; };
/*void setValue(VPackSlice slice) override { /*void setValue(VPackSlice slice) override {
_value = (float)slice.getDouble(); _value = (float)slice.getDouble();
}*/ }*/
VPackValue vpackValue() override { return VPackValue(_value); }; VPackValue vpackValue() override { return VPackValue(_value); };
}; };
template<typename T> template <typename T>
class SumAggregator : public Aggregator { class SumAggregator : public Aggregator {
static_assert(std::is_arithmetic<T>::value, "Type must be numeric"); static_assert(std::is_arithmetic<T>::value, "Type must be numeric");
T _value; T _value;
public:
public:
SumAggregator(T val) : Aggregator(true), _value(val) {} SumAggregator(T val) : Aggregator(true), _value(val) {}
void aggregate(void const* valuePtr) override { void aggregate(void const* valuePtr) override { _value += *((T*)valuePtr); };
_value += *((T*)valuePtr); void aggregate(VPackSlice slice) override { _value += slice.getNumber<T>(); }
};
void aggregate(VPackSlice slice) override {
_value += slice.getNumber<T>();
}
void const* getValue() const override { return &_value; }; void const* getValue() const override { return &_value; };
/*void setValue(VPackSlice slice) override { /*void setValue(VPackSlice slice) override {
_value = (float)slice.getDouble(); _value = (float)slice.getDouble();
}*/ }*/
VPackValue vpackValue() override { return VPackValue(_value); }; VPackValue vpackValue() override { return VPackValue(_value); };
}; };
} }
} }
#endif #endif

View File

@ -20,21 +20,22 @@
/// @author Simon Grätzer /// @author Simon Grätzer
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#include "Pregel/AggregatorUsage.h" #include "Pregel/AggregatorHandler.h"
#include "Pregel/Aggregator.h" #include "Pregel/Aggregator.h"
#include "Pregel/Algorithm.h" #include "Pregel/Algorithm.h"
using namespace arangodb; using namespace arangodb;
using namespace arangodb::pregel; using namespace arangodb::pregel;
AggregatorUsage::~AggregatorUsage() { AggregatorHandler::~AggregatorHandler() {
for (auto const& it : _values) { for (auto const& it : _values) {
delete it.second; delete it.second;
} }
_values.clear(); _values.clear();
} }
void AggregatorUsage::aggregate(std::string const& name, const void* valuePtr) { void AggregatorHandler::aggregate(std::string const& name,
const void* valuePtr) {
auto it = _values.find(name); auto it = _values.find(name);
if (it != _values.end()) { if (it != _values.end()) {
it->second->aggregate(valuePtr); it->second->aggregate(valuePtr);
@ -48,7 +49,8 @@ void AggregatorUsage::aggregate(std::string const& name, const void* valuePtr) {
} }
} }
const void* AggregatorUsage::getAggregatedValue(std::string const& name) const { const void* AggregatorHandler::getAggregatedValue(
std::string const& name) const {
auto const& it = _values.find(name); auto const& it = _values.find(name);
if (it != _values.end()) { if (it != _values.end()) {
return it->second->getValue(); return it->second->getValue();
@ -56,7 +58,7 @@ const void* AggregatorUsage::getAggregatedValue(std::string const& name) const {
return nullptr; return nullptr;
} }
void AggregatorUsage::resetValues() { void AggregatorHandler::resetValues() {
for (auto& it : _values) { for (auto& it : _values) {
if (!it.second->isPermanent()) { if (!it.second->isPermanent()) {
it.second->reset(); it.second->reset();
@ -64,7 +66,7 @@ void AggregatorUsage::resetValues() {
} }
} }
void AggregatorUsage::aggregateValues(AggregatorUsage const& workerValues) { void AggregatorHandler::aggregateValues(AggregatorHandler const& workerValues) {
for (auto const& pair : workerValues._values) { for (auto const& pair : workerValues._values) {
std::string const& name = pair.first; std::string const& name = pair.first;
auto my = _values.find(name); auto my = _values.find(name);
@ -81,7 +83,7 @@ void AggregatorUsage::aggregateValues(AggregatorUsage const& workerValues) {
} }
} }
void AggregatorUsage::aggregateValues(VPackSlice workerValues) { void AggregatorHandler::aggregateValues(VPackSlice workerValues) {
for (auto const& keyValue : VPackObjectIterator(workerValues)) { for (auto const& keyValue : VPackObjectIterator(workerValues)) {
std::string name = keyValue.key.copyString(); std::string name = keyValue.key.copyString();
auto const& it = _values.find(name); auto const& it = _values.find(name);
@ -98,10 +100,10 @@ void AggregatorUsage::aggregateValues(VPackSlice workerValues) {
} }
} }
void AggregatorUsage::serializeValues(VPackBuilder& b) const { void AggregatorHandler::serializeValues(VPackBuilder& b) const {
for (auto const& pair : _values) { for (auto const& pair : _values) {
b.add(pair.first, pair.second->vpackValue()); b.add(pair.first, pair.second->vpackValue());
} }
} }
size_t AggregatorUsage::size() { return _values.size(); } size_t AggregatorHandler::size() { return _values.size(); }

View File

@ -20,31 +20,32 @@
/// @author Simon Grätzer /// @author Simon Grätzer
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#ifndef ARANGODB_PREGEL_AGGRGS_USAGE_H #ifndef ARANGODB_PREGEL_AGGREGATOR_HANDLER_H
#define ARANGODB_PREGEL_AGGRGS_USAGE_H 1 #define ARANGODB_PREGEL_AGGREGATOR_HANDLER_H 1
#include <velocypack/vpack.h> #include <velocypack/Builder.h>
#include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h> #include <velocypack/velocypack-aliases.h>
#include <functional> #include <functional>
#include <map> #include <map>
namespace arangodb { namespace arangodb {
namespace pregel { namespace pregel {
struct IAlgorithm; struct IAlgorithm;
class Aggregator; class Aggregator;
class AggregatorUsage { class AggregatorHandler {
const IAlgorithm* _create; const IAlgorithm* _create;
std::map<std::string, Aggregator*> _values; std::map<std::string, Aggregator*> _values;
public: public:
AggregatorUsage(const IAlgorithm* c) : _create(c) {} AggregatorHandler(const IAlgorithm* c) : _create(c) {}
~AggregatorUsage(); ~AggregatorHandler();
void aggregate(std::string const& name, const void* valuePtr); void aggregate(std::string const& name, const void* valuePtr);
const void* getAggregatedValue(std::string const& name) const; const void* getAggregatedValue(std::string const& name) const;
void resetValues(); void resetValues();
void aggregateValues(AggregatorUsage const& workerValues); void aggregateValues(AggregatorHandler const& workerValues);
void aggregateValues(VPackSlice workerValues); void aggregateValues(VPackSlice workerValues);
void serializeValues(VPackBuilder& b) const; void serializeValues(VPackBuilder& b) const;
size_t size(); size_t size();

View File

@ -31,11 +31,14 @@ struct TRI_vocbase_t;
namespace arangodb { namespace arangodb {
namespace pregel { namespace pregel {
struct AlgoRegistry { struct AlgoRegistry {
static IAlgorithm* createAlgorithm(std::string const& algorithm, VPackSlice userParams); static IAlgorithm* createAlgorithm(std::string const& algorithm,
VPackSlice userParams);
static IWorker* createWorker(TRI_vocbase_t* vocbase, VPackSlice body); static IWorker* createWorker(TRI_vocbase_t* vocbase, VPackSlice body);
private:
private:
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
static IWorker* createWorker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo, VPackSlice body); static IWorker* createWorker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
VPackSlice body);
}; };
} }
} }

View File

@ -30,9 +30,9 @@
#include "Basics/Common.h" #include "Basics/Common.h"
#include "GraphFormat.h" #include "GraphFormat.h"
#include "MasterContext.h"
#include "MessageCombiner.h" #include "MessageCombiner.h"
#include "MessageFormat.h" #include "MessageFormat.h"
#include "MasterContext.h"
#include "WorkerContext.h" #include "WorkerContext.h"
namespace arangodb { namespace arangodb {
@ -40,7 +40,7 @@ namespace pregel {
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
class VertexComputation; class VertexComputation;
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
class VertexCompensation; class VertexCompensation;
@ -48,31 +48,30 @@ class Aggregator;
struct IAlgorithm { struct IAlgorithm {
virtual ~IAlgorithm() {} virtual ~IAlgorithm() {}
// virtual bool isFixpointAlgorithm() const {return false;} // virtual bool isFixpointAlgorithm() const {return false;}
// virtual bool preserveTransactions() const { return false; }
virtual bool supportsCompensation() const { virtual bool supportsAsyncMode() const { return false; }
return false;
} virtual bool supportsCompensation() const { return false; }
virtual Aggregator* aggregator(std::string const& name) const { virtual Aggregator* aggregator(std::string const& name) const {
return nullptr; return nullptr;
} }
virtual MasterContext* masterContext(VPackSlice userParams) const { virtual MasterContext* masterContext(VPackSlice userParams) const {
return nullptr; return nullptr;
} }
std::string const& name() const { return _name; } std::string const& name() const { return _name; }
protected: protected:
IAlgorithm(std::string const& name) : _name(name){}; IAlgorithm(std::string const& name) : _name(name){};
private: private:
std::string _name; std::string _name;
}; };
// specify serialization, whatever // specify serialization, whatever
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
struct Algorithm : IAlgorithm { struct Algorithm : IAlgorithm {
@ -87,7 +86,8 @@ struct Algorithm : IAlgorithm {
virtual VertexCompensation<V, E, M>* createCompensation(uint64_t gss) const { virtual VertexCompensation<V, E, M>* createCompensation(uint64_t gss) const {
return nullptr; return nullptr;
} }
protected:
protected:
Algorithm(std::string const& name) : IAlgorithm(name){}; Algorithm(std::string const& name) : IAlgorithm(name){};
}; };

View File

@ -36,6 +36,10 @@ struct PageRankAlgorithm : public SimpleAlgorithm<float, float, float> {
public: public:
PageRankAlgorithm(arangodb::velocypack::Slice params); PageRankAlgorithm(arangodb::velocypack::Slice params);
bool supportsAsyncMode() const override { return true; }
bool supportsCompensation() const override { return true; }
MasterContext* masterContext(VPackSlice userParams) const override;
GraphFormat<float, float>* inputFormat() const override; GraphFormat<float, float>* inputFormat() const override;
MessageFormat<float>* messageFormat() const override; MessageFormat<float>* messageFormat() const override;
@ -44,8 +48,6 @@ struct PageRankAlgorithm : public SimpleAlgorithm<float, float, float> {
const override; const override;
VertexCompensation<float, float, float>* createCompensation(uint64_t gss) const override; VertexCompensation<float, float, float>* createCompensation(uint64_t gss) const override;
Aggregator* aggregator(std::string const& name) const override; Aggregator* aggregator(std::string const& name) const override;
MasterContext* masterContext(VPackSlice userParams) const override;
}; };
} }
} }

View File

@ -67,7 +67,7 @@ void Conductor::start(std::string const& algoName, VPackSlice userConfig) {
} else { } else {
_userParams.add(userConfig); _userParams.add(userConfig);
} }
_startTimeSecs = TRI_microtime(); _startTimeSecs = TRI_microtime();
_globalSuperstep = 0; _globalSuperstep = 0;
_state = ExecutionState::RUNNING; _state = ExecutionState::RUNNING;
@ -76,7 +76,11 @@ void Conductor::start(std::string const& algoName, VPackSlice userConfig) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
"Algorithm not found"); "Algorithm not found");
} }
_aggregatorUsage.reset(new AggregatorUsage(_algorithm.get())); _aggregators.reset(new AggregatorHandler(_algorithm.get()));
// configure the async mode as optional
VPackSlice async = _userParams.slice().get("async");
_asyncMode = _algorithm->supportsAsyncMode();
_asyncMode = _asyncMode && (async.isNone() || async.getBoolean());
int res = _initializeWorkers(Utils::startExecutionPath, VPackSlice()); int res = _initializeWorkers(Utils::startExecutionPath, VPackSlice());
if (res != TRI_ERROR_NO_ERROR) { if (res != TRI_ERROR_NO_ERROR) {
@ -92,15 +96,15 @@ bool Conductor::_startGlobalStep() {
b.openObject(); b.openObject();
b.add(Utils::executionNumberKey, VPackValue(_executionNumber)); b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep)); b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep));
if (_aggregatorUsage->size() > 0) { if (_aggregators->size() > 0) {
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object)); b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
_aggregatorUsage->serializeValues(b); _aggregators->serializeValues(b);
b.close(); b.close();
} }
b.close(); b.close();
// reset values which are calculated during the superstep // reset values which are calculated during the superstep
_aggregatorUsage->resetValues(); _aggregators->resetValues();
_workerStats.activeCount = 0; _workerStats.activeCount = 0;
// first allow all workers to run worker level operations // first allow all workers to run worker level operations
@ -128,11 +132,11 @@ void Conductor::finishedWorkerStartup(VPackSlice& data) {
LOG(WARN) << "We are not in a state where we expect a response"; LOG(WARN) << "We are not in a state where we expect a response";
return; return;
} }
_ensureCorrectness(data); _ensureUniqueResponse(data);
if (_respondedServers.size() != _dbServers.size()) { if (_respondedServers.size() != _dbServers.size()) {
return; return;
} }
if (_startGlobalStep()) { if (_startGlobalStep()) {
// listens for changing primary DBServers on each collection shard // listens for changing primary DBServers on each collection shard
RecoveryManager* mngr = PregelFeature::instance()->recoveryManager(); RecoveryManager* mngr = PregelFeature::instance()->recoveryManager();
@ -142,31 +146,38 @@ void Conductor::finishedWorkerStartup(VPackSlice& data) {
} }
} }
void Conductor::finishedGlobalStep(VPackSlice& data) { void Conductor::finishedWorkerStep(VPackSlice& data) {
MUTEX_LOCKER(guard, _callbackMutex); MUTEX_LOCKER(guard, _callbackMutex);
// this method can be called multiple times in a superstep depending on
// whether we are in the async mode
uint64_t gss = data.get(Utils::globalSuperstepKey).getUInt(); uint64_t gss = data.get(Utils::globalSuperstepKey).getUInt();
if (gss != _globalSuperstep) { if (gss != _globalSuperstep ||
!(_state == ExecutionState::RUNNING ||
_state == ExecutionState::CANCELED)) {
LOG(WARN) << "Conductor did received a callback from the wrong superstep"; LOG(WARN) << "Conductor did received a callback from the wrong superstep";
return; return;
} }
_ensureCorrectness(data); VPackSlice slice = data.get(Utils::gssDone);
bool gssDone = slice.isBool() && slice.getBool();
// collect worker information if (!_asyncMode || gssDone) {
VPackSlice workerValues = data.get(Utils::aggregatorValuesKey); _ensureUniqueResponse(data);
if (workerValues.isObject()) {
_aggregatorUsage->aggregateValues(workerValues); // collect worker information
slice = data.get(Utils::aggregatorValuesKey);
if (slice.isObject()) {
_aggregators->aggregateValues(slice);
}
_workerStats.accumulate(data);
} }
_workerStats.accumulate(data);
if (_respondedServers.size() != _dbServers.size()) { if (_respondedServers.size() != _dbServers.size()) {
return; return;
} }
bool proceed = true; bool proceed = true;
if (_masterContext) { // ask algorithm to evaluate aggregated values if (_masterContext) { // ask algorithm to evaluate aggregated values
proceed = _masterContext->postGlobalSuperstep(_globalSuperstep); proceed = _masterContext->postGlobalSuperstep(_globalSuperstep);
} }
LOG(INFO) << "Finished gss " << _globalSuperstep; LOG(INFO) << "Finished gss " << _globalSuperstep;
_globalSuperstep++; _globalSuperstep++;
@ -178,7 +189,7 @@ void Conductor::finishedGlobalStep(VPackSlice& data) {
proceed = proceed && _globalSuperstep <= 100; proceed = proceed && _globalSuperstep <= 100;
if (proceed && !workersDone && _state == ExecutionState::RUNNING) { if (proceed && !workersDone && _state == ExecutionState::RUNNING) {
_startGlobalStep();// trigger next superstep _startGlobalStep(); // trigger next superstep
} else if (_state == ExecutionState::RUNNING || } else if (_state == ExecutionState::RUNNING ||
_state == ExecutionState::CANCELED) { _state == ExecutionState::CANCELED) {
if (_state == ExecutionState::CANCELED) { if (_state == ExecutionState::CANCELED) {
@ -195,7 +206,7 @@ void Conductor::finishedGlobalStep(VPackSlice& data) {
// tells workers to store / discard results // tells workers to store / discard results
_finalizeWorkers(); _finalizeWorkers();
} else {// this prop shouldn't occur, } else { // this prop shouldn't occur,
LOG(WARN) << "No further action taken after receiving all responses"; LOG(WARN) << "No further action taken after receiving all responses";
} }
} }
@ -206,12 +217,11 @@ void Conductor::finishedRecovery(VPackSlice& data) {
LOG(WARN) << "We are not in a state where we expect a recovery response"; LOG(WARN) << "We are not in a state where we expect a recovery response";
return; return;
} }
_ensureCorrectness(data); _ensureUniqueResponse(data);
if (_respondedServers.size() != _dbServers.size()) { if (_respondedServers.size() != _dbServers.size()) {
return; return;
} }
if (_algorithm->supportsCompensation()) { if (_algorithm->supportsCompensation()) {
bool proceed = false; bool proceed = false;
if (_masterContext) { if (_masterContext) {
@ -222,15 +232,15 @@ void Conductor::finishedRecovery(VPackSlice& data) {
b.openObject(); b.openObject();
b.add(Utils::executionNumberKey, VPackValue(_executionNumber)); b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep)); b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep));
if (_aggregatorUsage->size() > 0) { if (_aggregators->size() > 0) {
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object)); b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
_aggregatorUsage->serializeValues(b); _aggregators->serializeValues(b);
b.close(); b.close();
} }
b.close(); b.close();
// reset values which are calculated during the superstep // reset values which are calculated during the superstep
_aggregatorUsage->resetValues(); _aggregators->resetValues();
_workerStats.activeCount = 0; _workerStats.activeCount = 0;
// first allow all workers to run worker level operations // first allow all workers to run worker level operations
@ -248,8 +258,10 @@ void Conductor::finishedRecovery(VPackSlice& data) {
} }
void Conductor::cancel() { void Conductor::cancel() {
if (_state == ExecutionState::RUNNING ||
if (_state == ExecutionState::RUNNING || _state == ExecutionState::RECOVERING) { _state == ExecutionState::RECOVERING) {
_state = ExecutionState::CANCELED;
VPackBuilder b; VPackBuilder b;
b.openObject(); b.openObject();
b.add(Utils::executionNumberKey, VPackValue(_executionNumber)); b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
@ -257,7 +269,6 @@ void Conductor::cancel() {
b.close(); b.close();
_sendToAllDBServers(Utils::cancelGSSPath, b.slice()); _sendToAllDBServers(Utils::cancelGSSPath, b.slice());
} }
_state = ExecutionState::CANCELED; _state = ExecutionState::CANCELED;
// stop monitoring shards // stop monitoring shards
RecoveryManager* mngr = PregelFeature::instance()->recoveryManager(); RecoveryManager* mngr = PregelFeature::instance()->recoveryManager();
@ -296,7 +307,7 @@ void Conductor::startRecovery() {
cancel(); cancel();
return; return;
} }
VPackBuilder b; VPackBuilder b;
b.openObject(); b.openObject();
b.add(Utils::executionNumberKey, VPackValue(_executionNumber)); b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
@ -304,8 +315,9 @@ void Conductor::startRecovery() {
b.close(); b.close();
_dbServers = goodServers; _dbServers = goodServers;
_sendToAllDBServers(Utils::cancelGSSPath, b.slice()); _sendToAllDBServers(Utils::cancelGSSPath, b.slice());
usleep(5 * 1000000);// workers may need a little bit
// Let's try recovery
if (_algorithm->supportsCompensation()) { if (_algorithm->supportsCompensation()) {
if (_masterContext) { if (_masterContext) {
_masterContext->preCompensation(_globalSuperstep); _masterContext->preCompensation(_globalSuperstep);
@ -314,13 +326,13 @@ void Conductor::startRecovery() {
VPackBuilder b; VPackBuilder b;
b.openObject(); b.openObject();
b.add(Utils::recoveryMethodKey, VPackValue(Utils::compensate)); b.add(Utils::recoveryMethodKey, VPackValue(Utils::compensate));
if (_aggregatorUsage->size() > 0) { if (_aggregators->size() > 0) {
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object)); b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
_aggregatorUsage->serializeValues(b); _aggregators->serializeValues(b);
b.close(); b.close();
} }
b.close(); b.close();
_aggregatorUsage->resetValues(); _aggregators->resetValues();
_workerStats.activeCount = 0; _workerStats.activeCount = 0;
// initialize workers will reconfigure the workers and set the // initialize workers will reconfigure the workers and set the
@ -396,7 +408,7 @@ int Conductor::_initializeWorkers(std::string const& suffix,
if (_masterContext && _masterContext->_vertexCount == 0) { if (_masterContext && _masterContext->_vertexCount == 0) {
_masterContext->_vertexCount = vertexCount; _masterContext->_vertexCount = vertexCount;
_masterContext->_edgeCount = edgeCount; _masterContext->_edgeCount = edgeCount;
_masterContext->_aggregators = _aggregatorUsage.get(); _masterContext->_aggregators = _aggregators.get();
_masterContext->preApplication(); _masterContext->preApplication();
} }
@ -517,7 +529,7 @@ int Conductor::_sendToAllDBServers(std::string const& suffix,
return nrGood == requests.size() ? TRI_ERROR_NO_ERROR : TRI_ERROR_FAILED; return nrGood == requests.size() ? TRI_ERROR_NO_ERROR : TRI_ERROR_FAILED;
} }
void Conductor::_ensureCorrectness(VPackSlice body) { void Conductor::_ensureUniqueResponse(VPackSlice body) {
// check if this is the only time we received this // check if this is the only time we received this
ServerID sender = body.get(Utils::senderKey).copyString(); ServerID sender = body.get(Utils::senderKey).copyString();
if (_respondedServers.find(sender) != _respondedServers.end()) { if (_respondedServers.find(sender) != _respondedServers.end()) {

View File

@ -35,55 +35,61 @@ namespace arangodb {
class RestPregelHandler; class RestPregelHandler;
namespace pregel { namespace pregel {
enum ExecutionState { DEFAULT,// before calling start enum ExecutionState {
RUNNING,// during normal operation DEFAULT, // before calling start
DONE,// after everything is done RUNNING, // during normal operation
CANCELED,// after an error or manual canceling DONE, // after everything is done
RECOVERING// during recovery CANCELED, // after an error or manual canceling
RECOVERING // during recovery
}; };
class MasterContext; class MasterContext;
class AggregatorUsage; class AggregatorHandler;
struct IAlgorithm; struct IAlgorithm;
class Conductor { class Conductor {
friend class arangodb::RestPregelHandler; friend class arangodb::RestPregelHandler;
ExecutionState _state = ExecutionState::DEFAULT; ExecutionState _state = ExecutionState::DEFAULT;
const VocbaseGuard _vocbaseGuard; const VocbaseGuard _vocbaseGuard;
const uint64_t _executionNumber; const uint64_t _executionNumber;
std::unique_ptr<IAlgorithm> _algorithm; std::unique_ptr<IAlgorithm> _algorithm;
VPackBuilder _userParams; VPackBuilder _userParams;
Mutex _callbackMutex; // prevents concurrent calls to finishedGlobalStep Mutex _callbackMutex; // prevents concurrent calls to finishedGlobalStep
std::vector<std::shared_ptr<LogicalCollection>> _vertexCollections; std::vector<std::shared_ptr<LogicalCollection>> _vertexCollections;
std::vector<std::shared_ptr<LogicalCollection>> _edgeCollections; std::vector<std::shared_ptr<LogicalCollection>> _edgeCollections;
std::vector<ServerID> _dbServers; std::vector<ServerID> _dbServers;
// initialized on startup // initialized on startup
std::unique_ptr<AggregatorUsage> _aggregatorUsage; std::unique_ptr<AggregatorHandler> _aggregators;
std::unique_ptr<MasterContext> _masterContext; std::unique_ptr<MasterContext> _masterContext;
/// some tracking info
double _startTimeSecs = 0, _endTimeSecs = 0; double _startTimeSecs = 0, _endTimeSecs = 0;
uint64_t _globalSuperstep = 0; uint64_t _globalSuperstep = 0;
/// tracks the servers which responded, only used for stages where we expect a
/// unique response, not necessarily during the async mode
std::set<ServerID> _respondedServers; std::set<ServerID> _respondedServers;
bool _asyncMode = false;
/// persistent tracking of active vertices, sent messages, runtimes
WorkerStats _workerStats; WorkerStats _workerStats;
bool _startGlobalStep(); bool _startGlobalStep();
int _initializeWorkers(std::string const& suffix, VPackSlice additional); int _initializeWorkers(std::string const& suffix, VPackSlice additional);
int _finalizeWorkers(); int _finalizeWorkers();
int _sendToAllDBServers(std::string const& suffix, VPackSlice const& message); int _sendToAllDBServers(std::string const& suffix, VPackSlice const& message);
void _ensureCorrectness(VPackSlice body); void _ensureUniqueResponse(VPackSlice body);
// === REST callbacks === // === REST callbacks ===
void finishedWorkerStartup(VPackSlice& data); void finishedWorkerStartup(VPackSlice& data);
void finishedGlobalStep(VPackSlice& data); void finishedWorkerStep(VPackSlice& data);
void finishedRecovery(VPackSlice& data); void finishedRecovery(VPackSlice& data);
public: public:
Conductor(uint64_t executionNumber, TRI_vocbase_t* vocbase, Conductor(
std::vector<std::shared_ptr<LogicalCollection>> const& vertexCollections, uint64_t executionNumber, TRI_vocbase_t* vocbase,
std::vector<std::shared_ptr<LogicalCollection>> const& edgeCollections); std::vector<std::shared_ptr<LogicalCollection>> const& vertexCollections,
std::vector<std::shared_ptr<LogicalCollection>> const& edgeCollections);
~Conductor(); ~Conductor();
void start(std::string const& algoName, VPackSlice userConfig); void start(std::string const& algoName, VPackSlice userConfig);
@ -91,10 +97,11 @@ class Conductor {
void startRecovery(); void startRecovery();
ExecutionState getState() const { return _state; } ExecutionState getState() const { return _state; }
WorkerStats workerStats() const {return _workerStats;} WorkerStats workerStats() const { return _workerStats; }
uint64_t globalSuperstep() const {return _globalSuperstep;} uint64_t globalSuperstep() const { return _globalSuperstep; }
double totalRuntimeSecs() { double totalRuntimeSecs() {
return _endTimeSecs == 0 ? TRI_microtime() - _startTimeSecs : _endTimeSecs - _startTimeSecs; return _endTimeSecs == 0 ? TRI_microtime() - _startTimeSecs
: _endTimeSecs - _startTimeSecs;
} }
}; };
} }

View File

@ -27,7 +27,7 @@ namespace arangodb {
namespace pregel { namespace pregel {
typedef uint16_t prgl_shard_t; typedef uint16_t prgl_shard_t;
/// @brief header entry for the edge file /// @brief header entry for the edge file
template <typename E> template <typename E>
class Edge { class Edge {
@ -35,15 +35,11 @@ class Edge {
prgl_shard_t _targetShard; prgl_shard_t _targetShard;
std::string _toKey; std::string _toKey;
E _data; E _data;
public: public:
// EdgeEntry() : _nextEntryOffset(0), _dataSize(0), _vertexIDSize(0) {} // EdgeEntry() : _nextEntryOffset(0), _dataSize(0), _vertexIDSize(0) {}
Edge(prgl_shard_t source, Edge(prgl_shard_t source, prgl_shard_t target, std::string const& key)
prgl_shard_t target, : _sourceShard(source), _targetShard(target), _toKey(key) {}
std::string const& key)
: _sourceShard(source),
_targetShard(target),
_toKey(key) {}
// size_t getSize() { return sizeof(EdgeEntry) + _vertexIDSize + _dataSize; } // size_t getSize() { return sizeof(EdgeEntry) + _vertexIDSize + _dataSize; }
std::string const& toKey() const { return _toKey; } std::string const& toKey() const { return _toKey; }
@ -51,19 +47,15 @@ class Edge {
inline E* data() { inline E* data() {
return &_data; // static_cast<E>(this + sizeof(EdgeEntry) + _vertexIDSize); return &_data; // static_cast<E>(this + sizeof(EdgeEntry) + _vertexIDSize);
} }
inline prgl_shard_t sourceShard() const { inline prgl_shard_t sourceShard() const { return _sourceShard; }
return _sourceShard; inline prgl_shard_t targetShard() const { return _targetShard; }
}
inline prgl_shard_t targetShard() const {
return _targetShard;
}
}; };
class VertexEntry { class VertexEntry {
template <typename V, typename E> template <typename V, typename E>
friend class GraphStore; friend class GraphStore;
const prgl_shard_t _shard;// TODO optimize and remove const prgl_shard_t _shard; // TODO optimize and remove
const std::string _key; const std::string _key;
size_t _vertexDataOffset; // size_t vertexID length size_t _vertexDataOffset; // size_t vertexID length
size_t _edgeDataOffset; size_t _edgeDataOffset;
@ -85,14 +77,14 @@ class VertexEntry {
inline size_t getSize() { return sizeof(VertexEntry); } inline size_t getSize() { return sizeof(VertexEntry); }
inline bool active() const { return _active; } inline bool active() const { return _active; }
inline void setActive(bool bb) { _active = bb; } inline void setActive(bool bb) { _active = bb; }
inline prgl_shard_t shard() const {return _shard;} inline prgl_shard_t shard() const { return _shard; }
inline std::string const& key() const { return _key; }; inline std::string const& key() const { return _key; };
/*std::string const& key() const { /*std::string const& key() const {
return std::string(_key, _keySize); return std::string(_key, _keySize);
};*/ };*/
}; };
// unused right now // unused right now
/*class LinkedListIterator { /*class LinkedListIterator {
private: private:

View File

@ -70,7 +70,7 @@ class IntegerGraphFormat : public GraphFormat<int64_t, int64_t> {
int64_t readVertexData(const void* ptr) override { return *((int64_t*)ptr); } int64_t readVertexData(const void* ptr) override { return *((int64_t*)ptr); }
int64_t readEdgeData(const void* ptr) override { return *((int64_t*)ptr); } int64_t readEdgeData(const void* ptr) override { return *((int64_t*)ptr); }
size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr, size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr,
size_t maxSize) override { size_t maxSize) override {
arangodb::velocypack::Slice val = document.get(_sourceField); arangodb::velocypack::Slice val = document.get(_sourceField);
*((int64_t*)targetPtr) = val.isInteger() ? val.getInt() : _vDefault; *((int64_t*)targetPtr) = val.isInteger() ? val.getInt() : _vDefault;
@ -84,19 +84,19 @@ class IntegerGraphFormat : public GraphFormat<int64_t, int64_t> {
return sizeof(int64_t); return sizeof(int64_t);
} }
void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr, void buildVertexDocument(arangodb::velocypack::Builder& b,
size_t size) override { const void* targetPtr, size_t size) override {
b.add(_resultField, VPackValue(readVertexData(targetPtr))); b.add(_resultField, VPackValue(readVertexData(targetPtr)));
} }
void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr, void buildEdgeDocument(arangodb::velocypack::Builder& b,
size_t size) override { const void* targetPtr, size_t size) override {
b.add(_resultField, VPackValue(readEdgeData(targetPtr))); b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
} }
}; };
class FloatGraphFormat : public GraphFormat<float, float> { class FloatGraphFormat : public GraphFormat<float, float> {
protected: protected:
const std::string _sourceField, _resultField; const std::string _sourceField, _resultField;
const float _vDefault, _eDefault; const float _vDefault, _eDefault;
@ -125,13 +125,13 @@ protected:
return sizeof(float); return sizeof(float);
} }
void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr, void buildVertexDocument(arangodb::velocypack::Builder& b,
size_t size) override { const void* targetPtr, size_t size) override {
b.add(_resultField, VPackValue(readVertexData(targetPtr))); b.add(_resultField, VPackValue(readVertexData(targetPtr)));
} }
void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr, void buildEdgeDocument(arangodb::velocypack::Builder& b,
size_t size) override { const void* targetPtr, size_t size) override {
b.add(_resultField, VPackValue(readEdgeData(targetPtr))); b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
} }
}; };
@ -145,7 +145,7 @@ protected:
const std::string _sourceField, _resultField; const std::string _sourceField, _resultField;
const V _vDefault; const V _vDefault;
const E _eDefault; const E _eDefault;
public: public:
NumberGraphFormat(std::string const& source, std::string const& result, NumberGraphFormat(std::string const& source, std::string const& result,
V vertexNull, E edgeNull) V vertexNull, E edgeNull)
@ -153,10 +153,10 @@ public:
_resultField(result), _resultField(result),
_vDefault(vertexNull), _vDefault(vertexNull),
_eDefault(edgeNull) {} _eDefault(edgeNull) {}
V readVertexData(void* ptr) override { return *((V*)ptr); } V readVertexData(void* ptr) override { return *((V*)ptr); }
E readEdgeData(void* ptr) override { return *((E*)ptr); } E readEdgeData(void* ptr) override { return *((E*)ptr); }
size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr, size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr,
size_t maxSize) override { size_t maxSize) override {
arangodb::velocypack::Slice val = document.get(_sourceField); arangodb::velocypack::Slice val = document.get(_sourceField);
@ -171,7 +171,7 @@ public:
} }
return sizeof(V); return sizeof(V);
} }
size_t copyEdgeData(arangodb::velocypack::Slice document, void* targetPtr, size_t copyEdgeData(arangodb::velocypack::Slice document, void* targetPtr,
size_t maxSize) override { size_t maxSize) override {
arangodb::velocypack::Slice val = document.get(_sourceField); arangodb::velocypack::Slice val = document.get(_sourceField);
@ -186,13 +186,15 @@ public:
} }
return sizeof(E); return sizeof(E);
} }
void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr, void buildVertexDocument(arangodb::velocypack::Builder& b, const void*
targetPtr,
size_t size) override { size_t size) override {
b.add(_resultField, VPackValue(readVertexData(targetPtr))); b.add(_resultField, VPackValue(readVertexData(targetPtr)));
} }
void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr, void buildEdgeDocument(arangodb::velocypack::Builder& b, const void*
targetPtr,
size_t size) override { size_t size) override {
b.add(_resultField, VPackValue(readEdgeData(targetPtr))); b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
} }

View File

@ -43,16 +43,8 @@ using namespace arangodb;
using namespace arangodb::pregel; using namespace arangodb::pregel;
template <typename V, typename E> template <typename V, typename E>
GraphStore<V, E>::GraphStore(TRI_vocbase_t* vb, WorkerState const& state, GraphStore<V, E>::GraphStore(TRI_vocbase_t* vb, GraphFormat<V, E>* graphFormat)
GraphFormat<V, E>* graphFormat) : _vocbaseGuard(vb), _graphFormat(graphFormat) {}
: _vocbaseGuard(vb), _graphFormat(graphFormat) {
// _edgeCollection = ClusterInfo::instance()->getCollection(
// vb->name(), state->edgeCollectionPlanId());
loadShards(state);
LOG(INFO) << "Loaded " << _index.size() << "vertices and " << _edges.size()
<< " edges";
}
template <typename V, typename E> template <typename V, typename E>
GraphStore<V, E>::~GraphStore() { GraphStore<V, E>::~GraphStore() {
@ -61,22 +53,7 @@ GraphStore<V, E>::~GraphStore() {
template <typename V, typename E> template <typename V, typename E>
void GraphStore<V, E>::loadShards(WorkerState const& state) { void GraphStore<V, E>::loadShards(WorkerState const& state) {
std::vector<std::string> readColls, writeColls; _createReadTransaction(state);
for (auto shard : state.localVertexShardIDs()) {
readColls.push_back(shard);
}
for (auto shard : state.localEdgeShardIDs()) {
readColls.push_back(shard);
}
double lockTimeout =
(double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);
_transaction = new ExplicitTransaction(
StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
writeColls, lockTimeout, false, false);
int res = _transaction->begin();
if (res != TRI_ERROR_NO_ERROR) {
THROW_ARANGO_EXCEPTION(res);
}
std::map<CollectionID, std::vector<ShardID>> const& vertexMap = std::map<CollectionID, std::vector<ShardID>> const& vertexMap =
state.vertexCollectionShards(); state.vertexCollectionShards();
@ -108,6 +85,50 @@ void GraphStore<V, E>::loadShards(WorkerState const& state) {
_cleanupTransactions(); _cleanupTransactions();
} }
/// @brief Placeholder for loading a single vertex document, identified by
/// `_key` inside `shard`, into the store.
///
/// Currently a no-op: the entire implementation below is commented out, so
/// calling this function has no effect and none of its parameters are used.
/// NOTE(review): the disabled code passes `edgeShard` to _loadEdges, but no
/// such name is declared in this function - it has to be supplied before the
/// body can be re-enabled.
template <typename V, typename E>
void GraphStore<V, E>::loadDocument(WorkerState const& state,
ShardID const& shard,
std::string const& _key) {
/*if (_readTrx == nullptr) {
_createReadTransaction(state);
}
prgl_shard_t sourceShard = (prgl_shard_t)state.shardId(shard);
bool storeData = _graphFormat->storesVertexData();
VPackBuilder builder;
builder.openObject();
builder.add(StaticStrings::KeyString, VPackValue(_key));
builder.close();
OperationOptions options;
options.ignoreRevs = false;
TRI_voc_cid_t cid = _readTrx->addCollectionAtRuntime(shard);
_readTrx->orderDitch(cid); // will throw when it fails
OperationResult opResult = _readTrx->document(shard, builder.slice(),
options);
if (!opResult.successful()) {
_cleanupTransactions();
THROW_ARANGO_EXCEPTION(opResult.code);
}
VertexEntry entry(sourceShard, _key);
if (storeData) {
V vertexData;
size_t size =
_graphFormat->copyVertexData(opResult.slice(), &vertexData, sizeof(V));
if (size > 0) {
entry._vertexDataOffset = _vertexData.size();
_vertexData.push_back(vertexData);
}
}
std::string documentId = _readTrx->extractIdString(opResult.slice());
_loadEdges(state, edgeShard, entry, documentId);
_index.push_back(entry);*/
}
template <typename V, typename E> template <typename V, typename E>
RangeIterator<VertexEntry> GraphStore<V, E>::vertexIterator() { RangeIterator<VertexEntry> GraphStore<V, E>::vertexIterator() {
return vertexIterator(0, _index.size()); return vertexIterator(0, _index.size());
@ -144,16 +165,36 @@ RangeIterator<Edge<E>> GraphStore<V, E>::edgeIterator(
return RangeIterator<Edge<E>>(_edges, entry->_edgeDataOffset, end); return RangeIterator<Edge<E>>(_edges, entry->_edgeDataOffset, end);
} }
/// @brief Opens the read transaction (`_readTrx`) used while loading the graph.
///
/// Every local vertex shard and every local edge shard reported by `state` is
/// registered as a read collection; no write collections are registered.
///
/// @param state worker state providing the local vertex / edge shard ids
/// @throws an arango exception carrying the error code when the transaction
///         cannot be started. In that case the transaction object is released
///         right away and `_readTrx` is reset to nullptr, instead of leaving
///         a failed transaction behind for some later cleanup.
template <typename V, typename E>
void GraphStore<V, E>::_createReadTransaction(WorkerState const& state) {
  std::vector<std::string> readColls, writeColls;
  // bind by const reference to avoid an extra copy of each shard name
  for (auto const& shard : state.localVertexShardIDs()) {
    readColls.push_back(shard);
  }
  for (auto const& shard : state.localEdgeShardIDs()) {
    readColls.push_back(shard);
  }

  // same value as before: the default lock timeout divided by 1000000
  double lockTimeout =
      static_cast<double>(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);

  // keep ownership local until begin() succeeded, so the throw path below
  // cannot leave an unusable transaction stored in the member
  std::unique_ptr<ExplicitTransaction> trx(new ExplicitTransaction(
      StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
      writeColls, lockTimeout, false, false));
  int res = trx->begin();
  if (res != TRI_ERROR_NO_ERROR) {
    _readTrx = nullptr;
    THROW_ARANGO_EXCEPTION(res);
  }
  _readTrx = trx.release();
}
template <typename V, typename E> template <typename V, typename E>
void GraphStore<V, E>::_cleanupTransactions() { void GraphStore<V, E>::_cleanupTransactions() {
if (_transaction) { if (_readTrx) {
if (_transaction->getStatus() == TRI_TRANSACTION_RUNNING) { if (_readTrx->getStatus() == TRI_TRANSACTION_RUNNING) {
if (_transaction->commit() != TRI_ERROR_NO_ERROR) { if (_readTrx->commit() != TRI_ERROR_NO_ERROR) {
LOG(WARN) << "Pregel worker: Failed to commit on a read transaction"; LOG(WARN) << "Pregel worker: Failed to commit on a read transaction";
} }
} }
delete _transaction; delete _readTrx;
_transaction = nullptr; _readTrx = nullptr;
} }
} }
@ -164,18 +205,18 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
//_graphFormat->willUseCollection(vocbase, vertexShard, false); //_graphFormat->willUseCollection(vocbase, vertexShard, false);
bool storeData = _graphFormat->storesVertexData(); bool storeData = _graphFormat->storesVertexData();
TRI_voc_cid_t cid = _transaction->addCollectionAtRuntime(vertexShard); TRI_voc_cid_t cid = _readTrx->addCollectionAtRuntime(vertexShard);
_transaction->orderDitch(cid); // will throw when it fails _readTrx->orderDitch(cid); // will throw when it fails
prgl_shard_t sourceShard = (prgl_shard_t)state.shardId(vertexShard); prgl_shard_t sourceShard = (prgl_shard_t)state.shardId(vertexShard);
/*int res = _transaction->lockRead(); /*int res = _readTrx->lockRead();
if (res != TRI_ERROR_NO_ERROR) { if (res != TRI_ERROR_NO_ERROR) {
THROW_ARANGO_EXCEPTION_FORMAT(res, "while looking up vertices '%s'", THROW_ARANGO_EXCEPTION_FORMAT(res, "while looking up vertices '%s'",
vertexShard.c_str()); vertexShard.c_str());
}*/ }*/
ManagedDocumentResult mmdr(_transaction); ManagedDocumentResult mmdr(_readTrx);
std::unique_ptr<OperationCursor> cursor = _transaction->indexScan( std::unique_ptr<OperationCursor> cursor = _readTrx->indexScan(
vertexShard, Transaction::CursorType::ALL, Transaction::IndexHandle(), {}, vertexShard, Transaction::CursorType::ALL, Transaction::IndexHandle(), {},
&mmdr, 0, UINT64_MAX, 1000, false); &mmdr, 0, UINT64_MAX, 1000, false);
@ -192,13 +233,13 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
cursor->getMoreMptr(result, 1000); cursor->getMoreMptr(result, 1000);
for (auto const& element : result) { for (auto const& element : result) {
TRI_voc_rid_t revisionId = element.revisionId(); TRI_voc_rid_t revisionId = element.revisionId();
if (collection->readRevision(_transaction, mmdr, revisionId)) { if (collection->readRevision(_readTrx, mmdr, revisionId)) {
VPackSlice document(mmdr.vpack()); VPackSlice document(mmdr.vpack());
if (document.isExternal()) { if (document.isExternal()) {
document = document.resolveExternal(); document = document.resolveExternal();
} }
//LOG(INFO) << "Loaded Vertex: " << document.toJson(); // LOG(INFO) << "Loaded Vertex: " << document.toJson();
std::string key = document.get(StaticStrings::KeyString).copyString(); std::string key = document.get(StaticStrings::KeyString).copyString();
VertexEntry entry(sourceShard, key); VertexEntry entry(sourceShard, key);
@ -214,7 +255,7 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
} }
} }
std::string documentId = _transaction->extractIdString(document); std::string documentId = _readTrx->extractIdString(document);
_loadEdges(state, edgeShard, entry, documentId); _loadEdges(state, edgeShard, entry, documentId);
_index.push_back(entry); _index.push_back(entry);
} }
@ -230,10 +271,10 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
const bool storeData = _graphFormat->storesEdgeData(); const bool storeData = _graphFormat->storesEdgeData();
// Transaction* trx = readTransaction(shard); // Transaction* trx = readTransaction(shard);
traverser::EdgeCollectionInfo info(_transaction, edgeShard, TRI_EDGE_OUT, traverser::EdgeCollectionInfo info(_readTrx, edgeShard, TRI_EDGE_OUT,
StaticStrings::FromString, 0); StaticStrings::FromString, 0);
ManagedDocumentResult mmdr(_transaction); ManagedDocumentResult mmdr(_readTrx);
auto cursor = info.getEdges(documentID, &mmdr); auto cursor = info.getEdges(documentID, &mmdr);
if (cursor->failed()) { if (cursor->failed()) {
THROW_ARANGO_EXCEPTION_FORMAT(cursor->code, THROW_ARANGO_EXCEPTION_FORMAT(cursor->code,
@ -253,7 +294,7 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
cursor->getMoreMptr(result, 1000); cursor->getMoreMptr(result, 1000);
for (auto const& element : result) { for (auto const& element : result) {
TRI_voc_rid_t revisionId = element.revisionId(); TRI_voc_rid_t revisionId = element.revisionId();
if (collection->readRevision(_transaction, mmdr, revisionId)) { if (collection->readRevision(_readTrx, mmdr, revisionId)) {
VPackSlice document(mmdr.vpack()); VPackSlice document(mmdr.vpack());
if (document.isExternal()) { if (document.isExternal()) {
document = document.resolveExternal(); document = document.resolveExternal();
@ -262,7 +303,7 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
// ====== actual loading ====== // ====== actual loading ======
vertexEntry._edgeCount += 1; vertexEntry._edgeCount += 1;
//LOG(INFO) << "Loaded Edge: " << document.toJson(); // LOG(INFO) << "Loaded Edge: " << document.toJson();
std::string toValue = std::string toValue =
document.get(StaticStrings::ToString).copyString(); document.get(StaticStrings::ToString).copyString();
@ -301,28 +342,6 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
}*/ }*/
} }
/*template <typename V, typename E>
SingleCollectionTransaction* GraphStore<V, E>::writeTransaction(ShardID const&
shard) {
auto it = _transactions.find(shard);
if (it != _transactions.end()) {
return it->second;
} else {
auto trx = std::make_unique<SingleCollectionTransaction>(
StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()),
shard,
TRI_TRANSACTION_WRITE);
int res = trx->begin();
if (res != TRI_ERROR_NO_ERROR) {
THROW_ARANGO_EXCEPTION_FORMAT(res, "during transaction of shard '%s'",
shard.c_str());
}
_transactions[shard] = trx.get();
return trx.release();
}
}*/
template <typename V, typename E> template <typename V, typename E>
void GraphStore<V, E>::storeResults(WorkerState const& state) { void GraphStore<V, E>::storeResults(WorkerState const& state) {
std::vector<std::string> readColls, writeColls; std::vector<std::string> readColls, writeColls;
@ -337,10 +356,10 @@ void GraphStore<V, E>::storeResults(WorkerState const& state) {
//} //}
double lockTimeout = double lockTimeout =
(double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL); (double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);
_transaction = new ExplicitTransaction( ExplicitTransaction writeTrx(
StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls, StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
writeColls, lockTimeout, false, false); writeColls, lockTimeout, false, false);
int res = _transaction->begin(); int res = writeTrx.begin();
if (res != TRI_ERROR_NO_ERROR) { if (res != TRI_ERROR_NO_ERROR) {
THROW_ARANGO_EXCEPTION(res); THROW_ARANGO_EXCEPTION(res);
} }
@ -356,14 +375,17 @@ void GraphStore<V, E>::storeResults(WorkerState const& state) {
_graphFormat->buildVertexDocument(b, data, sizeof(V)); _graphFormat->buildVertexDocument(b, data, sizeof(V));
b.close(); b.close();
OperationResult result = _transaction->update(shard, b.slice(), options); OperationResult result = writeTrx.update(shard, b.slice(), options);
if (result.code != TRI_ERROR_NO_ERROR) { if (result.code != TRI_ERROR_NO_ERROR) {
THROW_ARANGO_EXCEPTION(result.code); THROW_ARANGO_EXCEPTION(result.code);
} }
// TODO loop over edges // TODO loop over edges
} }
_cleanupTransactions(); res = writeTrx.finish(res);
if (res != TRI_ERROR_NO_ERROR) {
THROW_ARANGO_EXCEPTION(res);
}
} }
template class arangodb::pregel::GraphStore<int64_t, int64_t>; template class arangodb::pregel::GraphStore<int64_t, int64_t>;

View File

@ -37,7 +37,7 @@ namespace arangodb {
class Transaction; class Transaction;
class LogicalCollection; class LogicalCollection;
namespace pregel { namespace pregel {
class WorkerState; class WorkerState;
template <typename V, typename E> template <typename V, typename E>
struct GraphFormat; struct GraphFormat;
@ -47,11 +47,10 @@ struct GraphFormat;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
template <typename V, typename E> template <typename V, typename E>
class GraphStore { class GraphStore {
VocbaseGuard _vocbaseGuard; VocbaseGuard _vocbaseGuard;
const std::unique_ptr<GraphFormat<V, E>> _graphFormat; const std::unique_ptr<GraphFormat<V, E>> _graphFormat;
Transaction *_transaction;// temporary transaction Transaction* _readTrx; // temporary transaction
// int _indexFd, _vertexFd, _edgeFd; // int _indexFd, _vertexFd, _edgeFd;
// void *_indexMapping, *_vertexMapping, *_edgeMapping; // void *_indexMapping, *_vertexMapping, *_edgeMapping;
// size_t _indexSize, _vertexSize, _edgeSize; // size_t _indexSize, _vertexSize, _edgeSize;
@ -65,25 +64,23 @@ class GraphStore {
std::set<ShardID> _loadedShards; std::set<ShardID> _loadedShards;
size_t _localVerticeCount; size_t _localVerticeCount;
size_t _localEdgeCount; size_t _localEdgeCount;
void _createReadTransaction(WorkerState const& state);
void _cleanupTransactions(); void _cleanupTransactions();
void _loadVertices(WorkerState const& state, void _loadVertices(WorkerState const& state, ShardID const& vertexShard,
ShardID const& vertexShard,
ShardID const& edgeShard); ShardID const& edgeShard);
void _loadEdges(WorkerState const& state, void _loadEdges(WorkerState const& state, ShardID const& shard,
ShardID const& shard, VertexEntry& vertexEntry, std::string const& documentID);
VertexEntry& vertexEntry,
std::string const& documentID);
public: public:
GraphStore(TRI_vocbase_t* vocbase, WorkerState const& state, GraphStore(TRI_vocbase_t* vocbase, GraphFormat<V, E>* graphFormat);
GraphFormat<V, E>* graphFormat);
~GraphStore(); ~GraphStore();
void loadShards(WorkerState const& state); void loadShards(WorkerState const& state);
inline size_t vertexCount() { void loadDocument(WorkerState const& state, ShardID const& shard,
return _index.size(); std::string const& _key);
}
inline size_t vertexCount() { return _index.size(); }
RangeIterator<VertexEntry> vertexIterator(); RangeIterator<VertexEntry> vertexIterator();
RangeIterator<VertexEntry> vertexIterator(size_t start, size_t count); RangeIterator<VertexEntry> vertexIterator(size_t start, size_t count);
RangeIterator<Edge<E>> edgeIterator(VertexEntry const* entry); RangeIterator<Edge<E>> edgeIterator(VertexEntry const* entry);
@ -91,7 +88,7 @@ class GraphStore {
void* mutableVertexData(VertexEntry const* entry); void* mutableVertexData(VertexEntry const* entry);
V copyVertexData(VertexEntry const* entry); V copyVertexData(VertexEntry const* entry);
void replaceVertexData(VertexEntry const* entry, void* data, size_t size); void replaceVertexData(VertexEntry const* entry, void* data, size_t size);
/// Write results to database /// Write results to database
void storeResults(WorkerState const& state); void storeResults(WorkerState const& state);
}; };

View File

@ -30,6 +30,9 @@
#include <velocypack/Iterator.h> #include <velocypack/Iterator.h>
#include <velocypack/velocypack-aliases.h> #include <velocypack/velocypack-aliases.h>
//#include <libcuckoo/city_hasher.hh>
//#include <libcuckoo/cuckoohash_map.hh>
using namespace arangodb; using namespace arangodb;
using namespace arangodb::pregel; using namespace arangodb::pregel;
@ -87,7 +90,7 @@ void ArrayInCache<M>::mergeCache(InCache<M> const* otherCache) {
// cannot call setDirect since it locks // cannot call setDirect since it locks
for (auto const& pair : other->_shardMap) { for (auto const& pair : other->_shardMap) {
HMap &vertexMap = _shardMap[pair.first]; HMap& vertexMap = _shardMap[pair.first];
for (auto& vertexMessage : pair.second) { for (auto& vertexMessage : pair.second) {
std::vector<M>& a = vertexMap[vertexMessage.first]; std::vector<M>& a = vertexMap[vertexMessage.first];
std::vector<M> const& b = vertexMessage.second; std::vector<M> const& b = vertexMessage.second;
@ -119,7 +122,6 @@ void ArrayInCache<M>::clear() {
_shardMap.clear(); _shardMap.clear();
} }
template <typename M> template <typename M>
void ArrayInCache<M>::erase(prgl_shard_t shard, std::string const& key) { void ArrayInCache<M>::erase(prgl_shard_t shard, std::string const& key) {
MUTEX_LOCKER(guard, this->_writeLock); MUTEX_LOCKER(guard, this->_writeLock);
@ -134,6 +136,19 @@ void CombiningInCache<M>::setDirect(prgl_shard_t shard, std::string const& key,
M const& newValue) { M const& newValue) {
MUTEX_LOCKER(guard, this->_writeLock); MUTEX_LOCKER(guard, this->_writeLock);
/*cuckoohash_map<int, std::string, CityHasher<int>> Table;
for (int i = 0; i < 100; i++) {
Table[i] = "hello"+std::to_string(i);
}
for (int i = 0; i < 101; i++) {
std::string out;
if (Table.find(i, out)) {
LOG(INFO) << i << " " << out;
} else {
LOG(INFO) << i << " NOT FOUND";
}
}*/
this->_receivedMessageCount++; this->_receivedMessageCount++;
HMap& vertexMap = _shardMap[shard]; HMap& vertexMap = _shardMap[shard];
auto vmsg = vertexMap.find(key); auto vmsg = vertexMap.find(key);

View File

@ -44,7 +44,6 @@ processing */
template <typename M> template <typename M>
class InCache { class InCache {
protected: protected:
mutable Mutex _writeLock; mutable Mutex _writeLock;
size_t _receivedMessageCount = 0; size_t _receivedMessageCount = 0;
MessageFormat<M> const* _format; MessageFormat<M> const* _format;
@ -53,12 +52,12 @@ class InCache {
: _receivedMessageCount(0), _format(format) {} : _receivedMessageCount(0), _format(format) {}
public: public:
virtual ~InCache() {}; virtual ~InCache(){};
MessageFormat<M> const* format() const {return _format;} MessageFormat<M> const* format() const { return _format; }
void parseMessages(VPackSlice messages); void parseMessages(VPackSlice messages);
size_t receivedMessageCount() const { return _receivedMessageCount; } size_t receivedMessageCount() const { return _receivedMessageCount; }
/// @brief internal method to direclty set the messages for a vertex. Only /// @brief internal method to direclty set the messages for a vertex. Only
/// valid with already combined messages /// valid with already combined messages
virtual void setDirect(prgl_shard_t shard, std::string const& vertexId, virtual void setDirect(prgl_shard_t shard, std::string const& vertexId,
@ -66,7 +65,8 @@ class InCache {
virtual void mergeCache(InCache<M> const* otherCache) = 0; virtual void mergeCache(InCache<M> const* otherCache) = 0;
/// @brief get messages for vertex id. (Don't use keys from _from or _to /// @brief get messages for vertex id. (Don't use keys from _from or _to
/// directly, they contain the collection name) /// directly, they contain the collection name)
virtual MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) = 0; virtual MessageIterator<M> getMessages(prgl_shard_t shard,
std::string const& key) = 0;
/// clear cache /// clear cache
virtual void clear() = 0; virtual void clear() = 0;
virtual void erase(prgl_shard_t shard, std::string const& key) = 0; virtual void erase(prgl_shard_t shard, std::string const& key) = 0;
@ -83,7 +83,8 @@ class ArrayInCache : public InCache<M> {
void setDirect(prgl_shard_t shard, std::string const& vertexId, void setDirect(prgl_shard_t shard, std::string const& vertexId,
M const& data) override; M const& data) override;
void mergeCache(InCache<M> const* otherCache) override; void mergeCache(InCache<M> const* otherCache) override;
MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) override; MessageIterator<M> getMessages(prgl_shard_t shard,
std::string const& key) override;
void clear() override; void clear() override;
void erase(prgl_shard_t shard, std::string const& key) override; void erase(prgl_shard_t shard, std::string const& key) override;
}; };
@ -91,21 +92,22 @@ class ArrayInCache : public InCache<M> {
template <typename M> template <typename M>
class CombiningInCache : public InCache<M> { class CombiningInCache : public InCache<M> {
typedef std::unordered_map<std::string, M> HMap; typedef std::unordered_map<std::string, M> HMap;
MessageCombiner<M> const* _combiner; MessageCombiner<M> const* _combiner;
std::map<prgl_shard_t, HMap> _shardMap; std::map<prgl_shard_t, HMap> _shardMap;
public: public:
CombiningInCache(MessageFormat<M> const* format, CombiningInCache(MessageFormat<M> const* format,
MessageCombiner<M> const* combiner) MessageCombiner<M> const* combiner)
: InCache<M>(format), _combiner(combiner) {} : InCache<M>(format), _combiner(combiner) {}
MessageCombiner<M> const* combiner() const {return _combiner;} MessageCombiner<M> const* combiner() const { return _combiner; }
void setDirect(prgl_shard_t shard, std::string const& vertexId, void setDirect(prgl_shard_t shard, std::string const& vertexId,
M const& data) override; M const& data) override;
void mergeCache(InCache<M> const* otherCache) override; void mergeCache(InCache<M> const* otherCache) override;
MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) override; MessageIterator<M> getMessages(prgl_shard_t shard,
std::string const& key) override;
void clear() override; void clear() override;
void erase(prgl_shard_t shard, std::string const& key) override; void erase(prgl_shard_t shard, std::string const& key) override;
}; };

View File

@ -31,7 +31,7 @@ class MessageIterator {
M const* _data; M const* _data;
size_t _current = 0; size_t _current = 0;
const size_t _size = 1; const size_t _size = 1;
public: public:
MessageIterator() : _data(nullptr), _current(0), _size(0) {} MessageIterator() : _data(nullptr), _current(0), _size(0) {}
@ -74,22 +74,21 @@ class MessageIterator {
size_t size() const { return _size; } size_t size() const { return _size; }
}; };
template <typename T> template <typename T>
class RangeIterator { class RangeIterator {
private: private:
// void *_begin, *_end, *_current; // void *_begin, *_end, *_current;
std::vector<T>& _vector; std::vector<T>& _vector;
size_t _begin, _end, _current; size_t _begin, _end, _current;
public: public:
typedef RangeIterator<T> iterator; typedef RangeIterator<T> iterator;
typedef const RangeIterator<T> const_iterator; typedef const RangeIterator<T> const_iterator;
RangeIterator(std::vector<T>& v, size_t begin, size_t end) RangeIterator(std::vector<T>& v, size_t begin, size_t end)
: _vector(v), _begin(begin), _end(end), _current(begin) {} : _vector(v), _begin(begin), _end(end), _current(begin) {}
iterator begin() { return RangeIterator(_vector, _begin, _end); } iterator begin() { return RangeIterator(_vector, _begin, _end); }
const_iterator begin() const { return RangeIterator(_vector, _begin, _end); } const_iterator begin() const { return RangeIterator(_vector, _begin, _end); }
iterator end() { iterator end() {
@ -102,31 +101,31 @@ public:
it._current = it._end; it._current = it._end;
return it; return it;
} }
// prefix ++ // prefix ++
RangeIterator& operator++() { RangeIterator& operator++() {
_current++; _current++;
return *this; return *this;
} }
// postfix ++ // postfix ++
RangeIterator<T>& operator++(int) { RangeIterator<T>& operator++(int) {
RangeIterator<T> result(*this); RangeIterator<T> result(*this);
++(*this); ++(*this);
return result; return result;
} }
T* operator*() const { T* operator*() const {
T* el = _vector.data(); T* el = _vector.data();
return _current != _end ? el + _current : nullptr; return _current != _end ? el + _current : nullptr;
} }
bool operator!=(RangeIterator<T> const& other) const { bool operator!=(RangeIterator<T> const& other) const {
return _current != other._current; return _current != other._current;
} }
size_t size() const { return _end - _begin; } size_t size() const { return _end - _begin; }
/*EdgeIterator(void* beginPtr, void* endPtr) /*EdgeIterator(void* beginPtr, void* endPtr)
: _begin(beginPtr), _end(endPtr), _current(_begin) {} : _begin(beginPtr), _end(endPtr), _current(_begin) {}
iterator begin() { return EdgeIterator(_begin, _end); } iterator begin() { return EdgeIterator(_begin, _end); }
@ -141,19 +140,18 @@ public:
it._current = it._end; it._current = it._end;
return it; return it;
} }
// prefix ++ // prefix ++
EdgeIterator<E>& operator++() { EdgeIterator<E>& operator++() {
EdgeEntry<E>* entry = static_cast<EdgeEntry<E>>(_current); EdgeEntry<E>* entry = static_cast<EdgeEntry<E>>(_current);
_current += entry->getSize(); _current += entry->getSize();
return *this; return *this;
} }
EdgeEntry<E>* operator*() const { EdgeEntry<E>* operator*() const {
return _current != _end ? static_cast<EdgeEntry<E>>(_current) : nullptr; return _current != _end ? static_cast<EdgeEntry<E>>(_current) : nullptr;
}*/ }*/
}; };
} }
} }
#endif #endif

View File

@ -25,9 +25,9 @@
#include <velocypack/Slice.h> #include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h> #include <velocypack/velocypack-aliases.h>
#include "AggregatorUsage.h"
#include "Basics/Common.h" #include "Basics/Common.h"
#include "Utils.h" #include "Pregel/AggregatorHandler.h"
#include "Pregel/Utils.h"
namespace arangodb { namespace arangodb {
namespace pregel { namespace pregel {
@ -36,7 +36,7 @@ class MasterContext {
friend class Conductor; friend class Conductor;
uint64_t _vertexCount, _edgeCount; uint64_t _vertexCount, _edgeCount;
AggregatorUsage* _aggregators; AggregatorHandler* _aggregators;
protected: protected:
template <typename T> template <typename T>
@ -50,21 +50,21 @@ class MasterContext {
} }
virtual void preApplication(){}; virtual void preApplication(){};
/// @brief called before supersteps /// @brief called before supersteps
/// @return true to continue the computation /// @return true to continue the computation
virtual bool preGlobalSuperstep(uint64_t gss) {return true;}; virtual bool preGlobalSuperstep(uint64_t gss) { return true; };
/// @brief called after supersteps /// @brief called after supersteps
/// @return true to continue the computation /// @return true to continue the computation
virtual bool postGlobalSuperstep(uint64_t gss) {return true;}; virtual bool postGlobalSuperstep(uint64_t gss) { return true; };
virtual void postApplication(){}; virtual void postApplication(){};
/// should indicate if compensation is supposed to start by returning true /// should indicate if compensation is supposed to start by returning true
virtual bool preCompensation(uint64_t gss) {return true;} virtual bool preCompensation(uint64_t gss) { return true; }
/// should indicate if compensation is finished, by returning false. /// should indicate if compensation is finished, by returning false.
/// otherwise workers will be called again with the aggregated values /// otherwise workers will be called again with the aggregated values
virtual bool postCompensation(uint64_t gss) {return false;} virtual bool postCompensation(uint64_t gss) { return false; }
public: public:
MasterContext(VPackSlice params){}; MasterContext(VPackSlice params){};

View File

@ -36,8 +36,7 @@ struct MessageCombiner {
struct IntegerMinCombiner : public MessageCombiner<int64_t> { struct IntegerMinCombiner : public MessageCombiner<int64_t> {
IntegerMinCombiner() {} IntegerMinCombiner() {}
void combine(int64_t& firstValue, void combine(int64_t& firstValue, int64_t const& secondValue) const override {
int64_t const& secondValue) const override {
if (firstValue > secondValue) { if (firstValue > secondValue) {
firstValue = secondValue; firstValue = secondValue;
} }

View File

@ -38,7 +38,7 @@ struct MessageFormat {
virtual void unwrapValue(VPackSlice body, M& value) const = 0; virtual void unwrapValue(VPackSlice body, M& value) const = 0;
virtual void addValue(VPackBuilder& arrayBuilder, M const& val) const = 0; virtual void addValue(VPackBuilder& arrayBuilder, M const& val) const = 0;
}; };
struct IntegerMessageFormat : public MessageFormat<int64_t> { struct IntegerMessageFormat : public MessageFormat<int64_t> {
IntegerMessageFormat() {} IntegerMessageFormat() {}
void unwrapValue(VPackSlice s, int64_t& value) const override { void unwrapValue(VPackSlice s, int64_t& value) const override {
@ -58,7 +58,7 @@ struct FloatMessageFormat : public MessageFormat<float> {
arrayBuilder.add(VPackValue(val)); arrayBuilder.add(VPackValue(val));
} }
}; };
/* /*
template <typename M> template <typename M>
struct NumberMessageFormat : public MessageFormat<M> { struct NumberMessageFormat : public MessageFormat<M> {

View File

@ -60,7 +60,7 @@ void ArrayOutCache<M>::appendMessage(prgl_shard_t shard, std::string const& key,
M const& data) { M const& data) {
if (this->_state->isLocalVertexShard(shard)) { if (this->_state->isLocalVertexShard(shard)) {
this->_localCache->setDirect(shard, key, data); this->_localCache->setDirect(shard, key, data);
//LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data; // LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
this->_sendMessages++; this->_sendMessages++;
} else { } else {
_shardMap[shard][key].push_back(data); _shardMap[shard][key].push_back(data);
@ -149,17 +149,17 @@ void CombiningOutCache<M>::appendMessage(prgl_shard_t shard,
M const& data) { M const& data) {
if (this->_state->isLocalVertexShard(shard)) { if (this->_state->isLocalVertexShard(shard)) {
this->_localCache->setDirect(shard, key, data); this->_localCache->setDirect(shard, key, data);
//LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data; // LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
this->_sendMessages++; this->_sendMessages++;
} else { } else {
std::unordered_map<std::string, M>& vertexMap = _shardMap[shard]; std::unordered_map<std::string, M>& vertexMap = _shardMap[shard];
auto it = vertexMap.find(key); auto it = vertexMap.find(key);
if (it != vertexMap.end()) { // more than one message if (it != vertexMap.end()) { // more than one message
_combiner->combine(vertexMap[key], data); _combiner->combine(vertexMap[key], data);
} else { // first message for this vertex } else { // first message for this vertex
vertexMap.emplace(key, data); vertexMap.emplace(key, data);
} }
if (this->_containedMessages++ > this->_batchSize) { if (this->_containedMessages++ > this->_batchSize) {
flushMessages(); flushMessages();
} }
@ -181,7 +181,7 @@ void CombiningOutCache<M>::flushMessages() {
VPackOptions options = VPackOptions::Defaults; VPackOptions options = VPackOptions::Defaults;
options.buildUnindexedArrays = true; options.buildUnindexedArrays = true;
options.buildUnindexedObjects = true; options.buildUnindexedObjects = true;
VPackBuilder package(&options); VPackBuilder package(&options);
package.openObject(); package.openObject();
package.add(Utils::messagesKey, VPackValue(VPackValueType::Array)); package.add(Utils::messagesKey, VPackValue(VPackValueType::Array));

View File

@ -27,10 +27,10 @@
#include "Cluster/ClusterInfo.h" #include "Cluster/ClusterInfo.h"
#include "VocBase/voc-types.h" #include "VocBase/voc-types.h"
#include "Pregel/GraphStore.h"
#include "Pregel/MessageCombiner.h" #include "Pregel/MessageCombiner.h"
#include "Pregel/MessageFormat.h" #include "Pregel/MessageFormat.h"
#include "Pregel/WorkerState.h" #include "Pregel/WorkerState.h"
#include "Pregel/GraphStore.h"
namespace arangodb { namespace arangodb {
namespace pregel { namespace pregel {
@ -45,66 +45,70 @@ class CombiningInCache;
template <typename M> template <typename M>
class ArrayInCache; class ArrayInCache;
template <typename M> template <typename M>
class OutCache { class OutCache {
protected: protected:
WorkerState const* _state; WorkerState const* _state;
MessageFormat<M> const* _format; MessageFormat<M> const* _format;
InCache<M>* _localCache; InCache<M>* _localCache;
std::string _baseUrl; std::string _baseUrl;
uint32_t _batchSize = 1000; uint32_t _batchSize = 1000;
/// @brief current number of vertices stored /// @brief current number of vertices stored
size_t _containedMessages = 0; size_t _containedMessages = 0;
size_t _sendMessages = 0; size_t _sendMessages = 0;
bool shouldFlushCache(); bool shouldFlushCache();
public: public:
OutCache(WorkerState* state, InCache<M>* cache); OutCache(WorkerState* state, InCache<M>* cache);
virtual ~OutCache() {}; virtual ~OutCache(){};
size_t sendMessageCount() const { return _sendMessages; } size_t sendMessageCount() const { return _sendMessages; }
uint32_t batchSize() const {return _batchSize;} uint32_t batchSize() const { return _batchSize; }
void setBatchSize(uint32_t bs) {_batchSize = bs;} void setBatchSize(uint32_t bs) { _batchSize = bs; }
virtual void clear() = 0; virtual void clear() = 0;
virtual void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) = 0; virtual void appendMessage(prgl_shard_t shard, std::string const& key,
M const& data) = 0;
virtual void flushMessages() = 0; virtual void flushMessages() = 0;
}; };
template <typename M> template <typename M>
class ArrayOutCache : public OutCache<M> { class ArrayOutCache : public OutCache<M> {
/// @brief two stage map: shard -> vertice -> message /// @brief two stage map: shard -> vertice -> message
std::unordered_map<prgl_shard_t, std::unordered_map<prgl_shard_t,
std::unordered_map<std::string, std::vector<M>>> _shardMap; std::unordered_map<std::string, std::vector<M>>>
_shardMap;
public:
public:
ArrayOutCache(WorkerState* state, InCache<M>* cache) ArrayOutCache(WorkerState* state, InCache<M>* cache)
: OutCache<M>(state, cache) {} : OutCache<M>(state, cache) {}
~ArrayOutCache(); ~ArrayOutCache();
void clear() override; void clear() override;
void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) override; void appendMessage(prgl_shard_t shard, std::string const& key,
M const& data) override;
void flushMessages() override; void flushMessages() override;
}; };
template <typename M> template <typename M>
class CombiningOutCache : public OutCache<M> { class CombiningOutCache : public OutCache<M> {
MessageCombiner<M> const* _combiner; MessageCombiner<M> const* _combiner;
/// @brief two stage map: shard -> vertice -> message /// @brief two stage map: shard -> vertice -> message
std::unordered_map<prgl_shard_t, std::unordered_map<std::string, M>> _shardMap; std::unordered_map<prgl_shard_t, std::unordered_map<std::string, M>>
_shardMap;
public:
public:
CombiningOutCache(WorkerState* state, CombiningInCache<M>* cache); CombiningOutCache(WorkerState* state, CombiningInCache<M>* cache);
~CombiningOutCache(); ~CombiningOutCache();
void clear() override; void clear() override;
void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) override; void appendMessage(prgl_shard_t shard, std::string const& key,
M const& data) override;
void flushMessages() override; void flushMessages() override;
}; };
} }
} }
#endif #endif

View File

@ -57,10 +57,10 @@ class PregelFeature final : public application_features::ApplicationFeature {
void cleanup(uint64_t executionNumber); void cleanup(uint64_t executionNumber);
void cleanupAll(); void cleanupAll();
basics::ThreadPool* threadPool() {return _threadPool.get();} basics::ThreadPool* threadPool() { return _threadPool.get(); }
RecoveryManager* recoveryManager() {return _recoveryManager.get();} RecoveryManager* recoveryManager() { return _recoveryManager.get(); }
private: private:
Mutex _mutex; Mutex _mutex;
std::unique_ptr<RecoveryManager> _recoveryManager; std::unique_ptr<RecoveryManager> _recoveryManager;

View File

@ -88,27 +88,26 @@ void RecoveryManager::_monitorShard(CollectionID const& cid,
ShardID const& shard) { ShardID const& shard) {
std::function<bool(VPackSlice const& result)> listener = std::function<bool(VPackSlice const& result)> listener =
[this, shard](VPackSlice const& result) { [this, shard](VPackSlice const& result) {
MUTEX_LOCKER(guard, _lock);// we are editing _primaryServers MUTEX_LOCKER(guard, _lock); // we are editing _primaryServers
auto const& conductors = _listeners.find(shard); auto const& conductors = _listeners.find(shard);
if (conductors == _listeners.end()) { if (conductors == _listeners.end()) {
return false; return false;
} }
if (result.isArray()) { if (result.isArray()) {
if (result.length() > 0) { if (result.length() > 0) {
ServerID nextPrimary = result.at(0).copyString(); ServerID nextPrimary = result.at(0).copyString();
auto const& currentPrimary = _primaryServers.find(shard); auto const& currentPrimary = _primaryServers.find(shard);
if (currentPrimary != _primaryServers.end() if (currentPrimary != _primaryServers.end() &&
&& currentPrimary->second != nextPrimary) { currentPrimary->second != nextPrimary) {
_primaryServers[shard] = nextPrimary; _primaryServers[shard] = nextPrimary;
for (Conductor *cc : conductors->second) { for (Conductor* cc : conductors->second) {
cc->startRecovery(); cc->startRecovery();
} }
} }
} else { } else {
for (Conductor *cc : conductors->second) { for (Conductor* cc : conductors->second) {
cc->cancel(); cc->cancel();
} }
} }

View File

@ -23,56 +23,57 @@
#ifndef ARANGODB_PREGEL_RECOVERY_H #ifndef ARANGODB_PREGEL_RECOVERY_H
#define ARANGODB_PREGEL_RECOVERY_H 1 #define ARANGODB_PREGEL_RECOVERY_H 1
#include "Basics/Mutex.h"
#include "Cluster/ClusterInfo.h"
#include "Agency/AgencyComm.h"
#include "Agency/AgencyCallbackRegistry.h"
#include <velocypack/velocypack-aliases.h> #include <velocypack/velocypack-aliases.h>
#include <velocypack/vpack.h> #include <velocypack/vpack.h>
#include "Agency/AgencyCallbackRegistry.h"
#include "Agency/AgencyComm.h"
#include "Basics/Mutex.h"
#include "Cluster/ClusterInfo.h"
namespace arangodb { namespace arangodb {
namespace pregel { namespace pregel {
template<typename V, typename E> template <typename V, typename E>
class GraphStore; class GraphStore;
class Conductor; class Conductor;
class RecoveryManager { class RecoveryManager {
Mutex _lock; Mutex _lock;
AgencyComm _agency; AgencyComm _agency;
AgencyCallbackRegistry *_agencyCallbackRegistry;//weak AgencyCallbackRegistry* _agencyCallbackRegistry; // weak
std::map<ShardID, std::set<Conductor*>> _listeners; std::map<ShardID, std::set<Conductor*>> _listeners;
std::map<ShardID, ServerID> _primaryServers; std::map<ShardID, ServerID> _primaryServers;
std::map<ShardID, std::shared_ptr<AgencyCallback>> _agencyCallbacks; std::map<ShardID, std::shared_ptr<AgencyCallback>> _agencyCallbacks;
void _monitorShard(CollectionID const& cid, ShardID const& shard); void _monitorShard(CollectionID const& cid, ShardID const& shard);
public: public:
RecoveryManager(AgencyCallbackRegistry *registry); RecoveryManager(AgencyCallbackRegistry* registry);
~RecoveryManager(); ~RecoveryManager();
void monitorCollections(std::vector<std::shared_ptr<LogicalCollection>> const& collections, Conductor*); void monitorCollections(
std::vector<std::shared_ptr<LogicalCollection>> const& collections,
Conductor*);
void stopMonitoring(Conductor*); void stopMonitoring(Conductor*);
int filterGoodServers(std::vector<ServerID> const& servers, std::vector<ServerID> &goodServers); int filterGoodServers(std::vector<ServerID> const& servers,
//bool allServersAvailable(std::vector<ServerID> const& dbServers); std::vector<ServerID>& goodServers);
// bool allServersAvailable(std::vector<ServerID> const& dbServers);
}; };
class RecoveryWorker { class RecoveryWorker {
friend class RestPregelHandler; friend class RestPregelHandler;
std::map<ShardID, ServerID> _secondaries; std::map<ShardID, ServerID> _secondaries;
ServerID const* secondaryForShard(ShardID const& shard) {return nullptr;} ServerID const* secondaryForShard(ShardID const& shard) { return nullptr; }
//receivedBackupData(VPackSlice slice); // receivedBackupData(VPackSlice slice);
public: public:
template<typename V, typename E> template <typename V, typename E>
void replicateGraphData(GraphStore<V,E> *graphStore) {} void replicateGraphData(GraphStore<V, E>* graphStore) {}
void reloadPlanData() {_secondaries.clear();} void reloadPlanData() { _secondaries.clear(); }
}; };
} }
} }

View File

@ -23,23 +23,23 @@
#ifndef ARANGODB_PREGEL_STATISTICS_H #ifndef ARANGODB_PREGEL_STATISTICS_H
#define ARANGODB_PREGEL_STATISTICS_H 1 #define ARANGODB_PREGEL_STATISTICS_H 1
#include <velocypack/Slice.h>
#include <velocypack/Builder.h> #include <velocypack/Builder.h>
#include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h> #include <velocypack/velocypack-aliases.h>
#include "Pregel/Utils.h" #include "Pregel/Utils.h"
namespace arangodb { namespace arangodb {
namespace pregel { namespace pregel {
struct WorkerStats { struct WorkerStats {
size_t activeCount = 0; size_t activeCount = 0;
size_t sendCount = 0; size_t sendCount = 0;
size_t receivedCount = 0; size_t receivedCount = 0;
double superstepRuntimeSecs= 0; double superstepRuntimeSecs = 0;
WorkerStats() {} WorkerStats() {}
WorkerStats(size_t a, size_t s, size_t r) : activeCount(a), sendCount(s), receivedCount(r) {} WorkerStats(size_t a, size_t s, size_t r)
: activeCount(a), sendCount(s), receivedCount(r) {}
void accumulate(WorkerStats const& other) { void accumulate(WorkerStats const& other) {
activeCount += other.activeCount; activeCount += other.activeCount;
@ -47,7 +47,7 @@ struct WorkerStats {
receivedCount += other.receivedCount; receivedCount += other.receivedCount;
superstepRuntimeSecs += other.superstepRuntimeSecs; superstepRuntimeSecs += other.superstepRuntimeSecs;
} }
void accumulate(VPackSlice statValues) { void accumulate(VPackSlice statValues) {
VPackSlice p = statValues.get(Utils::activeCountKey); VPackSlice p = statValues.get(Utils::activeCountKey);
if (p.isInteger()) { if (p.isInteger()) {
@ -73,13 +73,17 @@ struct WorkerStats {
b.add(Utils::receivedCountKey, VPackValue(receivedCount)); b.add(Utils::receivedCountKey, VPackValue(receivedCount));
b.add(Utils::superstepRuntimeKey, VPackValue(superstepRuntimeSecs)); b.add(Utils::superstepRuntimeKey, VPackValue(superstepRuntimeSecs));
} }
void reset() { void reset() {
activeCount = 0; activeCount = 0;
sendCount = 0; sendCount = 0;
receivedCount = 0; receivedCount = 0;
superstepRuntimeSecs = 0; superstepRuntimeSecs = 0;
} }
bool isDone() {
return activeCount == 0 && sendCount == receivedCount;
}
}; };
} }
} }

View File

@ -40,7 +40,7 @@ std::string const Utils::startExecutionPath = "startExecution";
std::string const Utils::finishedStartupPath = "finishedStartup"; std::string const Utils::finishedStartupPath = "finishedStartup";
std::string const Utils::prepareGSSPath = "prepareGSS"; std::string const Utils::prepareGSSPath = "prepareGSS";
std::string const Utils::startGSSPath = "startGSS"; std::string const Utils::startGSSPath = "startGSS";
std::string const Utils::finishedGSSPath = "finishedGSS"; std::string const Utils::finishedWorkerStepPath = "finishedStep";
std::string const Utils::cancelGSSPath = "cancelGSS"; std::string const Utils::cancelGSSPath = "cancelGSS";
std::string const Utils::messagesPath = "messages"; std::string const Utils::messagesPath = "messages";
std::string const Utils::finalizeExecutionPath = "finalizeExecution"; std::string const Utils::finalizeExecutionPath = "finalizeExecution";
@ -56,6 +56,7 @@ std::string const Utils::globalShardListKey = "globalShardList";
std::string const Utils::totalVertexCount = "vertexCount"; std::string const Utils::totalVertexCount = "vertexCount";
std::string const Utils::totalEdgeCount = "edgeCount"; std::string const Utils::totalEdgeCount = "edgeCount";
std::string const Utils::asyncMode = "async"; std::string const Utils::asyncMode = "async";
std::string const Utils::gssDone = "gssDone";
std::string const Utils::coordinatorIdKey = "coordinatorId"; std::string const Utils::coordinatorIdKey = "coordinatorId";
std::string const Utils::algorithmKey = "algorithm"; std::string const Utils::algorithmKey = "algorithm";
@ -73,7 +74,6 @@ std::string const Utils::receivedCountKey = "receivedCount";
std::string const Utils::sendCountKey = "sendCount"; std::string const Utils::sendCountKey = "sendCount";
std::string const Utils::superstepRuntimeKey = "superstepRuntime"; std::string const Utils::superstepRuntimeKey = "superstepRuntime";
std::string const Utils::userParametersKey = "userparams"; std::string const Utils::userParametersKey = "userparams";
std::string Utils::baseUrl(std::string dbName) { std::string Utils::baseUrl(std::string dbName) {

View File

@ -45,7 +45,7 @@ class Utils {
static std::string const finishedStartupPath; static std::string const finishedStartupPath;
static std::string const prepareGSSPath; static std::string const prepareGSSPath;
static std::string const startGSSPath; static std::string const startGSSPath;
static std::string const finishedGSSPath; static std::string const finishedWorkerStepPath;
static std::string const cancelGSSPath; static std::string const cancelGSSPath;
static std::string const messagesPath; static std::string const messagesPath;
static std::string const finalizeExecutionPath; static std::string const finalizeExecutionPath;
@ -63,6 +63,7 @@ class Utils {
static std::string const totalVertexCount; static std::string const totalVertexCount;
static std::string const totalEdgeCount; static std::string const totalEdgeCount;
static std::string const asyncMode; static std::string const asyncMode;
static std::string const gssDone;
static std::string const globalSuperstepKey; static std::string const globalSuperstepKey;
static std::string const messagesKey; static std::string const messagesKey;
@ -70,14 +71,13 @@ class Utils {
static std::string const recoveryMethodKey; static std::string const recoveryMethodKey;
static std::string const compensate; static std::string const compensate;
static std::string const rollback; static std::string const rollback;
static std::string const storeResultsKey; static std::string const storeResultsKey;
static std::string const aggregatorValuesKey; static std::string const aggregatorValuesKey;
static std::string const activeCountKey; static std::string const activeCountKey;
static std::string const receivedCountKey; static std::string const receivedCountKey;
static std::string const sendCountKey; static std::string const sendCountKey;
static std::string const superstepRuntimeKey; static std::string const superstepRuntimeKey;
// User parameters // User parameters
static std::string const userParametersKey; static std::string const userParametersKey;
@ -88,11 +88,9 @@ class Utils {
static int64_t countDocuments(TRI_vocbase_t* vocbase, static int64_t countDocuments(TRI_vocbase_t* vocbase,
std::string const& collection); std::string const& collection);
static std::shared_ptr<LogicalCollection> resolveCollection( static std::shared_ptr<LogicalCollection> resolveCollection(
std::string const& database, std::string const& database, std::string const& collectionName,
std::string const& collectionName,
std::map<std::string, std::string> const& collectionPlanIdMap); std::map<std::string, std::string> const& collectionPlanIdMap);
static void resolveShard(LogicalCollection* info, static void resolveShard(LogicalCollection* info, std::string const& shardKey,
std::string const& shardKey,
std::string const& vertexKey, std::string const& vertexKey,
std::string& responsibleShard); std::string& responsibleShard);
}; };

View File

@ -35,7 +35,7 @@ namespace pregel {
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
class Worker; class Worker;
class Aggregator; class Aggregator;
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
class VertexContext { class VertexContext {
friend class Worker<V, E, M>; friend class Worker<V, E, M>;
@ -43,17 +43,16 @@ class VertexContext {
uint64_t _gss = 0; uint64_t _gss = 0;
WorkerContext* _context; WorkerContext* _context;
GraphStore<V, E>* _graphStore; GraphStore<V, E>* _graphStore;
const AggregatorUsage* _conductorAggregators; const AggregatorHandler* _conductorAggregators;
AggregatorUsage* _workerAggregators; AggregatorHandler* _workerAggregators;
VertexEntry* _vertexEntry; VertexEntry* _vertexEntry;
public: public:
template <typename T> template <typename T>
inline const T* getAggregatedValue(std::string const& name) { inline const T* getAggregatedValue(std::string const& name) {
return (const T*)_conductorAggregators->getAggregatedValue(name); return (const T*)_conductorAggregators->getAggregatedValue(name);
} }
template <typename T> template <typename T>
inline void aggregate(std::string const& name, const T* valuePtr) { inline void aggregate(std::string const& name, const T* valuePtr) {
_workerAggregators->aggregate(name, valuePtr); _workerAggregators->aggregate(name, valuePtr);
@ -61,47 +60,48 @@ class VertexContext {
inline WorkerContext const* context() { return _context; } inline WorkerContext const* context() { return _context; }
template<typename T> template <typename T>
T* mutableVertexData() { T* mutableVertexData() {
return (T*) _graphStore->mutableVertexData(_vertexEntry); return (T*)_graphStore->mutableVertexData(_vertexEntry);
} }
V vertexData() { return _graphStore->copyVertexData(_vertexEntry); } V vertexData() { return _graphStore->copyVertexData(_vertexEntry); }
RangeIterator<Edge<E>> getEdges() { return _graphStore->edgeIterator(_vertexEntry); } RangeIterator<Edge<E>> getEdges() {
return _graphStore->edgeIterator(_vertexEntry);
}
/// store data, will potentially move the data around /// store data, will potentially move the data around
void setVertexData(void const* ptr, size_t size) { void setVertexData(void const* ptr, size_t size) {
_graphStore->replaceVertexData(_vertexEntry, (void*)ptr, size); _graphStore->replaceVertexData(_vertexEntry, (void*)ptr, size);
} }
void voteHalt() {_vertexEntry->setActive(false); } void voteHalt() { _vertexEntry->setActive(false); }
void voteActive() {_vertexEntry->setActive(true);} void voteActive() { _vertexEntry->setActive(true); }
inline uint64_t globalSuperstep() const { return _gss; } inline uint64_t globalSuperstep() const { return _gss; }
}; };
/// Abstract per-vertex computation. Extends VertexContext<V, E, M> with the
/// ability to emit messages along edges and declares the compute() hook that
/// concrete algorithms must implement.
/// Worker<V, E, M> is a friend; presumably so the worker can assign the
/// private _outgoing cache before invoking compute() — confirm in Worker.cpp.
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
class VertexComputation : public VertexContext<V, E, M> { class VertexComputation : public VertexContext<V, E, M> {
friend class Worker<V, E, M>; friend class Worker<V, E, M>;
// Destination for messages produced by sendMessage(). It is dereferenced
// without a null check, so it must be set before sendMessage() is called.
OutCache<M>* _outgoing; OutCache<M>* _outgoing;
public:
public:
/// Appends `data` to the outgoing cache, addressed by the edge's target
/// shard (edge->targetShard()) and destination key (edge->toKey()).
/// `edge` is dereferenced unconditionally and must not be null.
void sendMessage(Edge<E> const* edge, M const& data) { void sendMessage(Edge<E> const* edge, M const& data) {
_outgoing->appendMessage(edge->targetShard(), edge->toKey(), data); _outgoing->appendMessage(edge->targetShard(), edge->toKey(), data);
} }
/// Algorithm-specific vertex program; pure virtual, receives an iterator
/// over the messages of type M for this vertex.
virtual void compute(MessageIterator<M> const& messages) = 0; virtual void compute(MessageIterator<M> const& messages) = 0;
}; };
/// Abstract per-vertex compensation step. Shares the VertexContext<V, E, M>
/// interface (vertex data, edges, aggregators) and declares the compensate()
/// hook that concrete algorithms must implement.
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
class VertexCompensation : public VertexContext<V, E, M> { class VertexCompensation : public VertexContext<V, E, M> {
friend class Worker<V, E, M>; friend class Worker<V, E, M>;
public: public:
/// Pure virtual compensation hook.
/// NOTE(review): `inLostPartition` presumably tells the vertex whether it
/// belonged to a partition that was lost and reloaded — confirm at the caller.
virtual void compensate(bool inLostPartition) = 0; virtual void compensate(bool inLostPartition) = 0;
}; };
} }
} }
#endif #endif

View File

@ -47,12 +47,15 @@ template <typename V, typename E, typename M>
Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo, Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
VPackSlice initConfig) VPackSlice initConfig)
: _running(true), _state(vocbase->name(), initConfig), _algorithm(algo) { : _running(true), _state(vocbase->name(), initConfig), _algorithm(algo) {
VPackSlice userParams = initConfig.get(Utils::userParametersKey); VPackSlice userParams = initConfig.get(Utils::userParametersKey);
_workerContext.reset(algo->workerContext(userParams)); _workerContext.reset(algo->workerContext(userParams));
_messageFormat.reset(algo->messageFormat()); _messageFormat.reset(algo->messageFormat());
_messageCombiner.reset(algo->messageCombiner()); _messageCombiner.reset(algo->messageCombiner());
_conductorAggregators.reset(new AggregatorUsage(algo)); _conductorAggregators.reset(new AggregatorHandler(algo));
_workerAggregators.reset(new AggregatorUsage(algo)); _workerAggregators.reset(new AggregatorHandler(algo));
_graphStore.reset(new GraphStore<V, E>(vocbase, _algorithm->inputFormat()));
if (_messageCombiner) { if (_messageCombiner) {
_readCache.reset( _readCache.reset(
new CombiningInCache<M>(_messageFormat.get(), _messageCombiner.get())); new CombiningInCache<M>(_messageFormat.get(), _messageCombiner.get()));
@ -70,24 +73,24 @@ Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
// of time. Therefore this is performed asynchronously // of time. Therefore this is performed asynchronously
ThreadPool* pool = PregelFeature::instance()->threadPool(); ThreadPool* pool = PregelFeature::instance()->threadPool();
pool->enqueue([this, vocbase, vc, ec] { pool->enqueue([this, vocbase, vc, ec] {
_graphStore.reset( _graphStore->loadShards(this->_state);
new GraphStore<V, E>(vocbase, _state, _algorithm->inputFormat()));
// execute the user defined startup code
if (_workerContext) { if (_workerContext) {
_workerContext->_conductorAggregators = _conductorAggregators.get(); _workerContext->_conductorAggregators = _conductorAggregators.get();
_workerContext->_workerAggregators = _workerAggregators.get(); _workerContext->_workerAggregators = _workerAggregators.get();
_workerContext->_vertexCount = vc; _workerContext->_vertexCount = vc;
_workerContext->_edgeCount = ec; _workerContext->_edgeCount = ec;
_workerContext->preApplication(); _workerContext->preApplication();
VPackBuilder package;
package.openObject();
package.add(Utils::senderKey,
VPackValue(ServerState::instance()->getId()));
package.add(Utils::executionNumberKey,
VPackValue(_state.executionNumber()));
package.close();
_callConductor(Utils::finishedStartupPath, package.slice());
} }
VPackBuilder package;
package.openObject();
package.add(Utils::senderKey, VPackValue(ServerState::instance()->getId()));
package.add(Utils::executionNumberKey,
VPackValue(_state.executionNumber()));
package.close();
_callConductor(Utils::finishedStartupPath, package.slice());
}); });
} }
@ -120,13 +123,13 @@ void Worker<V, E, M>::prepareGlobalStep(VPackSlice data) {
// clean up message caches, initialize gss // clean up message caches, initialize gss
_state._globalSuperstep = gss; _state._globalSuperstep = gss;
_swapIncomingCaches(); // write cache becomes the readable cache _swapIncomingCaches(); // write cache becomes the readable cache
// parse aggregated values from conductor _workerAggregators->resetValues();
_conductorAggregators->resetValues(); _conductorAggregators->resetValues();
// parse aggregated values from conductor
VPackSlice aggValues = data.get(Utils::aggregatorValuesKey); VPackSlice aggValues = data.get(Utils::aggregatorValuesKey);
if (aggValues.isObject()) { if (aggValues.isObject()) {
_conductorAggregators->aggregateValues(aggValues); _conductorAggregators->aggregateValues(aggValues);
} }
_workerAggregators->resetValues();
_superstepStats.reset(); // don't forget to reset before the superstep _superstepStats.reset(); // don't forget to reset before the superstep
// execute context // execute context
if (_workerContext != nullptr) { if (_workerContext != nullptr) {
@ -136,7 +139,7 @@ void Worker<V, E, M>::prepareGlobalStep(VPackSlice data) {
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
void Worker<V, E, M>::receivedMessages(VPackSlice data) { void Worker<V, E, M>::receivedMessages(VPackSlice data) {
//LOG(INFO) << "Worker received some messages: " << data.toJson(); // LOG(INFO) << "Worker received some messages: " << data.toJson();
VPackSlice gssSlice = data.get(Utils::globalSuperstepKey); VPackSlice gssSlice = data.get(Utils::globalSuperstepKey);
VPackSlice messageSlice = data.get(Utils::messagesKey); VPackSlice messageSlice = data.get(Utils::messagesKey);
@ -231,7 +234,7 @@ void Worker<V, E, M>::_executeGlobalStep(
outCache.reset(new ArrayOutCache<M>(&_state, inCache.get())); outCache.reset(new ArrayOutCache<M>(&_state, inCache.get()));
} }
AggregatorUsage workerAggregator(_algorithm.get()); AggregatorHandler workerAggregator(_algorithm.get());
// TODO look if we can avoid instantiating this // TODO look if we can avoid instantiating this
std::unique_ptr<VertexComputation<V, E, M>> vertexComputation( std::unique_ptr<VertexComputation<V, E, M>> vertexComputation(
@ -250,9 +253,9 @@ void Worker<V, E, M>::_executeGlobalStep(
vertexComputation->compute(messages); vertexComputation->compute(messages);
if (vertexEntry->active()) { if (vertexEntry->active()) {
activeCount++; activeCount++;
}/* else { } /* else {
LOG(INFO) << vertexEntry->key() << " vertex has halted"; LOG(INFO) << vertexEntry->key() << " vertex has halted";
}*/ }*/
} }
// TODO delete read messages immediately // TODO delete read messages immediately
// technically messages to non-existing vertices trigger // technically messages to non-existing vertices trigger
@ -280,7 +283,7 @@ void Worker<V, E, M>::_executeGlobalStep(
// called at the end of a worker thread, needs mutex // called at the end of a worker thread, needs mutex
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
void Worker<V, E, M>::_workerThreadDone(AggregatorUsage* threadAggregators, void Worker<V, E, M>::_workerThreadDone(AggregatorHandler* threadAggregators,
WorkerStats const& threadStats) { WorkerStats const& threadStats) {
MUTEX_LOCKER(guard, _threadMutex); // only one thread at a time MUTEX_LOCKER(guard, _threadMutex); // only one thread at a time
@ -314,18 +317,22 @@ void Worker<V, E, M>::_workerThreadDone(AggregatorUsage* threadAggregators,
_workerAggregators->serializeValues(package); _workerAggregators->serializeValues(package);
package.close(); package.close();
} }
_superstepStats.serializeValues(package); // add stats if (_superstepStats.isDone()) {
_superstepStats.serializeValues(package); // add stats
package.add(Utils::gssDone, VPackValue(true));
}
package.close(); package.close();
_workerAggregators->resetValues();
// TODO ask how to implement message sending without waiting for a response // TODO ask how to implement message sending without waiting for a response
// ============ Call Coordinator ============ // ============ Call Coordinator ============
_callConductor(Utils::finishedGSSPath, package.slice()); _callConductor(Utils::finishedWorkerStepPath, package.slice());
} }
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
void Worker<V, E, M>::finalizeExecution(VPackSlice body) { void Worker<V, E, M>::finalizeExecution(VPackSlice body) {
// Only expect serial calls from the conductor. // Only expect serial calls from the conductor.
//Lock to prevent malicious activity // Lock to prevent malicious activity
MUTEX_LOCKER(guard, _conductorMutex); MUTEX_LOCKER(guard, _conductorMutex);
_running = false; _running = false;
@ -356,7 +363,7 @@ void Worker<V, E, M>::finalizeExecution(VPackSlice body) {
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
void Worker<V, E, M>::startRecovery(VPackSlice data) { void Worker<V, E, M>::startRecovery(VPackSlice data) {
MUTEX_LOCKER(guard, _conductorMutex); MUTEX_LOCKER(guard, _conductorMutex);
_running = true; _running = true;
VPackSlice method = data.get(Utils::recoveryMethodKey); VPackSlice method = data.get(Utils::recoveryMethodKey);
if (method.compareString(Utils::compensate) == 0) { if (method.compareString(Utils::compensate) == 0) {
@ -372,8 +379,8 @@ void Worker<V, E, M>::startRecovery(VPackSlice data) {
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
void Worker<V, E, M>::compensateStep(VPackSlice data) { void Worker<V, E, M>::compensateStep(VPackSlice data) {
MUTEX_LOCKER(guard, _conductorMutex); MUTEX_LOCKER(guard, _conductorMutex);
_conductorAggregators->resetValues(); _conductorAggregators->resetValues();
VPackSlice aggValues = data.get(Utils::aggregatorValuesKey); VPackSlice aggValues = data.get(Utils::aggregatorValuesKey);
if (aggValues.isObject()) { if (aggValues.isObject()) {

View File

@ -25,11 +25,11 @@
#include "Basics/Common.h" #include "Basics/Common.h"
#include "Basics/Mutex.h" #include "Basics/Mutex.h"
#include "Pregel/AggregatorUsage.h" #include "Pregel/AggregatorHandler.h"
#include "Pregel/Algorithm.h" #include "Pregel/Algorithm.h"
#include "Pregel/Statistics.h"
#include "Pregel/WorkerContext.h" #include "Pregel/WorkerContext.h"
#include "Pregel/WorkerState.h" #include "Pregel/WorkerState.h"
#include "Pregel/Statistics.h"
struct TRI_vocbase_t; struct TRI_vocbase_t;
namespace arangodb { namespace arangodb {
@ -40,7 +40,7 @@ class IWorker {
public: public:
virtual ~IWorker(){}; virtual ~IWorker(){};
virtual void prepareGlobalStep(VPackSlice data) = 0; virtual void prepareGlobalStep(VPackSlice data) = 0;
virtual void startGlobalStep(VPackSlice data) = 0; // called by coordinator virtual void startGlobalStep(VPackSlice data) = 0; // called by coordinator
virtual void cancelGlobalStep(VPackSlice data) = 0; // called by coordinator virtual void cancelGlobalStep(VPackSlice data) = 0; // called by coordinator
virtual void receivedMessages(VPackSlice data) = 0; virtual void receivedMessages(VPackSlice data) = 0;
virtual void finalizeExecution(VPackSlice data) = 0; virtual void finalizeExecution(VPackSlice data) = 0;
@ -53,58 +53,62 @@ class GraphStore;
template <typename M> template <typename M>
class InCache; class InCache;
template <typename T> template <typename T>
class RangeIterator; class RangeIterator;
class VertexEntry; class VertexEntry;
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
class VertexContext; class VertexContext;
template <typename V, typename E, typename M> template <typename V, typename E, typename M>
class Worker : public IWorker { class Worker : public IWorker {
//friend class arangodb::RestPregelHandler; // friend class arangodb::RestPregelHandler;
bool _running = true; bool _running = true;
WorkerState _state; WorkerState _state;
WorkerStats _workerStats; WorkerStats _workerStats;
uint64_t _expectedGSS = 0; uint64_t _expectedGSS = 0;
std::unique_ptr<Algorithm<V, E, M>> _algorithm; std::unique_ptr<Algorithm<V, E, M>> _algorithm;
std::unique_ptr<WorkerContext> _workerContext; std::unique_ptr<WorkerContext> _workerContext;
Mutex _conductorMutex;// locks callback methods Mutex _conductorMutex; // locks callback methods
mutable Mutex _threadMutex;// locks _workerThreadDone mutable Mutex _threadMutex; // locks _workerThreadDone
// only valid while recovering to determine the offset // only valid while recovering to determine the offset
// where new vertices were inserted // where new vertices were inserted
size_t _preRecoveryTotal; size_t _preRecoveryTotal;
std::unique_ptr<AggregatorHandler> _conductorAggregators;
std::unique_ptr<AggregatorHandler> _workerAggregators;
std::unique_ptr<GraphStore<V, E>> _graphStore; std::unique_ptr<GraphStore<V, E>> _graphStore;
std::unique_ptr<InCache<M>> _readCache, _writeCache, _nextPhase;
std::unique_ptr<AggregatorUsage> _conductorAggregators;
std::unique_ptr<AggregatorUsage> _workerAggregators;
std::unique_ptr<MessageFormat<M>> _messageFormat; std::unique_ptr<MessageFormat<M>> _messageFormat;
std::unique_ptr<MessageCombiner<M>> _messageCombiner; std::unique_ptr<MessageCombiner<M>> _messageCombiner;
// from previous or current superstep
std::unique_ptr<InCache<M>> _readCache;
// for the current or next superstep
std::unique_ptr<InCache<M>> _writeCache;
// intended for the next superstep phase
std::unique_ptr<InCache<M>> _nextPhase;
WorkerStats _superstepStats; WorkerStats _superstepStats;
size_t _runningThreads; size_t _runningThreads;
// Exchanges the read and write message caches, then empties the new write
// cache (the object that served as the read cache before the swap).
// Both pointers are used without a null check, so the caches must already
// have been created when this is called.
void _swapIncomingCaches() { void _swapIncomingCaches() {
_readCache.swap(_writeCache); _readCache.swap(_writeCache);
_writeCache->clear(); _writeCache->clear();
} }
void _initializeVertexContext(VertexContext<V, E, M> *ctx); void _initializeVertexContext(VertexContext<V, E, M>* ctx);
void _executeGlobalStep(RangeIterator<VertexEntry> &vertexIterator); void _executeGlobalStep(RangeIterator<VertexEntry>& vertexIterator);
void _workerThreadDone(AggregatorUsage *threadAggregators, void _workerThreadDone(AggregatorHandler* threadAggregators,
WorkerStats const& threadStats); WorkerStats const& threadStats);
void _callConductor(std::string path, VPackSlice message); void _callConductor(std::string path, VPackSlice message);
public: public:
Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algorithm, Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algorithm,
VPackSlice params); VPackSlice params);
~Worker(); ~Worker();
// ====== called by rest handler ===== // ====== called by rest handler =====
void prepareGlobalStep(VPackSlice data) override; void prepareGlobalStep(VPackSlice data) override;
void startGlobalStep(VPackSlice data) override; void startGlobalStep(VPackSlice data) override;

View File

@ -25,8 +25,8 @@
#include <velocypack/Slice.h> #include <velocypack/Slice.h>
#include <velocypack/velocypack-aliases.h> #include <velocypack/velocypack-aliases.h>
#include "Pregel/AggregatorUsage.h"
#include "Basics/Common.h" #include "Basics/Common.h"
#include "Pregel/AggregatorHandler.h"
#include "Pregel/Utils.h" #include "Pregel/Utils.h"
namespace arangodb { namespace arangodb {
@ -37,8 +37,8 @@ class WorkerContext {
friend class Worker; friend class Worker;
uint64_t _vertexCount, _edgeCount; uint64_t _vertexCount, _edgeCount;
const AggregatorUsage* _conductorAggregators; const AggregatorHandler* _conductorAggregators;
AggregatorUsage* _workerAggregators; AggregatorHandler* _workerAggregators;
protected: protected:
template <typename T> template <typename T>
@ -55,9 +55,9 @@ class WorkerContext {
virtual void preGlobalSuperstep(uint64_t gss){}; virtual void preGlobalSuperstep(uint64_t gss){};
virtual void postGlobalSuperstep(uint64_t gss){}; virtual void postGlobalSuperstep(uint64_t gss){};
virtual void postApplication(){}; virtual void postApplication(){};
public: public:
WorkerContext(VPackSlice params) {}; WorkerContext(VPackSlice params){};
inline uint64_t vertexCount() const { return _vertexCount; } inline uint64_t vertexCount() const { return _vertexCount; }

View File

@ -35,6 +35,7 @@ WorkerState::WorkerState(DatabaseID dbname, VPackSlice params)
VPackSlice execNum = params.get(Utils::executionNumberKey); VPackSlice execNum = params.get(Utils::executionNumberKey);
VPackSlice collectionPlanIdMap = params.get(Utils::collectionPlanIdMapKey); VPackSlice collectionPlanIdMap = params.get(Utils::collectionPlanIdMapKey);
VPackSlice globalShards = params.get(Utils::globalShardListKey); VPackSlice globalShards = params.get(Utils::globalShardListKey);
//VPackSlice userParams = params.get(Utils::userParametersKey);
if (!coordID.isString() || !edgeShardMap.isObject() || if (!coordID.isString() || !edgeShardMap.isObject() ||
!vertexShardMap.isObject() || !execNum.isInteger() || !vertexShardMap.isObject() || !execNum.isInteger() ||
!collectionPlanIdMap.isObject() || !globalShards.isArray()) { !collectionPlanIdMap.isObject() || !globalShards.isArray()) {

View File

@ -23,8 +23,8 @@
#ifndef ARANGODB_PREGEL_WORKER_STATE_H #ifndef ARANGODB_PREGEL_WORKER_STATE_H
#define ARANGODB_PREGEL_WORKER_STATE_H 1 #define ARANGODB_PREGEL_WORKER_STATE_H 1
#include <algorithm>
#include <velocypack/velocypack-aliases.h> #include <velocypack/velocypack-aliases.h>
#include <algorithm>
#include "Basics/Common.h" #include "Basics/Common.h"
#include "Cluster/ClusterInfo.h" #include "Cluster/ClusterInfo.h"
@ -48,25 +48,30 @@ class WorkerState {
// Read-only accessors for the worker's state. Each one returns the matching
// private member; the container and string accessors return const references,
// so the returned value is only valid while this WorkerState is alive.

// Value of _executionNumber.
inline uint64_t executionNumber() const { return _executionNumber; } inline uint64_t executionNumber() const { return _executionNumber; }
// Value of _globalSuperstep.
inline uint64_t globalSuperstep() const { return _globalSuperstep; } inline uint64_t globalSuperstep() const { return _globalSuperstep; }
// New side of the diff adds localSuperstep(), returning _localSuperstep.
inline bool asynchronousMode() const {return _asynchronousMode;} inline uint64_t localSuperstep() const { return _localSuperstep; }
// Value of the _asynchronousMode flag.
inline bool asynchronousMode() const { return _asynchronousMode; }
// Reference to _coordinatorId.
inline std::string const& coordinatorId() const { return _coordinatorId; } inline std::string const& coordinatorId() const { return _coordinatorId; }
// Reference to _database.
inline std::string const& database() const { return _database; } inline std::string const& database() const { return _database; }
// Map from collection id to shard ids, for vertex collections.
inline std::map<CollectionID, std::vector<ShardID>> const& vertexCollectionShards() const { inline std::map<CollectionID, std::vector<ShardID>> const&
vertexCollectionShards() const {
return _vertexCollectionShards; return _vertexCollectionShards;
} }
// Map from collection id to shard ids, for edge collections.
inline std::map<CollectionID, std::vector<ShardID>> const& edgeCollectionShards() const { inline std::map<CollectionID, std::vector<ShardID>> const&
edgeCollectionShards() const {
return _edgeCollectionShards; return _edgeCollectionShards;
} }
// Reference to _collectionPlanIdMap (string-to-string map).
// NOTE(review): presumably maps collection names to plan ids — confirm
// against the code that fills it.
inline std::map<CollectionID, std::string> const& collectionPlanIdMap() const { inline std::map<CollectionID, std::string> const& collectionPlanIdMap()
const {
return _collectionPlanIdMap; return _collectionPlanIdMap;
}; };
// same content on every worker, has to stay equal!!!! // same content on every worker, has to stay equal!!!!
inline std::vector<ShardID> const& globalShardIDs() const { inline std::vector<ShardID> const& globalShardIDs() const {
return _globalShardIDs; return _globalShardIDs;
@ -83,30 +88,33 @@ class WorkerState {
return _localEdgeShardIDs; return _localEdgeShardIDs;
}; };
// Returns the position of `responsibleShard` within _globalShardIDs, found by
// a linear std::find over the vector.
// NOTE(review): when the shard is not present the result is (uint16_t)-1,
// i.e. 65535 after conversion to size_t — not SIZE_MAX. Callers must compare
// against that exact value; a list with 65535 or more shards would make the
// sentinel ambiguous.
inline size_t shardId(ShardID const& responsibleShard) const { inline size_t shardId(ShardID const& responsibleShard) const {
auto it = std::find(_globalShardIDs.begin(), _globalShardIDs.end(), responsibleShard); auto it = std::find(_globalShardIDs.begin(), _globalShardIDs.end(),
return it != _globalShardIDs.end() ? it - _globalShardIDs.begin() : (uint16_t)-1; responsibleShard);
return it != _globalShardIDs.end() ? it - _globalShardIDs.begin()
: (uint16_t)-1;
} }
// Reports whether the shard at position `shardIndex` of _globalShardIDs is
// also listed in _localVertexShardIDs (linear search).
// NOTE(review): `shardIndex` is used with unchecked operator[]; an
// out-of-range value, such as the 65535 "not found" result of shardId(), is
// undefined behavior here. Callers must validate the index first.
// index in globalShardIDs // index in globalShardIDs
inline bool isLocalVertexShard(size_t shardIndex) const { inline bool isLocalVertexShard(size_t shardIndex) const {
// TODO cache this? prob small // TODO cache this? prob small
ShardID const& shard = _globalShardIDs[shardIndex]; ShardID const& shard = _globalShardIDs[shardIndex];
return std::find(_localVertexShardIDs.begin(), _localVertexShardIDs.end(), shard) return std::find(_localVertexShardIDs.begin(), _localVertexShardIDs.end(),
!= _localVertexShardIDs.end(); shard) != _localVertexShardIDs.end();
} }
private: private:
uint64_t _executionNumber = 0; uint64_t _executionNumber = 0;
uint64_t _globalSuperstep = 0; uint64_t _globalSuperstep = 0;
uint64_t _localSuperstep = 0;
bool _asynchronousMode = false; bool _asynchronousMode = false;
// uint64_t _numWorkerThreads = 1;
std::string _coordinatorId; std::string _coordinatorId;
std::string _database; std::string _database;
std::vector<ShardID> _globalShardIDs; std::vector<ShardID> _globalShardIDs;
std::vector<ShardID> _localVertexShardIDs, _localEdgeShardIDs; std::vector<ShardID> _localVertexShardIDs, _localEdgeShardIDs;
std::map<CollectionID, std::vector<ShardID>> _vertexCollectionShards, _edgeCollectionShards; std::map<CollectionID, std::vector<ShardID>> _vertexCollectionShards,
_edgeCollectionShards;
std::map<std::string, std::string> _collectionPlanIdMap; std::map<std::string, std::string> _collectionPlanIdMap;
}; };
} }

View File

@ -51,7 +51,7 @@ module.exports = function (gname, filename) {
graph[eColl].save(vColl+"/"+parts[0], graph[eColl].save(vColl+"/"+parts[0],
vColl+"/"+parts[1], vColl+"/"+parts[1],
{_vertex:parts[0], value:-1}); {_vertex:parts[0]});
} }
}); });
}; };

View File

@ -118,10 +118,10 @@ RestStatus RestPregelHandler::execute() {
if (exe) { if (exe) {
exe->receivedMessages(body); exe->receivedMessages(body);
} }
} else if (suffix[0] == Utils::finishedGSSPath) { } else if (suffix[0] == Utils::finishedWorkerStepPath) {
Conductor *exe = PregelFeature::instance()->conductor(executionNumber); Conductor *exe = PregelFeature::instance()->conductor(executionNumber);
if (exe) { if (exe) {
exe->finishedGlobalStep(body); exe->finishedWorkerStep(body);
} else { } else {
LOG(ERR) << "Conductor not found: " << executionNumber; LOG(ERR) << "Conductor not found: " << executionNumber;
} }