mirror of https://gitee.com/bigwinds/arangodb
reorganization
This commit is contained in:
parent
3a4a07f13d
commit
63f87fbf52
|
@ -111,6 +111,12 @@ set(SKIP_INSTALL_ALL ON)
|
||||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/snappy/google-snappy-d53de18)
|
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/snappy/google-snappy-d53de18)
|
||||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/rocksdb)
|
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/rocksdb)
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
## LIBCUCKOO
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libcuckoo/)
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
## LINK_DIRECTORIES
|
## LINK_DIRECTORIES
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
*.a
|
||||||
|
*.in
|
||||||
|
*.la
|
||||||
|
*.lo
|
||||||
|
*.log
|
||||||
|
*.o
|
||||||
|
*.out
|
||||||
|
*.trs
|
||||||
|
*~
|
||||||
|
.DS_Store
|
||||||
|
.deps
|
||||||
|
.libs
|
||||||
|
Makefile
|
||||||
|
aclocal.m4
|
||||||
|
autom4te.cache
|
||||||
|
cityhash_unittest
|
||||||
|
compile
|
||||||
|
config.guess
|
||||||
|
config.h
|
||||||
|
config.log
|
||||||
|
config.status
|
||||||
|
config.sub
|
||||||
|
config.sub
|
||||||
|
configure
|
||||||
|
depcomp
|
||||||
|
depcomp
|
||||||
|
examples/count_freq
|
||||||
|
examples/hellohash
|
||||||
|
examples/nested_table
|
||||||
|
install-sh
|
||||||
|
libtool
|
||||||
|
libtool.m4
|
||||||
|
lt*.m4
|
||||||
|
ltmain.sh
|
||||||
|
missing
|
||||||
|
stamp-h1
|
||||||
|
test-driver
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Build script for the bundled libcuckoo sources; it builds the CityHash
# implementation as a library target.
cmake_minimum_required (VERSION 2.8)
|
||||||
|
|
||||||
|
# Adds the include/ directory under PROJECT_BINARY_DIR to the header search path.
include_directories("${PROJECT_BINARY_DIR}/include/")
|
||||||
|
# Disabled pthread link line. NOTE(review): it names the target LIBCUCKOO while
# the target created below is libcuckoo - adjust the name before re-enabling.
#target_link_libraries(LIBCUCKOO pthread)
|
||||||
|
# Library target "libcuckoo" built from the single CityHash source file.
add_library(libcuckoo include/cityhash/city.cc)
|
|
@ -0,0 +1,18 @@
|
||||||
|
Copyright (C) 2013, Carnegie Mellon University and Intel Corporation
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
CityHash (lib/city.h, lib/city.cc) is Copyright (c) Google, Inc. and
|
||||||
|
has its own license, as detailed in the source files.
|
|
@ -0,0 +1,107 @@
|
||||||
|
Note to existing users: the iterator implementation has changed significantly
|
||||||
|
since we introduced the `locked_table` in [this
|
||||||
|
commit](https://github.com/efficient/libcuckoo/commit/2bedb3d0c811cd8b3adb3e78e2d2a28c66ba1d1d).
|
||||||
|
Please see the [`locked_table`
|
||||||
|
documentation](http://efficient.github.io/libcuckoo/classcuckoohash__map_1_1locked__table.html)
|
||||||
|
and [examples
|
||||||
|
directory](https://github.com/efficient/libcuckoo/tree/master/examples) for
|
||||||
|
information and examples of how to use iterators.
|
||||||
|
|
||||||
|
libcuckoo
|
||||||
|
=========
|
||||||
|
|
||||||
|
libcuckoo provides a high-performance, compact hash table that allows
|
||||||
|
multiple concurrent reader and writer threads.
|
||||||
|
|
||||||
|
The Doxygen-generated documentation is available at the
|
||||||
|
[project page](http://efficient.github.io/libcuckoo/).
|
||||||
|
|
||||||
|
Authors: Manu Goyal, Bin Fan, Xiaozhou Li, David G. Andersen, and Michael Kaminsky
|
||||||
|
|
||||||
|
For details about this algorithm and citations, please refer to
|
||||||
|
our papers in [NSDI 2013][1] and [EuroSys 2014][2]. Some of the details of the hashing
|
||||||
|
algorithm have been improved since that work (e.g., the previous algorithm
|
||||||
|
in [1] serializes all writer threads, while our current
|
||||||
|
implementation supports multiple concurrent writers), however, and this source
|
||||||
|
code is now the definitive reference.
|
||||||
|
|
||||||
|
[1]: http://www.cs.cmu.edu/~dga/papers/memc3-nsdi2013.pdf "MemC3: Compact and Concurrent Memcache with Dumber Caching and Smarter Hashing"
|
||||||
|
[2]: http://www.cs.princeton.edu/~mfreed/docs/cuckoo-eurosys14.pdf "Algorithmic Improvements for Fast Concurrent Cuckoo Hashing"
|
||||||
|
|
||||||
|
Requirements
|
||||||
|
================
|
||||||
|
|
||||||
|
This library has been tested on Mac OSX >= 10.8 and Ubuntu >= 12.04.
|
||||||
|
|
||||||
|
It compiles with clang++ >= 3.3 and g++ >= 4.7; however, we strongly suggest
|
||||||
|
using the latest versions of both compilers, as they have greatly improved
|
||||||
|
support for atomic operations. Building the library requires the
|
||||||
|
autotools. Install them on Ubuntu with:
|
||||||
|
|
||||||
|
$ sudo apt-get update && sudo apt-get install build-essential autoconf libtool
|
||||||
|
|
||||||
|
Building
|
||||||
|
==========
|
||||||
|
|
||||||
|
$ autoreconf -fis
|
||||||
|
$ ./configure
|
||||||
|
$ make
|
||||||
|
$ make install
|
||||||
|
|
||||||
|
Usage
|
||||||
|
==========
|
||||||
|
|
||||||
|
To build a program with the hash table, include
|
||||||
|
`libcuckoo/cuckoohash_map.hh` in your source file. If you want to
|
||||||
|
use CityHash, which we recommend, we have provided a wrapper
|
||||||
|
compatible with the `std::hash` type around it in the
|
||||||
|
`libcuckoo/city_hasher.hh` file. If compiling with CityHash, add the
|
||||||
|
`-lcityhash` flag. You must also enable C++11 features on your
|
||||||
|
compiler. Compiling the file `examples/count_freq.cc` with g++
|
||||||
|
might look like this:
|
||||||
|
|
||||||
|
$ g++ -std=c++11 examples/count_freq.cc -lcityhash
|
||||||
|
|
||||||
|
The
|
||||||
|
[examples directory](https://github.com/efficient/libcuckoo/tree/master/examples)
|
||||||
|
contains some simple demonstrations of some of the basic features of the hash
|
||||||
|
table.
|
||||||
|
|
||||||
|
Tests
|
||||||
|
==========
|
||||||
|
|
||||||
|
The [tests directory](https://github.com/efficient/libcuckoo/tree/master/tests)
|
||||||
|
contains a number of tests and benchmarks of the hash table, which
|
||||||
|
also can serve as useful examples of how to use the table's various features.
|
||||||
|
After running `make all`, the entire test suite can be run with the `make check`
|
||||||
|
command. This will not run the benchmarks, which must be run individually. The
|
||||||
|
test executables, which have the suffix `.out`, can be run individually as well.
|
||||||
|
|
||||||
|
Issue Report
|
||||||
|
============
|
||||||
|
|
||||||
|
To let us know your questions or issues, we recommend you
|
||||||
|
[report an issue](https://github.com/efficient/libcuckoo/issues) on
|
||||||
|
GitHub. You can also email us at
|
||||||
|
[libcuckoo-dev@googlegroups.com](mailto:libcuckoo-dev@googlegroups.com).
|
||||||
|
|
||||||
|
License
|
||||||
|
===========
|
||||||
|
Copyright (C) 2013, Carnegie Mellon University and Intel Corporation
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
CityHash (lib/city.h, lib/city.cc) is Copyright (c) Google, Inc. and
|
||||||
|
has its own license, as detailed in the source files.
|
|
@ -0,0 +1,19 @@
|
||||||
|
// Copyright (c) 2011 Google, Inc.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
|
@ -0,0 +1,627 @@
|
||||||
|
// Copyright (c) 2011 Google, Inc.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
||||||
|
//
|
||||||
|
// This file provides CityHash64() and related functions.
|
||||||
|
//
|
||||||
|
// It's probably possible to create even faster hash functions by
|
||||||
|
// writing a program that systematically explores some of the space of
|
||||||
|
// possible hash functions, by using SIMD instructions, or by
|
||||||
|
// compromising on hash quality.
|
||||||
|
|
||||||
|
#include "city.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string.h> // for memcpy and memset
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Copies sizeof(uint64) bytes starting at p into a uint64 with memcpy, so p
// does not have to be suitably aligned for a direct 64-bit load.
static uint64 UNALIGNED_LOAD64(const char *p) {
  uint64 loaded;
  memcpy(&loaded, p, sizeof(loaded));
  return loaded;
}
|
||||||
|
|
||||||
|
// Copies sizeof(uint32) bytes starting at p into a uint32 with memcpy, so p
// does not have to be suitably aligned for a direct 32-bit load.
static uint32 UNALIGNED_LOAD32(const char *p) {
  uint32 loaded;
  memcpy(&loaded, p, sizeof(loaded));
  return loaded;
}
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#define bswap_32(x) _byteswap_ulong(x)
|
||||||
|
#define bswap_64(x) _byteswap_uint64(x)
|
||||||
|
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
|
||||||
|
// Mac OS X / Darwin features
|
||||||
|
#include <libkern/OSByteOrder.h>
|
||||||
|
#define bswap_32(x) OSSwapInt32(x)
|
||||||
|
#define bswap_64(x) OSSwapInt64(x)
|
||||||
|
|
||||||
|
#elif defined(__NetBSD__)
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <machine/bswap.h>
|
||||||
|
#if defined(__BSWAP_RENAME) && !defined(__bswap_32)
|
||||||
|
#define bswap_32(x) bswap32(x)
|
||||||
|
#define bswap_64(x) bswap64(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <byteswap.h>
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef WORDS_BIGENDIAN
|
||||||
|
#define uint32_in_expected_order(x) (bswap_32(x))
|
||||||
|
#define uint64_in_expected_order(x) (bswap_64(x))
|
||||||
|
#else
|
||||||
|
#define uint32_in_expected_order(x) (x)
|
||||||
|
#define uint64_in_expected_order(x) (x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(LIKELY)
|
||||||
|
#if HAVE_BUILTIN_EXPECT
|
||||||
|
#define LIKELY(x) (__builtin_expect(!!(x), 1))
|
||||||
|
#else
|
||||||
|
#define LIKELY(x) (x)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Reads 64 bits from p (no alignment requirement) and passes them through
// uint64_in_expected_order, which byte-swaps only when WORDS_BIGENDIAN is
// defined.
static uint64 Fetch64(const char *p) {
  const uint64 raw = UNALIGNED_LOAD64(p);
  return uint64_in_expected_order(raw);
}
|
||||||
|
|
||||||
|
// Reads 32 bits from p (no alignment requirement) and passes them through
// uint32_in_expected_order, which byte-swaps only when WORDS_BIGENDIAN is
// defined.
static uint32 Fetch32(const char *p) {
  const uint32 raw = UNALIGNED_LOAD32(p);
  return uint32_in_expected_order(raw);
}
|
||||||
|
|
||||||
|
// Some primes between 2^63 and 2^64 for various uses.
|
||||||
|
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
|
||||||
|
static const uint64 k1 = 0xb492b66fbe98f273ULL;
|
||||||
|
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
|
||||||
|
|
||||||
|
// Magic numbers for 32-bit hashing. Copied from Murmur3.
|
||||||
|
static const uint32_t c1 = 0xcc9e2d51;
|
||||||
|
static const uint32_t c2 = 0x1b873593;
|
||||||
|
|
||||||
|
// A 32-bit to 32-bit integer hash copied from Murmur3.
|
||||||
|
static uint32 fmix(uint32 h)
|
||||||
|
{
|
||||||
|
h ^= h >> 16;
|
||||||
|
h *= 0x85ebca6b;
|
||||||
|
h ^= h >> 13;
|
||||||
|
h *= 0xc2b2ae35;
|
||||||
|
h ^= h >> 16;
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bitwise right rotation of a 32-bit value by `shift` bits.
static uint32 Rotate32(uint32 val, int shift) {
  // A zero rotation is returned as-is: the general formula would shift left
  // by 32, which yields an undefined result.
  if (shift == 0) {
    return val;
  }
  return (val >> shift) | (val << (32 - shift));
}
|
||||||
|
|
||||||
|
#undef PERMUTE3
|
||||||
|
#define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0)
|
||||||
|
|
||||||
|
// Helper from Murmur3 for combining two 32-bit values: `a` is scrambled with
// c1/c2 and a rotation, xor-ed into `h`, and the result is rotated and scaled.
static uint32 Mur(uint32 a, uint32 h) {
  const uint32 scrambled = Rotate32(a * c1, 17) * c2;
  const uint32 rotated = Rotate32(h ^ scrambled, 19);
  return rotated * 5 + 0xe6546b64;
}
|
||||||
|
|
||||||
|
// Hashes a 13..24 byte input to 32 bits. Six (possibly overlapping) 32-bit
// words are read from the start, the middle and the end of the input, folded
// into the length one after another with Mur(), and finalized with fmix().
//
// s:   input bytes; len bytes must be readable.
// len: input length; CityHash32() calls this only for 13 <= len <= 24.
static uint32 Hash32Len13to24(const char *s, size_t len) {
  const size_t half = len >> 1;
  // Add the offset before subtracting 4 so that no intermediate pointer lies
  // before the start of the buffer (forming `s - 4` is undefined when s points
  // at the first element). The address that is read is unchanged.
  const uint32 a = Fetch32(s + half - 4);
  const uint32 b = Fetch32(s + 4);
  const uint32 c = Fetch32(s + len - 8);
  const uint32 d = Fetch32(s + half);
  const uint32 e = Fetch32(s);
  const uint32 f = Fetch32(s + len - 4);

  // Fold the words into the length in the order a, b, c, d, e, f.
  uint32 h = len;
  h = Mur(a, h);
  h = Mur(b, h);
  h = Mur(c, h);
  h = Mur(d, h);
  h = Mur(e, h);
  h = Mur(f, h);
  return fmix(h);
}
|
||||||
|
|
||||||
|
// Hashes a 0..4 byte input to 32 bits by accumulating the bytes one at a time
// (each taken as a signed char), then combining the accumulator, the length
// and a running xor with Mur() and fmix().
static uint32 Hash32Len0to4(const char *s, size_t len) {
  uint32 acc = 0;
  uint32 mixed = 9;
  uint32 i = 0;
  while (i < len) {
    const signed char v = s[i];
    acc = acc * c1 + v;
    mixed ^= acc;
    ++i;
  }
  const uint32 inner = Mur(len, mixed);
  return fmix(Mur(acc, inner));
}
|
||||||
|
|
||||||
|
// Hashes a 5..12 byte input to 32 bits from three 32-bit words: the first
// four bytes, the last four bytes, and a word at offset 0 or 4 selected by
// (len >> 1) & 4.
static uint32 Hash32Len5to12(const char *s, size_t len) {
  const uint32 n = len;
  const uint32 seed = len * 5;
  const uint32 a = n + Fetch32(s);
  const uint32 b = seed + Fetch32(s + len - 4);
  const uint32 c = 9 + Fetch32(s + ((len >> 1) & 4));
  const uint32 inner = Mur(a, seed);
  return fmix(Mur(c, Mur(b, inner)));
}
|
||||||
|
|
||||||
|
// 32-bit hash of the len bytes at s.
//
// Inputs of up to 24 bytes are handled by the fixed-range helpers. Longer
// inputs seed three accumulators (h, g, f) from the length and the last 20
// bytes, then consume the input from the front in 20-byte steps.
uint32 CityHash32(const char *s, size_t len) {
  if (len <= 4) {
    return Hash32Len0to4(s, len);
  }
  if (len <= 12) {
    return Hash32Len5to12(s, len);
  }
  if (len <= 24) {
    return Hash32Len13to24(s, len);
  }

  // len > 24
  uint32 h = len;
  uint32 g = c1 * len;
  uint32 f = g;

  // Five words taken from the last 20 bytes of the input.
  const uint32 t0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
  const uint32 t1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
  const uint32 t2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
  const uint32 t3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
  const uint32 t4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
  h = Rotate32(h ^ t0, 19) * 5 + 0xe6546b64;
  h = Rotate32(h ^ t2, 19) * 5 + 0xe6546b64;
  g = Rotate32(g ^ t1, 19) * 5 + 0xe6546b64;
  g = Rotate32(g ^ t3, 19) * 5 + 0xe6546b64;
  f = Rotate32(f + t4, 19) * 5 + 0xe6546b64;

  // (len - 1) / 20 is at least 1 here because len > 24.
  for (size_t remaining = (len - 1) / 20; remaining != 0; --remaining) {
    const uint32 w0 = Rotate32(Fetch32(s) * c1, 17) * c2;
    const uint32 w1 = Fetch32(s + 4);
    const uint32 w2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
    const uint32 w3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
    const uint32 w4 = Fetch32(s + 16);
    h = Rotate32(h ^ w0, 18) * 5 + 0xe6546b64;
    f = Rotate32(f + w1, 19) * c1;
    g = Rotate32(g + w2, 18) * 5 + 0xe6546b64;
    h = Rotate32(h ^ (w3 + w1), 19) * 5 + 0xe6546b64;
    g ^= w4;
    g = bswap_32(g) * 5;
    h += w4 * 5;
    h = bswap_32(h);
    f += w0;
    PERMUTE3(f, h, g);
    s += 20;
  }

  // Final mixing of the three accumulators into h.
  g = Rotate32(g, 11) * c1;
  g = Rotate32(g, 17) * c1;
  f = Rotate32(f, 11) * c1;
  f = Rotate32(f, 17) * c1;
  h = Rotate32(h + g, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  h = Rotate32(h + f, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  return h;
}
|
||||||
|
|
||||||
|
// Bitwise right rotate. Normally this will compile to a single
|
||||||
|
// instruction, especially if the shift is a manifest constant.
|
||||||
|
static uint64 Rotate(uint64 val, int shift) {
|
||||||
|
// Avoid shifting by 64: doing so yields an undefined result.
|
||||||
|
return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Xors val with itself shifted right by 47 bits.
static uint64 ShiftMix(uint64 val) {
  const uint64 shifted = val >> 47;
  return val ^ shifted;
}
|
||||||
|
|
||||||
|
// Hashes two 64-bit values down to one by packing them into a uint128 and
// delegating to Hash128to64().
static uint64 HashLen16(uint64 u, uint64 v) {
  const uint128 packed(u, v);
  return Hash128to64(packed);
}
|
||||||
|
|
||||||
|
// Murmur-inspired hashing of two 64-bit values with a caller-supplied
// multiplier: two multiply / xor-shift-47 rounds and a final multiply.
static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
  const uint64 first = ShiftMix((u ^ v) * mul);
  const uint64 second = ShiftMix((v ^ first) * mul);
  return second * mul;
}
|
||||||
|
|
||||||
|
// 64-bit hash of a 0..16 byte input. Each length range has its own recipe:
// empty, 1..3 bytes, 4..7 bytes, and 8..16 bytes.
static uint64 HashLen0to16(const char *s, size_t len) {
  if (len == 0) {
    return k2;
  }

  if (len < 4) {
    // 1..3 bytes: combine the first, middle and last byte with the length.
    const uint8 first = s[0];
    const uint8 middle = s[len >> 1];
    const uint8 last = s[len - 1];
    const uint32 y = static_cast<uint32>(first) + (static_cast<uint32>(middle) << 8);
    const uint32 z = len + (static_cast<uint32>(last) << 2);
    return ShiftMix(y * k2 ^ z * k0) * k2;
  }

  const uint64 mul = k2 + len * 2;

  if (len < 8) {
    // 4..7 bytes: two (possibly overlapping) 32-bit words.
    const uint64 head = Fetch32(s);
    return HashLen16(len + (head << 3), Fetch32(s + len - 4), mul);
  }

  // 8..16 bytes: two (possibly overlapping) 64-bit words.
  const uint64 a = Fetch64(s) + k2;
  const uint64 b = Fetch64(s + len - 8);
  const uint64 c = Rotate(b, 37) * mul + a;
  const uint64 d = (Rotate(a, 25) + b) * mul;
  return HashLen16(c, d, mul);
}
|
||||||
|
|
||||||
|
// This probably works well for 16-byte strings as well, but it may be overkill
|
||||||
|
// in that case.
|
||||||
|
static uint64 HashLen17to32(const char *s, size_t len) {
|
||||||
|
uint64 mul = k2 + len * 2;
|
||||||
|
uint64 a = Fetch64(s) * k1;
|
||||||
|
uint64 b = Fetch64(s + 8);
|
||||||
|
uint64 c = Fetch64(s + len - 8) * mul;
|
||||||
|
uint64 d = Fetch64(s + len - 16) * k2;
|
||||||
|
return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
|
||||||
|
a + Rotate(b + k2, 18) + c, mul);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return a 16-byte hash for 48 bytes. Quick and dirty.
|
||||||
|
// Callers do best to use "random-looking" values for a and b.
|
||||||
|
static pair<uint64, uint64> WeakHashLen32WithSeeds(
|
||||||
|
uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
|
||||||
|
a += w;
|
||||||
|
b = Rotate(b + a + z, 21);
|
||||||
|
uint64 c = a;
|
||||||
|
a += x;
|
||||||
|
a += y;
|
||||||
|
b += Rotate(a, 44);
|
||||||
|
return make_pair(a + z, b + c);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
|
||||||
|
static pair<uint64, uint64> WeakHashLen32WithSeeds(
|
||||||
|
const char* s, uint64 a, uint64 b) {
|
||||||
|
return WeakHashLen32WithSeeds(Fetch64(s),
|
||||||
|
Fetch64(s + 8),
|
||||||
|
Fetch64(s + 16),
|
||||||
|
Fetch64(s + 24),
|
||||||
|
a,
|
||||||
|
b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return an 8-byte hash for 33 to 64 bytes.
|
||||||
|
static uint64 HashLen33to64(const char *s, size_t len) {
|
||||||
|
uint64 mul = k2 + len * 2;
|
||||||
|
uint64 a = Fetch64(s) * k2;
|
||||||
|
uint64 b = Fetch64(s + 8);
|
||||||
|
uint64 c = Fetch64(s + len - 24);
|
||||||
|
uint64 d = Fetch64(s + len - 32);
|
||||||
|
uint64 e = Fetch64(s + 16) * k2;
|
||||||
|
uint64 f = Fetch64(s + 24) * 9;
|
||||||
|
uint64 g = Fetch64(s + len - 8);
|
||||||
|
uint64 h = Fetch64(s + len - 16) * mul;
|
||||||
|
uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
|
||||||
|
uint64 v = ((a + g) ^ d) + f + 1;
|
||||||
|
uint64 w = bswap_64((u + v) * mul) + h;
|
||||||
|
uint64 x = Rotate(e + f, 42) + c;
|
||||||
|
uint64 y = (bswap_64((v + w) * mul) + g) * mul;
|
||||||
|
uint64 z = e + f + c;
|
||||||
|
a = bswap_64((x + z) * mul + y) + b;
|
||||||
|
b = ShiftMix((z + a) * mul + d + h) * mul;
|
||||||
|
return b + x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 64-bit hash of the len bytes at s.
//
// Inputs of up to 64 bytes are handled by the fixed-range helpers. Longer
// inputs hash the last 64 bytes first and then walk the input from the front
// in 64-byte chunks, keeping 56 bytes of state: v, w, x, y, and z.
uint64 CityHash64(const char *s, size_t len) {
  if (len <= 16) {
    return HashLen0to16(s, len);
  }
  if (len <= 32) {
    return HashLen17to32(s, len);
  }
  if (len <= 64) {
    return HashLen33to64(s, len);
  }

  // Seed the state from the end of the input.
  const char *const tail = s + len;
  uint64 x = Fetch64(tail - 40);
  uint64 y = Fetch64(tail - 16) + Fetch64(tail - 56);
  uint64 z = HashLen16(Fetch64(tail - 48) + len, Fetch64(tail - 24));
  pair<uint64, uint64> v = WeakHashLen32WithSeeds(tail - 64, len, z);
  pair<uint64, uint64> w = WeakHashLen32WithSeeds(tail - 32, y + k1, x);
  x = x * k1 + Fetch64(s);

  // Process (len - 1) rounded down to a multiple of 64 bytes, one 64-byte
  // chunk per iteration. That amount is at least 64 because len > 64.
  const char *const stop = s + ((len - 1) & ~static_cast<size_t>(63));
  do {
    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    x ^= w.second;
    y += v.first + Fetch64(s + 40);
    z = Rotate(z + w.first, 33) * k1;
    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    std::swap(z, x);
    s += 64;
  } while (s != stop);

  const uint64 first = HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z;
  const uint64 second = HashLen16(v.second, w.second) + x;
  return HashLen16(first, second);
}
|
||||||
|
|
||||||
|
// Seeded 64-bit hash: forwards to CityHash64WithSeeds() with k2 as the first
// seed and the caller's seed as the second.
uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
  const uint64 fixed_seed0 = k2;
  return CityHash64WithSeeds(s, len, fixed_seed0, seed);
}
|
||||||
|
|
||||||
|
// Two-seed 64-bit hash: computes the unseeded CityHash64 of the input,
// subtracts seed0 from it and combines the result with seed1 via HashLen16().
uint64 CityHash64WithSeeds(const char *s, size_t len,
                           uint64 seed0, uint64 seed1) {
  const uint64 unseeded = CityHash64(s, len);
  return HashLen16(unseeded - seed0, seed1);
}
|
||||||
|
|
||||||
|
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
|
||||||
|
// of any length representable in signed long. Based on City and Murmur.
|
||||||
|
static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
|
||||||
|
uint64 a = Uint128Low64(seed);
|
||||||
|
uint64 b = Uint128High64(seed);
|
||||||
|
uint64 c = 0;
|
||||||
|
uint64 d = 0;
|
||||||
|
signed long l = len - 16;
|
||||||
|
if (l <= 0) { // len <= 16
|
||||||
|
a = ShiftMix(a * k1) * k1;
|
||||||
|
c = b * k1 + HashLen0to16(s, len);
|
||||||
|
d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
|
||||||
|
} else { // len > 16
|
||||||
|
c = HashLen16(Fetch64(s + len - 8) + k1, a);
|
||||||
|
d = HashLen16(b + len, c + Fetch64(s + len - 16));
|
||||||
|
a += d;
|
||||||
|
do {
|
||||||
|
a ^= ShiftMix(Fetch64(s) * k1) * k1;
|
||||||
|
a *= k1;
|
||||||
|
b ^= a;
|
||||||
|
c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
|
||||||
|
c *= k1;
|
||||||
|
d ^= c;
|
||||||
|
s += 16;
|
||||||
|
l -= 16;
|
||||||
|
} while (l > 0);
|
||||||
|
}
|
||||||
|
a = HashLen16(a, c);
|
||||||
|
b = HashLen16(d, b);
|
||||||
|
return uint128(a ^ b, HashLen16(b, a));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Seeded 128-bit hash of the len bytes at s. Inputs shorter than 128 bytes
// are delegated to CityMurmur(); longer inputs are consumed 128 bytes at a
// time, followed by up to four 32-byte chunks taken from the end.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len < 128) {
    return CityMurmur(s, len, seed);
  }

  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
  // v, w, x, y, and z.
  pair<uint64, uint64> v, w;
  uint64 x = Uint128Low64(seed);
  uint64 y = Uint128High64(seed);
  uint64 z = len * k1;
  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
  w.first = Rotate(y + z, 35) * k1 + x;
  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;

  // This is the same inner loop as CityHash64(), run twice per 128-byte step.
  do {
    for (int pass = 0; pass < 2; ++pass) {
      x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
      y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
      x ^= w.second;
      y += v.first + Fetch64(s + 40);
      z = Rotate(z + w.first, 33) * k1;
      v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
      w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
      std::swap(z, x);
      s += 64;
    }
    len -= 128;
  } while (LIKELY(len >= 128));

  x += Rotate(v.first + z, 49) * k0;
  y = y * k0 + Rotate(w.second, 37);
  z = z * k0 + Rotate(w.first, 27);
  w.first *= 9;
  v.first *= k0;

  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  size_t tail_done = 0;
  while (tail_done < len) {
    tail_done += 32;
    const char *const chunk = s + len - tail_done;
    y = Rotate(x + y, 42) * k0 + v.second;
    w.first += Fetch64(chunk + 16);
    x = x * k0 + w.first;
    z += w.second + Fetch64(chunk);
    w.second += v.first;
    v = WeakHashLen32WithSeeds(chunk, v.first + z, v.second);
    v.first *= k0;
  }

  // At this point our 56 bytes of state should contain more than
  // enough information for a strong 128-bit hash.  We use two
  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  x = HashLen16(x, v.first);
  y = HashLen16(y + z, w.first);
  const uint64 low = HashLen16(x + v.second, w.second) + y;
  const uint64 high = HashLen16(x + w.second, y + v.second);
  return uint128(low, high);
}
|
||||||
|
|
||||||
|
// Unseeded 128-bit hash. Inputs shorter than 16 bytes use the fixed seed
// (k0, k1); longer inputs derive the seed from their first 16 bytes and hash
// the remainder.
uint128 CityHash128(const char *s, size_t len) {
  if (len < 16) {
    return CityHash128WithSeed(s, len, uint128(k0, k1));
  }
  const uint128 seed(Fetch64(s), Fetch64(s + 8) + k0);
  return CityHash128WithSeed(s + 16, len - 16, seed);
}
|
||||||
|
|
||||||
|
#ifdef __SSE4_2__
|
||||||
|
// citycrc.h is a CityHash project header, not a system header. The quoted
// form searches this file's own directory first and then falls back to the
// same include path the angle-bracket form uses, so the build no longer
// depends on that directory being on the include path.
#include "citycrc.h"
#include <nmmintrin.h>
|
||||||
|
|
||||||
|
// Requires len >= 240.
|
||||||
|
static void CityHashCrc256Long(const char *s, size_t len,
|
||||||
|
uint32 seed, uint64 *result) {
|
||||||
|
uint64 a = Fetch64(s + 56) + k0;
|
||||||
|
uint64 b = Fetch64(s + 96) + k0;
|
||||||
|
uint64 c = result[0] = HashLen16(b, len);
|
||||||
|
uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
|
||||||
|
uint64 e = Fetch64(s + 184) + seed;
|
||||||
|
uint64 f = 0;
|
||||||
|
uint64 g = 0;
|
||||||
|
uint64 h = c + d;
|
||||||
|
uint64 x = seed;
|
||||||
|
uint64 y = 0;
|
||||||
|
uint64 z = 0;
|
||||||
|
|
||||||
|
// 240 bytes of input per iter.
|
||||||
|
size_t iters = len / 240;
|
||||||
|
len -= iters * 240;
|
||||||
|
do {
|
||||||
|
#undef CHUNK
|
||||||
|
#define CHUNK(r) \
|
||||||
|
PERMUTE3(x, z, y); \
|
||||||
|
b += Fetch64(s); \
|
||||||
|
c += Fetch64(s + 8); \
|
||||||
|
d += Fetch64(s + 16); \
|
||||||
|
e += Fetch64(s + 24); \
|
||||||
|
f += Fetch64(s + 32); \
|
||||||
|
a += b; \
|
||||||
|
h += f; \
|
||||||
|
b += c; \
|
||||||
|
f += d; \
|
||||||
|
g += e; \
|
||||||
|
e += z; \
|
||||||
|
g += x; \
|
||||||
|
z = _mm_crc32_u64(z, b + g); \
|
||||||
|
y = _mm_crc32_u64(y, e + h); \
|
||||||
|
x = _mm_crc32_u64(x, f + a); \
|
||||||
|
e = Rotate(e, r); \
|
||||||
|
c += e; \
|
||||||
|
s += 40
|
||||||
|
|
||||||
|
CHUNK(0); PERMUTE3(a, h, c);
|
||||||
|
CHUNK(33); PERMUTE3(a, h, f);
|
||||||
|
CHUNK(0); PERMUTE3(b, h, f);
|
||||||
|
CHUNK(42); PERMUTE3(b, h, d);
|
||||||
|
CHUNK(0); PERMUTE3(b, h, e);
|
||||||
|
CHUNK(33); PERMUTE3(a, h, e);
|
||||||
|
} while (--iters > 0);
|
||||||
|
|
||||||
|
while (len >= 40) {
|
||||||
|
CHUNK(29);
|
||||||
|
e ^= Rotate(a, 20);
|
||||||
|
h += Rotate(b, 30);
|
||||||
|
g ^= Rotate(c, 40);
|
||||||
|
f += Rotate(d, 34);
|
||||||
|
PERMUTE3(c, h, g);
|
||||||
|
len -= 40;
|
||||||
|
}
|
||||||
|
if (len > 0) {
|
||||||
|
s = s + len - 40;
|
||||||
|
CHUNK(33);
|
||||||
|
e ^= Rotate(a, 43);
|
||||||
|
h += Rotate(b, 42);
|
||||||
|
g ^= Rotate(c, 41);
|
||||||
|
f += Rotate(d, 40);
|
||||||
|
}
|
||||||
|
result[0] ^= h;
|
||||||
|
result[1] ^= g;
|
||||||
|
g += h;
|
||||||
|
a = HashLen16(a, g + z);
|
||||||
|
x += y << 32;
|
||||||
|
b += x;
|
||||||
|
c = HashLen16(c, z) + h;
|
||||||
|
d = HashLen16(d, e + result[0]);
|
||||||
|
g += e;
|
||||||
|
h += HashLen16(x, f);
|
||||||
|
e = HashLen16(a, d) + g;
|
||||||
|
z = HashLen16(b, c) + a;
|
||||||
|
y = HashLen16(g, h) + c;
|
||||||
|
result[0] = e + z + y + x;
|
||||||
|
a = ShiftMix((a + y) * k0) * k0 + b;
|
||||||
|
result[1] += a + result[0];
|
||||||
|
a = ShiftMix(a * k0) * k0 + c;
|
||||||
|
result[2] = a + result[1];
|
||||||
|
a = ShiftMix((a + e) * k0) * k0;
|
||||||
|
result[3] = a + result[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Requires len < 240.
|
||||||
|
static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
|
||||||
|
char buf[240];
|
||||||
|
memcpy(buf, s, len);
|
||||||
|
memset(buf + len, 0, 240 - len);
|
||||||
|
CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CityHashCrc256(const char *s, size_t len, uint64 *result) {
|
||||||
|
if (LIKELY(len >= 240)) {
|
||||||
|
CityHashCrc256Long(s, len, 0, result);
|
||||||
|
} else {
|
||||||
|
CityHashCrc256Short(s, len, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Seeded 128-bit hash of s[0..len).
// Inputs of up to 900 bytes are delegated to CityHash128WithSeed. Longer
// inputs are hashed with CityHashCrc256, and the four resulting words are
// folded together with the two halves of the seed.
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
  if (len > 900) {
    uint64 words[4];
    CityHashCrc256(s, len, words);
    const uint64 hi = Uint128High64(seed) + words[0];
    const uint64 lo = Uint128Low64(seed) + words[1];
    const uint64 first = HashLen16(hi, lo + words[2]);
    const uint64 second = HashLen16(Rotate(lo, 32), hi * k0 + words[3]);
    return uint128(first, second);
  }
  return CityHash128WithSeed(s, len, seed);
}
|
||||||
|
|
||||||
|
// Unseeded 128-bit hash of s[0..len).
// Inputs of up to 900 bytes are delegated to CityHash128; longer inputs are
// hashed with CityHashCrc256 and the last two of its four output words are
// returned as the 128-bit result.
uint128 CityHashCrc128(const char *s, size_t len) {
  if (len > 900) {
    uint64 words[4];
    CityHashCrc256(s, len, words);
    return uint128(words[2], words[3]);
  }
  return CityHash128(s, len);
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,112 @@
|
||||||
|
// Copyright (c) 2011 Google, Inc.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
||||||
|
//
|
||||||
|
// http://code.google.com/p/cityhash/
|
||||||
|
//
|
||||||
|
// This file provides a few functions for hashing strings. All of them are
|
||||||
|
// high-quality functions in the sense that they pass standard tests such
|
||||||
|
// as Austin Appleby's SMHasher. They are also fast.
|
||||||
|
//
|
||||||
|
// For 64-bit x86 code, on short strings, we don't know of anything faster than
|
||||||
|
// CityHash64 that is of comparable quality. We believe our nearest competitor
|
||||||
|
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
|
||||||
|
// tables and most other hashing (excluding cryptography).
|
||||||
|
//
|
||||||
|
// For 64-bit x86 code, on long strings, the picture is more complicated.
|
||||||
|
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
|
||||||
|
// CityHashCrc128 appears to be faster than all competitors of comparable
|
||||||
|
// quality. CityHash128 is also good but not quite as fast. We believe our
|
||||||
|
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
|
||||||
|
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
|
||||||
|
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
|
||||||
|
// Note that CityHashCrc128 is declared in citycrc.h.
|
||||||
|
//
|
||||||
|
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
|
||||||
|
// is of comparable quality. We believe our nearest competitor is Murmur3A.
|
||||||
|
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
|
||||||
|
//
|
||||||
|
// Functions in the CityHash family are not suitable for cryptography.
|
||||||
|
//
|
||||||
|
// Please see CityHash's README file for more details on our performance
|
||||||
|
// measurements and so on.
|
||||||
|
//
|
||||||
|
// WARNING: This code has been only lightly tested on big-endian platforms!
|
||||||
|
// It is known to work well on little-endian platforms that have a small penalty
|
||||||
|
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
|
||||||
|
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
|
||||||
|
// bug reports are welcome.
|
||||||
|
//
|
||||||
|
// By the way, for some hash functions, given strings a and b, the hash
|
||||||
|
// of a+b is easily derived from the hashes of a and b. This property
|
||||||
|
// doesn't hold for any hash functions in this file.
|
||||||
|
|
||||||
|
#ifndef CITY_HASH_H_
|
||||||
|
#define CITY_HASH_H_
|
||||||
|
|
||||||
|
#include <stdlib.h> // for size_t.
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
typedef uint8_t uint8;
|
||||||
|
typedef uint32_t uint32;
|
||||||
|
typedef uint64_t uint64;
|
||||||
|
typedef std::pair<uint64, uint64> uint128;
|
||||||
|
|
||||||
|
// Returns the low 64 bits of x, which are stored in the pair's first member.
inline uint64 Uint128Low64(const uint128& x) {
  return std::get<0>(x);
}

// Returns the high 64 bits of x, which are stored in the pair's second member.
inline uint64 Uint128High64(const uint128& x) {
  return std::get<1>(x);
}
|
||||||
|
|
||||||
|
// Hash function for a byte array.
|
||||||
|
uint64 CityHash64(const char *buf, size_t len);
|
||||||
|
|
||||||
|
// Hash function for a byte array. For convenience, a 64-bit seed is also
|
||||||
|
// hashed into the result.
|
||||||
|
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
|
||||||
|
|
||||||
|
// Hash function for a byte array. For convenience, two seeds are also
|
||||||
|
// hashed into the result.
|
||||||
|
uint64 CityHash64WithSeeds(const char *buf, size_t len,
|
||||||
|
uint64 seed0, uint64 seed1);
|
||||||
|
|
||||||
|
// Hash function for a byte array.
|
||||||
|
uint128 CityHash128(const char *s, size_t len);
|
||||||
|
|
||||||
|
// Hash function for a byte array. For convenience, a 128-bit seed is also
|
||||||
|
// hashed into the result.
|
||||||
|
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
|
||||||
|
|
||||||
|
// Hash function for a byte array. Most useful in 32-bit binaries.
|
||||||
|
uint32 CityHash32(const char *buf, size_t len);
|
||||||
|
|
||||||
|
// Hash 128 input bits down to 64 bits of output.
|
||||||
|
// This is intended to be a reasonably good hash function.
|
||||||
|
inline uint64 Hash128to64(const uint128& x) {
|
||||||
|
// Murmur-inspired hashing.
|
||||||
|
const uint64 kMul = 0x9ddfea08eb382d69ULL;
|
||||||
|
uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
|
||||||
|
a ^= (a >> 47);
|
||||||
|
uint64 b = (Uint128High64(x) ^ a) * kMul;
|
||||||
|
b ^= (b >> 47);
|
||||||
|
b *= kMul;
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CITY_HASH_H_
|
|
@ -0,0 +1,43 @@
|
||||||
|
// Copyright (c) 2011 Google, Inc.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
// of this software and associated documentation files (the "Software"), to deal
|
||||||
|
// in the Software without restriction, including without limitation the rights
|
||||||
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the Software is
|
||||||
|
// furnished to do so, subject to the following conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be included in
|
||||||
|
// all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
// THE SOFTWARE.
|
||||||
|
//
|
||||||
|
// CityHash, by Geoff Pike and Jyrki Alakuijala
|
||||||
|
//
|
||||||
|
// This file declares the subset of the CityHash functions that require
|
||||||
|
// _mm_crc32_u64(). See the CityHash README for details.
|
||||||
|
//
|
||||||
|
// Functions in the CityHash family are not suitable for cryptography.
|
||||||
|
|
||||||
|
#ifndef CITY_HASH_CRC_H_
|
||||||
|
#define CITY_HASH_CRC_H_
|
||||||
|
|
||||||
|
#include <city.h>
|
||||||
|
|
||||||
|
// Hash function for a byte array.
|
||||||
|
uint128 CityHashCrc128(const char *s, size_t len);
|
||||||
|
|
||||||
|
// Hash function for a byte array. For convenience, a 128-bit seed is also
|
||||||
|
// hashed into the result.
|
||||||
|
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
|
||||||
|
|
||||||
|
// Hash function for a byte array. Sets result[0] ... result[3].
|
||||||
|
void CityHashCrc256(const char *s, size_t len, uint64 *result);
|
||||||
|
|
||||||
|
#endif // CITY_HASH_CRC_H_
|
|
@ -0,0 +1,40 @@
|
||||||
|
#ifndef _CITY_HASHER_HH
|
||||||
|
#define _CITY_HASHER_HH
|
||||||
|
|
||||||
|
#include <cityhash/city.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/*! CityHasher is a std::hash-style wrapper around CityHash. We
|
||||||
|
* encourage using CityHasher instead of the default std::hash if
|
||||||
|
* possible. */
|
||||||
|
template <class Key>
|
||||||
|
class CityHasher {
|
||||||
|
public:
|
||||||
|
size_t operator()(const Key& k) const {
|
||||||
|
if (sizeof(size_t) < 8) {
|
||||||
|
return CityHash32((const char*) &k, sizeof(k));
|
||||||
|
}
|
||||||
|
/* Although the following line should be optimized away on 32-bit
|
||||||
|
* builds, the cast is still necessary to stop MSVC emitting a
|
||||||
|
* truncation warning. */
|
||||||
|
return static_cast<size_t>(CityHash64((const char*) &k, sizeof(k)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! This is a template specialization of CityHasher for
|
||||||
|
* std::string. */
|
||||||
|
template <>
|
||||||
|
class CityHasher<std::string> {
|
||||||
|
public:
|
||||||
|
size_t operator()(const std::string& k) const {
|
||||||
|
if (sizeof(size_t) < 8) {
|
||||||
|
return CityHash32(k.c_str(), k.size());
|
||||||
|
}
|
||||||
|
/* Although the following line should be optimized away on 32-bit
|
||||||
|
* builds, the cast is still necessary to stop MSVC emitting a
|
||||||
|
* truncation warning. */
|
||||||
|
return static_cast<size_t>(CityHash64(k.c_str(), k.size()));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // _CITY_HASHER_HH
|
|
@ -0,0 +1,34 @@
|
||||||
|
/** \file */
|
||||||
|
|
||||||
|
#ifndef _CUCKOOHASH_CONFIG_HH
|
||||||
|
#define _CUCKOOHASH_CONFIG_HH
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
//! The default maximum number of keys per bucket
constexpr size_t DEFAULT_SLOT_PER_BUCKET = 4;

//! The default number of elements in an empty hash table (2^16 buckets, each
//! holding DEFAULT_SLOT_PER_BUCKET keys)
constexpr size_t DEFAULT_SIZE =
    DEFAULT_SLOT_PER_BUCKET * (static_cast<size_t>(1) << 16);

//! On a scale of 0 to 16, the memory granularity of the locks array. 0 is the
//! least granular, meaning the array is a contiguous array and thus offers the
//! best performance but the greatest memory overhead. 16 is the most granular,
//! offering the least memory overhead but worse performance.
constexpr size_t LOCK_ARRAY_GRANULARITY = 0;

//! The default minimum load factor that the table allows for automatic
//! expansion. It must be a number between 0.0 and 1.0. The table will throw
//! libcuckoo_load_factor_too_low if the load factor falls below this value
//! during an automatic expansion.
constexpr double DEFAULT_MINIMUM_LOAD_FACTOR = 0.05;

//! An alias for the value that sets no limit on the maximum hashpower. If this
//! value is set as the maximum hashpower limit, there will be no limit. Since 0
//! is the only hashpower that can never occur, it should stay at 0.
constexpr size_t NO_MAXIMUM_HASHPOWER = 0;
|
||||||
|
|
||||||
|
//! set LIBCUCKOO_DEBUG to 1 to enable debug output
|
||||||
|
#define LIBCUCKOO_DEBUG 0
|
||||||
|
|
||||||
|
#endif // _CUCKOOHASH_CONFIG_HH
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,185 @@
|
||||||
|
/** \file */
|
||||||
|
|
||||||
|
#ifndef _CUCKOOHASH_UTIL_HH
|
||||||
|
#define _CUCKOOHASH_UTIL_HH
|
||||||
|
|
||||||
|
#include <cstddef>
#include <cstdio>
#include <exception>
#include <functional>
#include <memory>
#include <thread>
#include <type_traits>
#include <vector>
#include "cuckoohash_config.hh" // for LIBCUCKOO_DEBUG
|
||||||
|
|
||||||
|
// LIBCUCKOO_DBG(fmt, ...): printf-style debug logging to stderr.
// When LIBCUCKOO_DEBUG is non-zero, each message is printed in green and
// prefixed with the source file, line number and a numeric tag for the
// calling thread. The tag is the hash of std::this_thread::get_id(), because
// std::thread::id is an opaque type that cannot be cast to an integer.
// When LIBCUCKOO_DEBUG is zero the macro expands to an empty statement and
// its arguments are not evaluated.
// At least one argument must follow fmt, since __VA_ARGS__ is preceded by a
// comma in the expansion.
#if LIBCUCKOO_DEBUG
#  define LIBCUCKOO_DBG(fmt, ...)                                            \
     fprintf(stderr, "\x1b[32m" "[libcuckoo:%s:%d:%lu] " fmt "" "\x1b[0m",    \
             __FILE__, __LINE__,                                              \
             static_cast<unsigned long>(                                      \
                 std::hash<std::thread::id>()(std::this_thread::get_id())),   \
             __VA_ARGS__)
#else
#  define LIBCUCKOO_DBG(fmt, ...) do {} while (0)
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* alignas() requires GCC >= 4.9, so we stick with the alignment attribute for
|
||||||
|
* GCC.
|
||||||
|
*/
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define LIBCUCKOO_ALIGNAS(x) __attribute__((aligned(x)))
|
||||||
|
#else
|
||||||
|
#define LIBCUCKOO_ALIGNAS(x) alignas(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* At higher warning levels, MSVC produces an annoying warning that alignment
|
||||||
|
* may cause wasted space: "structure was padded due to __declspec(align())".
|
||||||
|
*/
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define LIBCUCKOO_SQUELCH_PADDING_WARNING __pragma(warning(suppress : 4324))
|
||||||
|
#else
|
||||||
|
#define LIBCUCKOO_SQUELCH_PADDING_WARNING
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* thread_local requires GCC >= 4.8 and is not supported in some clang versions,
|
||||||
|
* so we use __thread if thread_local is not supported
|
||||||
|
*/
|
||||||
|
#define LIBCUCKOO_THREAD_LOCAL thread_local
|
||||||
|
#if defined(__clang__)
|
||||||
|
# if !__has_feature(cxx_thread_local)
|
||||||
|
# undef LIBCUCKOO_THREAD_LOCAL
|
||||||
|
# define LIBCUCKOO_THREAD_LOCAL __thread
|
||||||
|
# endif
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
# if __GNUC__ == 4 && __GNUC_MINOR__ < 8
|
||||||
|
# undef LIBCUCKOO_THREAD_LOCAL
|
||||||
|
# define LIBCUCKOO_THREAD_LOCAL __thread
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// For enabling certain methods based on a condition. Here's an example.
|
||||||
|
// ENABLE_IF(static inline, some_cond, type) method() {
|
||||||
|
// ...
|
||||||
|
// }
|
||||||
|
#define ENABLE_IF(preamble, condition, return_type) \
|
||||||
|
template <class Bogus=void*> \
|
||||||
|
preamble typename std::enable_if<sizeof(Bogus) && \
|
||||||
|
condition, return_type>::type
|
||||||
|
|
||||||
|
/**
 * Thrown when an automatic expansion is triggered, but the load factor of the
 * table is below a minimum threshold, which can be set by the \ref
 * cuckoohash_map::minimum_load_factor method. This can happen if the hash
 * function does not properly distribute keys, or for certain adversarial
 * workloads.
 */
class libcuckoo_load_factor_too_low : public std::exception {
public:
    /**
     * Constructor
     *
     * @param lf the load factor of the table when the exception was thrown
     */
    libcuckoo_load_factor_too_low(const double lf)
        : load_factor_(lf) {}

    /**
     * @return a fixed, statically allocated description of the error
     */
    virtual const char* what() const noexcept override {
        return "Automatic expansion triggered when load factor was below "
            "minimum threshold";
    }

    /**
     * Const-qualified so that it can be called on an exception caught by
     * const reference.
     *
     * @return the load factor of the table when the exception was thrown
     */
    double load_factor() const noexcept {
        return load_factor_;
    }
private:
    const double load_factor_;
};
|
||||||
|
|
||||||
|
/**
 * Thrown when an expansion is triggered, but the hashpower specified is greater
 * than the maximum, which can be set with the \ref
 * cuckoohash_map::maximum_hashpower method.
 */
class libcuckoo_maximum_hashpower_exceeded : public std::exception {
public:
    /**
     * Constructor
     *
     * @param hp the hash power we were trying to expand to
     */
    libcuckoo_maximum_hashpower_exceeded(const size_t hp)
        : hashpower_(hp) {}

    /**
     * @return a fixed, statically allocated description of the error
     */
    virtual const char* what() const noexcept override {
        return "Expansion beyond maximum hashpower";
    }

    /**
     * Const-qualified so that it can be called on an exception caught by
     * const reference.
     *
     * @return the hashpower we were trying to expand to
     */
    size_t hashpower() const noexcept {
        return hashpower_;
    }
private:
    const size_t hashpower_;
};
|
||||||
|
|
||||||
|
// Allocates an array of the given size and value-initializes each element with
// the 0-argument constructor.
//
// T is the element type and Alloc the allocator type; a default-constructed
// Alloc is used for the allocation. All allocator operations go through
// std::allocator_traits, so allocators that do not provide construct() /
// destroy() members (optional since C++11, removed from std::allocator in
// C++20) work as well.
//
// If an element constructor throws, the elements constructed so far are
// destroyed, the storage is deallocated and the exception is rethrown.
//
// Returns a pointer to the first element. The array must be released with a
// matching destroy_array<T, Alloc>(ptr, size) call.
template <class T, class Alloc>
T* create_array(const size_t size) {
    typedef std::allocator_traits<Alloc> traits;
    Alloc allocator;
    T* arr = traits::allocate(allocator, size);
    // Number of elements successfully constructed so far; the cleanup path
    // below uses it to know how many elements need destroying.
    size_t constructed = 0;
    try {
        for (; constructed < size; ++constructed) {
            traits::construct(allocator, &arr[constructed]);
        }
    } catch (...) {
        for (size_t j = 0; j < constructed; ++j) {
            traits::destroy(allocator, &arr[j]);
        }
        traits::deallocate(allocator, arr, size);
        throw;
    }
    return arr;
}
|
||||||
|
|
||||||
|
// Destroys every element of an array of the given size and then deallocates the
// memory.
//
// arr and size describe the array to release; a default-constructed Alloc is
// used for destruction and deallocation. All allocator operations go through
// std::allocator_traits, so allocators that do not provide a destroy() member
// (optional since C++11, removed from std::allocator in C++20) work as well.
template <class T, class Alloc>
void destroy_array(T* arr, const size_t size) {
    typedef std::allocator_traits<Alloc> traits;
    Alloc allocator;
    for (size_t i = 0; i < size; ++i) {
        traits::destroy(allocator, &arr[i]);
    }
    traits::deallocate(allocator, arr, size);
}
|
||||||
|
|
||||||
|
// Executes the function over the given range split over num_threads threads.
//
// The half-open range [start, end) is cut into num_threads consecutive chunks
// of (end - start) / num_threads elements each; the last chunk additionally
// receives the remainder. Each chunk is processed on its own thread by calling
// func(chunk_start, chunk_end, eptr), where eptr is a reference to a
// std::exception_ptr slot reserved for that thread.
//
// After all threads have been joined, the first non-null slot (in thread
// order) is rethrown on the calling thread, so func is expected to catch its
// own exceptions and store them into its slot.
//
// A num_threads of 0 is treated as 1, since the split is otherwise undefined
// (division by zero). If launching a thread fails, the threads already started
// are joined before the error propagates.
template <class F>
static void parallel_exec(size_t start, size_t end,
                          size_t num_threads, F func) {
    if (num_threads == 0) {
        num_threads = 1;
    }
    const size_t work_per_thread = (end - start) / num_threads;
    std::vector<std::thread> threads;
    // Reserve up front so that emplace_back never reallocates.
    threads.reserve(num_threads);
    std::vector<std::exception_ptr> eptrs(num_threads, nullptr);
    try {
        for (size_t i = 0; i + 1 < num_threads; ++i) {
            threads.emplace_back(func, start, start + work_per_thread,
                                 std::ref(eptrs[i]));
            start += work_per_thread;
        }
        // The last thread takes whatever is left, including the remainder.
        threads.emplace_back(func, start, end,
                             std::ref(eptrs[num_threads - 1]));
    } catch (...) {
        // Destroying a joinable std::thread calls std::terminate, so join
        // everything that was started before propagating the failure.
        for (std::thread& t : threads) {
            t.join();
        }
        throw;
    }
    for (std::thread& t : threads) {
        t.join();
    }
    for (std::exception_ptr& eptr : eptrs) {
        if (eptr) {
            std::rethrow_exception(eptr);
        }
    }
}
|
||||||
|
|
||||||
|
#endif // _CUCKOOHASH_UTIL_HH
|
|
@ -0,0 +1,29 @@
|
||||||
|
#ifndef _DEFAULT_HASHER_HH
|
||||||
|
#define _DEFAULT_HASHER_HH
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
/*! DefaultHasher is the hash functor the table uses by default. Integral keys
 * (which std::hash does badly on) are hashed by multiplying them with a
 * 64-bit constant; every other key type is forwarded to std::hash<Key>. */
template <class Key>
class DefaultHasher {
    std::hash<Key> fallback;

public:
    /* Integral keys: multiply by a constant and convert to size_t. */
    template <class K = Key>
    typename std::enable_if<std::is_integral<K>::value, size_t>::type
    operator()(const Key& k) const {
        // This constant is found in the CityHash code
        const unsigned long long kMul = 0x9ddfea08eb382d69ULL;
        return static_cast<size_t>(k * kMul);
    }

    /* Non-integral keys: defer to std::hash<Key>. */
    template <class K = Key>
    typename std::enable_if<!std::is_integral<K>::value, size_t>::type
    operator()(const Key& k) const {
        const size_t hashed = fallback(k);
        return hashed;
    }
};
|
||||||
|
|
||||||
|
#endif // _DEFAULT_HASHER_HH
|
|
@ -0,0 +1,119 @@
|
||||||
|
/** \file */
|
||||||
|
|
||||||
|
#ifndef _LAZY_ARRAY_HH
|
||||||
|
#define _LAZY_ARRAY_HH
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "cuckoohash_util.hh"
|
||||||
|
|
||||||
|
// lazy array. A fixed-size array, broken up into segments that are dynamically
|
||||||
|
// allocated, only when requested. The array size and segment size are
|
||||||
|
// pre-defined, and are powers of two. The user must make sure the necessary
|
||||||
|
// segments are allocated before accessing the array.
|
||||||
|
template <uint8_t OFFSET_BITS, uint8_t SEGMENT_BITS,
|
||||||
|
class T, class Alloc = std::allocator<T>
|
||||||
|
>
|
||||||
|
class lazy_array {
|
||||||
|
static_assert(SEGMENT_BITS + OFFSET_BITS <= sizeof(size_t)*8,
|
||||||
|
"The number of segment and offset bits cannot exceed "
|
||||||
|
" the number of bits in a size_t");
|
||||||
|
private:
|
||||||
|
static const size_t SEGMENT_SIZE = 1UL << OFFSET_BITS;
|
||||||
|
static const size_t NUM_SEGMENTS = 1UL << SEGMENT_BITS;
|
||||||
|
// The segments array itself is mutable, so that the const subscript
|
||||||
|
// operator can still add segments
|
||||||
|
mutable std::array<T*, NUM_SEGMENTS> segments_;
|
||||||
|
|
||||||
|
void move_other_array(lazy_array&& arr) {
|
||||||
|
clear();
|
||||||
|
std::copy(arr.segments_.begin(), arr.segments_.end(),
|
||||||
|
segments_.begin());
|
||||||
|
std::fill(arr.segments_.begin(), arr.segments_.end(), nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline size_t get_segment(size_t i) {
|
||||||
|
return i >> OFFSET_BITS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const size_t OFFSET_MASK = ((1UL << OFFSET_BITS) - 1);
|
||||||
|
inline size_t get_offset(size_t i) {
|
||||||
|
return i & OFFSET_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
lazy_array(): segments_{{nullptr}} {}
|
||||||
|
|
||||||
|
// No copying
|
||||||
|
lazy_array(const lazy_array&) = delete;
|
||||||
|
lazy_array& operator=(const lazy_array&) = delete;
|
||||||
|
|
||||||
|
// Moving is allowed
|
||||||
|
lazy_array(lazy_array&& arr) : segments_{{nullptr}} {
|
||||||
|
move_other_array(std::move(arr));
|
||||||
|
}
|
||||||
|
lazy_array& operator=(lazy_array&& arr) {
|
||||||
|
move_other_vector(std::move(arr));
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
~lazy_array() {
|
||||||
|
clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
for (size_t i = 0; i < segments_.size(); ++i) {
|
||||||
|
if (segments_[i] != nullptr) {
|
||||||
|
destroy_array<T, Alloc>(segments_[i], SEGMENT_SIZE);
|
||||||
|
segments_[i] = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
T& operator[](size_t i) {
|
||||||
|
assert(segments_[get_segment(i)] != nullptr);
|
||||||
|
return segments_[get_segment(i)][get_offset(i)];
|
||||||
|
}
|
||||||
|
|
||||||
|
const T& operator[](size_t i) const {
|
||||||
|
assert(segments_[get_segment(i)] != nullptr);
|
||||||
|
return segments_[get_segment(i)][get_offset(i)];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensures that the array has enough segments to index target elements, not
|
||||||
|
// exceeding the total size. The user must ensure that the array is properly
|
||||||
|
// allocated before accessing a certain index. This saves having to check
|
||||||
|
// every index operation.
|
||||||
|
void allocate(size_t target) {
|
||||||
|
assert(target <= size());
|
||||||
|
if (target == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const size_t last_segment = get_segment(target - 1);
|
||||||
|
for (size_t i = 0; i <= last_segment; ++i) {
|
||||||
|
if (segments_[i] == nullptr) {
|
||||||
|
segments_[i] = create_array<T, Alloc>(SEGMENT_SIZE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the number of elements in the array that can be indexed, starting
|
||||||
|
// contiguously from the beginning.
|
||||||
|
size_t allocated_size() const {
|
||||||
|
size_t num_allocated_segments = 0;
|
||||||
|
for (;
|
||||||
|
(num_allocated_segments < NUM_SEGMENTS &&
|
||||||
|
segments_[num_allocated_segments] != nullptr);
|
||||||
|
++num_allocated_segments) {}
|
||||||
|
return num_allocated_segments * SEGMENT_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr size_t size() {
|
||||||
|
return 1UL << (OFFSET_BITS + SEGMENT_BITS);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // _LAZY_ARRAY_HH
|
|
@ -376,6 +376,7 @@ include_directories(${PROJECT_SOURCE_DIR}/3rdParty/linenoise-ng/include)
|
||||||
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/linenoise-ng/src)
|
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/linenoise-ng/src)
|
||||||
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/velocypack/include)
|
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/velocypack/include)
|
||||||
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/rocksdb/rocksdb/include)
|
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/rocksdb/rocksdb/include)
|
||||||
|
include_directories(${PROJECT_SOURCE_DIR}/3rdParty/libcuckoo/include)
|
||||||
|
|
||||||
include_directories(${PROJECT_BINARY_DIR})
|
include_directories(${PROJECT_BINARY_DIR})
|
||||||
include_directories(${PROJECT_BINARY_DIR}/lib)
|
include_directories(${PROJECT_BINARY_DIR}/lib)
|
||||||
|
|
|
@ -360,7 +360,7 @@ SET(ARANGOD_SOURCES
|
||||||
Wal/Slot.cpp
|
Wal/Slot.cpp
|
||||||
Wal/Slots.cpp
|
Wal/Slots.cpp
|
||||||
Wal/SynchronizerThread.cpp
|
Wal/SynchronizerThread.cpp
|
||||||
Pregel/AggregatorUsage.cpp
|
Pregel/AggregatorHandler.cpp
|
||||||
Pregel/AlgoRegistry.cpp
|
Pregel/AlgoRegistry.cpp
|
||||||
Pregel/Algos/SSSP.cpp
|
Pregel/Algos/SSSP.cpp
|
||||||
Pregel/Algos/PageRank.cpp
|
Pregel/Algos/PageRank.cpp
|
||||||
|
@ -396,6 +396,7 @@ target_link_libraries(${BIN_ARANGOD}
|
||||||
${SYSTEM_LIBRARIES}
|
${SYSTEM_LIBRARIES}
|
||||||
boost_boost
|
boost_boost
|
||||||
boost_system
|
boost_system
|
||||||
|
libcuckoo
|
||||||
)
|
)
|
||||||
|
|
||||||
install(
|
install(
|
||||||
|
|
|
@ -50,12 +50,13 @@ class Aggregator {
|
||||||
// virtual void setValue(VPackSlice slice) = 0;
|
// virtual void setValue(VPackSlice slice) = 0;
|
||||||
virtual VPackValue vpackValue() = 0;
|
virtual VPackValue vpackValue() = 0;
|
||||||
|
|
||||||
virtual void reset() {};
|
virtual void reset(){};
|
||||||
virtual bool isPermanent() {return _permanent;}
|
bool isPermanent() { return _permanent; }
|
||||||
};
|
};
|
||||||
|
|
||||||
class FloatMaxAggregator : public Aggregator {
|
class FloatMaxAggregator : public Aggregator {
|
||||||
float _value, _initial;
|
float _value, _initial;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
FloatMaxAggregator(float init) : _value(init), _initial(init) {}
|
FloatMaxAggregator(float init) : _value(init), _initial(init) {}
|
||||||
|
|
||||||
|
@ -77,50 +78,43 @@ class FloatMaxAggregator : public Aggregator {
|
||||||
void reset() override { _value = _initial; }
|
void reset() override { _value = _initial; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template <typename T>
|
||||||
class ValueAggregator : public Aggregator {
|
class ValueAggregator : public Aggregator {
|
||||||
static_assert(std::is_arithmetic<T>::value, "Type must be numeric");
|
static_assert(std::is_arithmetic<T>::value, "Type must be numeric");
|
||||||
|
|
||||||
T _value;
|
T _value;
|
||||||
public:
|
|
||||||
|
public:
|
||||||
ValueAggregator(T val) : Aggregator(true), _value(val) {}
|
ValueAggregator(T val) : Aggregator(true), _value(val) {}
|
||||||
|
|
||||||
void aggregate(void const* valuePtr) override {
|
void aggregate(void const* valuePtr) override { _value = *((T*)valuePtr); };
|
||||||
_value = *((T*)valuePtr);
|
void aggregate(VPackSlice slice) override { _value = slice.getNumber<T>(); }
|
||||||
};
|
|
||||||
void aggregate(VPackSlice slice) override {
|
|
||||||
_value = slice.getNumber<T>();
|
|
||||||
}
|
|
||||||
|
|
||||||
void const* getValue() const override { return &_value; };
|
void const* getValue() const override { return &_value; };
|
||||||
/*void setValue(VPackSlice slice) override {
|
/*void setValue(VPackSlice slice) override {
|
||||||
_value = (float)slice.getDouble();
|
_value = (float)slice.getDouble();
|
||||||
}*/
|
}*/
|
||||||
VPackValue vpackValue() override { return VPackValue(_value); };
|
VPackValue vpackValue() override { return VPackValue(_value); };
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template <typename T>
|
||||||
class SumAggregator : public Aggregator {
|
class SumAggregator : public Aggregator {
|
||||||
static_assert(std::is_arithmetic<T>::value, "Type must be numeric");
|
static_assert(std::is_arithmetic<T>::value, "Type must be numeric");
|
||||||
|
|
||||||
T _value;
|
T _value;
|
||||||
public:
|
|
||||||
|
public:
|
||||||
SumAggregator(T val) : Aggregator(true), _value(val) {}
|
SumAggregator(T val) : Aggregator(true), _value(val) {}
|
||||||
|
|
||||||
void aggregate(void const* valuePtr) override {
|
void aggregate(void const* valuePtr) override { _value += *((T*)valuePtr); };
|
||||||
_value += *((T*)valuePtr);
|
void aggregate(VPackSlice slice) override { _value += slice.getNumber<T>(); }
|
||||||
};
|
|
||||||
void aggregate(VPackSlice slice) override {
|
|
||||||
_value += slice.getNumber<T>();
|
|
||||||
}
|
|
||||||
|
|
||||||
void const* getValue() const override { return &_value; };
|
void const* getValue() const override { return &_value; };
|
||||||
/*void setValue(VPackSlice slice) override {
|
/*void setValue(VPackSlice slice) override {
|
||||||
_value = (float)slice.getDouble();
|
_value = (float)slice.getDouble();
|
||||||
}*/
|
}*/
|
||||||
VPackValue vpackValue() override { return VPackValue(_value); };
|
VPackValue vpackValue() override { return VPackValue(_value); };
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -20,21 +20,22 @@
|
||||||
/// @author Simon Grätzer
|
/// @author Simon Grätzer
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#include "Pregel/AggregatorUsage.h"
|
#include "Pregel/AggregatorHandler.h"
|
||||||
#include "Pregel/Aggregator.h"
|
#include "Pregel/Aggregator.h"
|
||||||
#include "Pregel/Algorithm.h"
|
#include "Pregel/Algorithm.h"
|
||||||
|
|
||||||
using namespace arangodb;
|
using namespace arangodb;
|
||||||
using namespace arangodb::pregel;
|
using namespace arangodb::pregel;
|
||||||
|
|
||||||
AggregatorUsage::~AggregatorUsage() {
|
AggregatorHandler::~AggregatorHandler() {
|
||||||
for (auto const& it : _values) {
|
for (auto const& it : _values) {
|
||||||
delete it.second;
|
delete it.second;
|
||||||
}
|
}
|
||||||
_values.clear();
|
_values.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void AggregatorUsage::aggregate(std::string const& name, const void* valuePtr) {
|
void AggregatorHandler::aggregate(std::string const& name,
|
||||||
|
const void* valuePtr) {
|
||||||
auto it = _values.find(name);
|
auto it = _values.find(name);
|
||||||
if (it != _values.end()) {
|
if (it != _values.end()) {
|
||||||
it->second->aggregate(valuePtr);
|
it->second->aggregate(valuePtr);
|
||||||
|
@ -48,7 +49,8 @@ void AggregatorUsage::aggregate(std::string const& name, const void* valuePtr) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const void* AggregatorUsage::getAggregatedValue(std::string const& name) const {
|
const void* AggregatorHandler::getAggregatedValue(
|
||||||
|
std::string const& name) const {
|
||||||
auto const& it = _values.find(name);
|
auto const& it = _values.find(name);
|
||||||
if (it != _values.end()) {
|
if (it != _values.end()) {
|
||||||
return it->second->getValue();
|
return it->second->getValue();
|
||||||
|
@ -56,7 +58,7 @@ const void* AggregatorUsage::getAggregatedValue(std::string const& name) const {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AggregatorUsage::resetValues() {
|
void AggregatorHandler::resetValues() {
|
||||||
for (auto& it : _values) {
|
for (auto& it : _values) {
|
||||||
if (!it.second->isPermanent()) {
|
if (!it.second->isPermanent()) {
|
||||||
it.second->reset();
|
it.second->reset();
|
||||||
|
@ -64,7 +66,7 @@ void AggregatorUsage::resetValues() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AggregatorUsage::aggregateValues(AggregatorUsage const& workerValues) {
|
void AggregatorHandler::aggregateValues(AggregatorHandler const& workerValues) {
|
||||||
for (auto const& pair : workerValues._values) {
|
for (auto const& pair : workerValues._values) {
|
||||||
std::string const& name = pair.first;
|
std::string const& name = pair.first;
|
||||||
auto my = _values.find(name);
|
auto my = _values.find(name);
|
||||||
|
@ -81,7 +83,7 @@ void AggregatorUsage::aggregateValues(AggregatorUsage const& workerValues) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AggregatorUsage::aggregateValues(VPackSlice workerValues) {
|
void AggregatorHandler::aggregateValues(VPackSlice workerValues) {
|
||||||
for (auto const& keyValue : VPackObjectIterator(workerValues)) {
|
for (auto const& keyValue : VPackObjectIterator(workerValues)) {
|
||||||
std::string name = keyValue.key.copyString();
|
std::string name = keyValue.key.copyString();
|
||||||
auto const& it = _values.find(name);
|
auto const& it = _values.find(name);
|
||||||
|
@ -98,10 +100,10 @@ void AggregatorUsage::aggregateValues(VPackSlice workerValues) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AggregatorUsage::serializeValues(VPackBuilder& b) const {
|
void AggregatorHandler::serializeValues(VPackBuilder& b) const {
|
||||||
for (auto const& pair : _values) {
|
for (auto const& pair : _values) {
|
||||||
b.add(pair.first, pair.second->vpackValue());
|
b.add(pair.first, pair.second->vpackValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t AggregatorUsage::size() { return _values.size(); }
|
size_t AggregatorHandler::size() { return _values.size(); }
|
|
@ -20,31 +20,32 @@
|
||||||
/// @author Simon Grätzer
|
/// @author Simon Grätzer
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#ifndef ARANGODB_PREGEL_AGGRGS_USAGE_H
|
#ifndef ARANGODB_PREGEL_AGGREGATOR_HANDLER_H
|
||||||
#define ARANGODB_PREGEL_AGGRGS_USAGE_H 1
|
#define ARANGODB_PREGEL_AGGREGATOR_HANDLER_H 1
|
||||||
|
|
||||||
#include <velocypack/vpack.h>
|
#include <velocypack/Builder.h>
|
||||||
|
#include <velocypack/Slice.h>
|
||||||
#include <velocypack/velocypack-aliases.h>
|
#include <velocypack/velocypack-aliases.h>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
|
|
||||||
struct IAlgorithm;
|
struct IAlgorithm;
|
||||||
class Aggregator;
|
class Aggregator;
|
||||||
|
|
||||||
class AggregatorUsage {
|
class AggregatorHandler {
|
||||||
const IAlgorithm* _create;
|
const IAlgorithm* _create;
|
||||||
std::map<std::string, Aggregator*> _values;
|
std::map<std::string, Aggregator*> _values;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
AggregatorUsage(const IAlgorithm* c) : _create(c) {}
|
AggregatorHandler(const IAlgorithm* c) : _create(c) {}
|
||||||
~AggregatorUsage();
|
~AggregatorHandler();
|
||||||
void aggregate(std::string const& name, const void* valuePtr);
|
void aggregate(std::string const& name, const void* valuePtr);
|
||||||
const void* getAggregatedValue(std::string const& name) const;
|
const void* getAggregatedValue(std::string const& name) const;
|
||||||
void resetValues();
|
void resetValues();
|
||||||
void aggregateValues(AggregatorUsage const& workerValues);
|
void aggregateValues(AggregatorHandler const& workerValues);
|
||||||
void aggregateValues(VPackSlice workerValues);
|
void aggregateValues(VPackSlice workerValues);
|
||||||
void serializeValues(VPackBuilder& b) const;
|
void serializeValues(VPackBuilder& b) const;
|
||||||
size_t size();
|
size_t size();
|
|
@ -31,11 +31,14 @@ struct TRI_vocbase_t;
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
struct AlgoRegistry {
|
struct AlgoRegistry {
|
||||||
static IAlgorithm* createAlgorithm(std::string const& algorithm, VPackSlice userParams);
|
static IAlgorithm* createAlgorithm(std::string const& algorithm,
|
||||||
|
VPackSlice userParams);
|
||||||
static IWorker* createWorker(TRI_vocbase_t* vocbase, VPackSlice body);
|
static IWorker* createWorker(TRI_vocbase_t* vocbase, VPackSlice body);
|
||||||
private:
|
|
||||||
|
private:
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
static IWorker* createWorker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo, VPackSlice body);
|
static IWorker* createWorker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
|
||||||
|
VPackSlice body);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,9 +30,9 @@
|
||||||
|
|
||||||
#include "Basics/Common.h"
|
#include "Basics/Common.h"
|
||||||
#include "GraphFormat.h"
|
#include "GraphFormat.h"
|
||||||
|
#include "MasterContext.h"
|
||||||
#include "MessageCombiner.h"
|
#include "MessageCombiner.h"
|
||||||
#include "MessageFormat.h"
|
#include "MessageFormat.h"
|
||||||
#include "MasterContext.h"
|
|
||||||
#include "WorkerContext.h"
|
#include "WorkerContext.h"
|
||||||
|
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
|
@ -40,7 +40,7 @@ namespace pregel {
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
class VertexComputation;
|
class VertexComputation;
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
class VertexCompensation;
|
class VertexCompensation;
|
||||||
|
|
||||||
|
@ -48,31 +48,30 @@ class Aggregator;
|
||||||
|
|
||||||
struct IAlgorithm {
|
struct IAlgorithm {
|
||||||
virtual ~IAlgorithm() {}
|
virtual ~IAlgorithm() {}
|
||||||
|
|
||||||
// virtual bool isFixpointAlgorithm() const {return false;}
|
// virtual bool isFixpointAlgorithm() const {return false;}
|
||||||
// virtual bool preserveTransactions() const { return false; }
|
|
||||||
virtual bool supportsCompensation() const {
|
virtual bool supportsAsyncMode() const { return false; }
|
||||||
return false;
|
|
||||||
}
|
virtual bool supportsCompensation() const { return false; }
|
||||||
|
|
||||||
virtual Aggregator* aggregator(std::string const& name) const {
|
virtual Aggregator* aggregator(std::string const& name) const {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual MasterContext* masterContext(VPackSlice userParams) const {
|
virtual MasterContext* masterContext(VPackSlice userParams) const {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string const& name() const { return _name; }
|
std::string const& name() const { return _name; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
IAlgorithm(std::string const& name) : _name(name){};
|
IAlgorithm(std::string const& name) : _name(name){};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string _name;
|
std::string _name;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// specify serialization, whatever
|
// specify serialization, whatever
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
struct Algorithm : IAlgorithm {
|
struct Algorithm : IAlgorithm {
|
||||||
|
@ -87,7 +86,8 @@ struct Algorithm : IAlgorithm {
|
||||||
virtual VertexCompensation<V, E, M>* createCompensation(uint64_t gss) const {
|
virtual VertexCompensation<V, E, M>* createCompensation(uint64_t gss) const {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
protected:
|
|
||||||
|
protected:
|
||||||
Algorithm(std::string const& name) : IAlgorithm(name){};
|
Algorithm(std::string const& name) : IAlgorithm(name){};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,10 @@ struct PageRankAlgorithm : public SimpleAlgorithm<float, float, float> {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PageRankAlgorithm(arangodb::velocypack::Slice params);
|
PageRankAlgorithm(arangodb::velocypack::Slice params);
|
||||||
|
|
||||||
|
bool supportsAsyncMode() const override { return true; }
|
||||||
|
bool supportsCompensation() const override { return true; }
|
||||||
|
MasterContext* masterContext(VPackSlice userParams) const override;
|
||||||
|
|
||||||
GraphFormat<float, float>* inputFormat() const override;
|
GraphFormat<float, float>* inputFormat() const override;
|
||||||
MessageFormat<float>* messageFormat() const override;
|
MessageFormat<float>* messageFormat() const override;
|
||||||
|
@ -44,8 +48,6 @@ struct PageRankAlgorithm : public SimpleAlgorithm<float, float, float> {
|
||||||
const override;
|
const override;
|
||||||
VertexCompensation<float, float, float>* createCompensation(uint64_t gss) const override;
|
VertexCompensation<float, float, float>* createCompensation(uint64_t gss) const override;
|
||||||
Aggregator* aggregator(std::string const& name) const override;
|
Aggregator* aggregator(std::string const& name) const override;
|
||||||
|
|
||||||
MasterContext* masterContext(VPackSlice userParams) const override;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,7 +67,7 @@ void Conductor::start(std::string const& algoName, VPackSlice userConfig) {
|
||||||
} else {
|
} else {
|
||||||
_userParams.add(userConfig);
|
_userParams.add(userConfig);
|
||||||
}
|
}
|
||||||
|
|
||||||
_startTimeSecs = TRI_microtime();
|
_startTimeSecs = TRI_microtime();
|
||||||
_globalSuperstep = 0;
|
_globalSuperstep = 0;
|
||||||
_state = ExecutionState::RUNNING;
|
_state = ExecutionState::RUNNING;
|
||||||
|
@ -76,7 +76,11 @@ void Conductor::start(std::string const& algoName, VPackSlice userConfig) {
|
||||||
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
|
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
|
||||||
"Algorithm not found");
|
"Algorithm not found");
|
||||||
}
|
}
|
||||||
_aggregatorUsage.reset(new AggregatorUsage(_algorithm.get()));
|
_aggregators.reset(new AggregatorHandler(_algorithm.get()));
|
||||||
|
// configure the async mode as optional
|
||||||
|
VPackSlice async = _userParams.slice().get("async");
|
||||||
|
_asyncMode = _algorithm->supportsAsyncMode();
|
||||||
|
_asyncMode = _asyncMode && (async.isNone() || async.getBoolean());
|
||||||
|
|
||||||
int res = _initializeWorkers(Utils::startExecutionPath, VPackSlice());
|
int res = _initializeWorkers(Utils::startExecutionPath, VPackSlice());
|
||||||
if (res != TRI_ERROR_NO_ERROR) {
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
|
@ -92,15 +96,15 @@ bool Conductor::_startGlobalStep() {
|
||||||
b.openObject();
|
b.openObject();
|
||||||
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
|
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
|
||||||
b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep));
|
b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep));
|
||||||
if (_aggregatorUsage->size() > 0) {
|
if (_aggregators->size() > 0) {
|
||||||
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
|
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
|
||||||
_aggregatorUsage->serializeValues(b);
|
_aggregators->serializeValues(b);
|
||||||
b.close();
|
b.close();
|
||||||
}
|
}
|
||||||
b.close();
|
b.close();
|
||||||
|
|
||||||
// reset values which are calculated during the superstep
|
// reset values which are calculated during the superstep
|
||||||
_aggregatorUsage->resetValues();
|
_aggregators->resetValues();
|
||||||
_workerStats.activeCount = 0;
|
_workerStats.activeCount = 0;
|
||||||
|
|
||||||
// first allow all workers to run worker level operations
|
// first allow all workers to run worker level operations
|
||||||
|
@ -128,11 +132,11 @@ void Conductor::finishedWorkerStartup(VPackSlice& data) {
|
||||||
LOG(WARN) << "We are not in a state where we expect a response";
|
LOG(WARN) << "We are not in a state where we expect a response";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
_ensureCorrectness(data);
|
_ensureUniqueResponse(data);
|
||||||
if (_respondedServers.size() != _dbServers.size()) {
|
if (_respondedServers.size() != _dbServers.size()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_startGlobalStep()) {
|
if (_startGlobalStep()) {
|
||||||
// listens for changing primary DBServers on each collection shard
|
// listens for changing primary DBServers on each collection shard
|
||||||
RecoveryManager* mngr = PregelFeature::instance()->recoveryManager();
|
RecoveryManager* mngr = PregelFeature::instance()->recoveryManager();
|
||||||
|
@ -142,31 +146,38 @@ void Conductor::finishedWorkerStartup(VPackSlice& data) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Conductor::finishedGlobalStep(VPackSlice& data) {
|
void Conductor::finishedWorkerStep(VPackSlice& data) {
|
||||||
MUTEX_LOCKER(guard, _callbackMutex);
|
MUTEX_LOCKER(guard, _callbackMutex);
|
||||||
|
// this method can be called multiple times in a superstep depending on
|
||||||
|
// whether we are in the async mode
|
||||||
uint64_t gss = data.get(Utils::globalSuperstepKey).getUInt();
|
uint64_t gss = data.get(Utils::globalSuperstepKey).getUInt();
|
||||||
if (gss != _globalSuperstep) {
|
if (gss != _globalSuperstep ||
|
||||||
|
!(_state == ExecutionState::RUNNING ||
|
||||||
|
_state == ExecutionState::CANCELED)) {
|
||||||
LOG(WARN) << "Conductor did received a callback from the wrong superstep";
|
LOG(WARN) << "Conductor did received a callback from the wrong superstep";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
_ensureCorrectness(data);
|
VPackSlice slice = data.get(Utils::gssDone);
|
||||||
|
bool gssDone = slice.isBool() && slice.getBool();
|
||||||
// collect worker information
|
if (!_asyncMode || gssDone) {
|
||||||
VPackSlice workerValues = data.get(Utils::aggregatorValuesKey);
|
_ensureUniqueResponse(data);
|
||||||
if (workerValues.isObject()) {
|
|
||||||
_aggregatorUsage->aggregateValues(workerValues);
|
// collect worker information
|
||||||
|
slice = data.get(Utils::aggregatorValuesKey);
|
||||||
|
if (slice.isObject()) {
|
||||||
|
_aggregators->aggregateValues(slice);
|
||||||
|
}
|
||||||
|
_workerStats.accumulate(data);
|
||||||
}
|
}
|
||||||
_workerStats.accumulate(data);
|
|
||||||
if (_respondedServers.size() != _dbServers.size()) {
|
if (_respondedServers.size() != _dbServers.size()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool proceed = true;
|
bool proceed = true;
|
||||||
if (_masterContext) { // ask algorithm to evaluate aggregated values
|
if (_masterContext) { // ask algorithm to evaluate aggregated values
|
||||||
proceed = _masterContext->postGlobalSuperstep(_globalSuperstep);
|
proceed = _masterContext->postGlobalSuperstep(_globalSuperstep);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG(INFO) << "Finished gss " << _globalSuperstep;
|
LOG(INFO) << "Finished gss " << _globalSuperstep;
|
||||||
_globalSuperstep++;
|
_globalSuperstep++;
|
||||||
|
|
||||||
|
@ -178,7 +189,7 @@ void Conductor::finishedGlobalStep(VPackSlice& data) {
|
||||||
proceed = proceed && _globalSuperstep <= 100;
|
proceed = proceed && _globalSuperstep <= 100;
|
||||||
|
|
||||||
if (proceed && !workersDone && _state == ExecutionState::RUNNING) {
|
if (proceed && !workersDone && _state == ExecutionState::RUNNING) {
|
||||||
_startGlobalStep();// trigger next superstep
|
_startGlobalStep(); // trigger next superstep
|
||||||
} else if (_state == ExecutionState::RUNNING ||
|
} else if (_state == ExecutionState::RUNNING ||
|
||||||
_state == ExecutionState::CANCELED) {
|
_state == ExecutionState::CANCELED) {
|
||||||
if (_state == ExecutionState::CANCELED) {
|
if (_state == ExecutionState::CANCELED) {
|
||||||
|
@ -195,7 +206,7 @@ void Conductor::finishedGlobalStep(VPackSlice& data) {
|
||||||
// tells workers to store / discard results
|
// tells workers to store / discard results
|
||||||
_finalizeWorkers();
|
_finalizeWorkers();
|
||||||
|
|
||||||
} else {// this prop shouldn't occur,
|
} else { // this prop shouldn't occur,
|
||||||
LOG(WARN) << "No further action taken after receiving all responses";
|
LOG(WARN) << "No further action taken after receiving all responses";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -206,12 +217,11 @@ void Conductor::finishedRecovery(VPackSlice& data) {
|
||||||
LOG(WARN) << "We are not in a state where we expect a recovery response";
|
LOG(WARN) << "We are not in a state where we expect a recovery response";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
_ensureCorrectness(data);
|
_ensureUniqueResponse(data);
|
||||||
if (_respondedServers.size() != _dbServers.size()) {
|
if (_respondedServers.size() != _dbServers.size()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (_algorithm->supportsCompensation()) {
|
if (_algorithm->supportsCompensation()) {
|
||||||
bool proceed = false;
|
bool proceed = false;
|
||||||
if (_masterContext) {
|
if (_masterContext) {
|
||||||
|
@ -222,15 +232,15 @@ void Conductor::finishedRecovery(VPackSlice& data) {
|
||||||
b.openObject();
|
b.openObject();
|
||||||
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
|
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
|
||||||
b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep));
|
b.add(Utils::globalSuperstepKey, VPackValue(_globalSuperstep));
|
||||||
if (_aggregatorUsage->size() > 0) {
|
if (_aggregators->size() > 0) {
|
||||||
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
|
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
|
||||||
_aggregatorUsage->serializeValues(b);
|
_aggregators->serializeValues(b);
|
||||||
b.close();
|
b.close();
|
||||||
}
|
}
|
||||||
b.close();
|
b.close();
|
||||||
|
|
||||||
// reset values which are calculated during the superstep
|
// reset values which are calculated during the superstep
|
||||||
_aggregatorUsage->resetValues();
|
_aggregators->resetValues();
|
||||||
_workerStats.activeCount = 0;
|
_workerStats.activeCount = 0;
|
||||||
|
|
||||||
// first allow all workers to run worker level operations
|
// first allow all workers to run worker level operations
|
||||||
|
@ -248,8 +258,10 @@ void Conductor::finishedRecovery(VPackSlice& data) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Conductor::cancel() {
|
void Conductor::cancel() {
|
||||||
|
if (_state == ExecutionState::RUNNING ||
|
||||||
if (_state == ExecutionState::RUNNING || _state == ExecutionState::RECOVERING) {
|
_state == ExecutionState::RECOVERING) {
|
||||||
|
_state = ExecutionState::CANCELED;
|
||||||
|
|
||||||
VPackBuilder b;
|
VPackBuilder b;
|
||||||
b.openObject();
|
b.openObject();
|
||||||
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
|
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
|
||||||
|
@ -257,7 +269,6 @@ void Conductor::cancel() {
|
||||||
b.close();
|
b.close();
|
||||||
_sendToAllDBServers(Utils::cancelGSSPath, b.slice());
|
_sendToAllDBServers(Utils::cancelGSSPath, b.slice());
|
||||||
}
|
}
|
||||||
|
|
||||||
_state = ExecutionState::CANCELED;
|
_state = ExecutionState::CANCELED;
|
||||||
// stop monitoring shards
|
// stop monitoring shards
|
||||||
RecoveryManager* mngr = PregelFeature::instance()->recoveryManager();
|
RecoveryManager* mngr = PregelFeature::instance()->recoveryManager();
|
||||||
|
@ -296,7 +307,7 @@ void Conductor::startRecovery() {
|
||||||
cancel();
|
cancel();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
VPackBuilder b;
|
VPackBuilder b;
|
||||||
b.openObject();
|
b.openObject();
|
||||||
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
|
b.add(Utils::executionNumberKey, VPackValue(_executionNumber));
|
||||||
|
@ -304,8 +315,9 @@ void Conductor::startRecovery() {
|
||||||
b.close();
|
b.close();
|
||||||
_dbServers = goodServers;
|
_dbServers = goodServers;
|
||||||
_sendToAllDBServers(Utils::cancelGSSPath, b.slice());
|
_sendToAllDBServers(Utils::cancelGSSPath, b.slice());
|
||||||
|
usleep(5 * 1000000);// workers may need a little bit
|
||||||
|
|
||||||
|
// Let's try recovery
|
||||||
if (_algorithm->supportsCompensation()) {
|
if (_algorithm->supportsCompensation()) {
|
||||||
if (_masterContext) {
|
if (_masterContext) {
|
||||||
_masterContext->preCompensation(_globalSuperstep);
|
_masterContext->preCompensation(_globalSuperstep);
|
||||||
|
@ -314,13 +326,13 @@ void Conductor::startRecovery() {
|
||||||
VPackBuilder b;
|
VPackBuilder b;
|
||||||
b.openObject();
|
b.openObject();
|
||||||
b.add(Utils::recoveryMethodKey, VPackValue(Utils::compensate));
|
b.add(Utils::recoveryMethodKey, VPackValue(Utils::compensate));
|
||||||
if (_aggregatorUsage->size() > 0) {
|
if (_aggregators->size() > 0) {
|
||||||
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
|
b.add(Utils::aggregatorValuesKey, VPackValue(VPackValueType::Object));
|
||||||
_aggregatorUsage->serializeValues(b);
|
_aggregators->serializeValues(b);
|
||||||
b.close();
|
b.close();
|
||||||
}
|
}
|
||||||
b.close();
|
b.close();
|
||||||
_aggregatorUsage->resetValues();
|
_aggregators->resetValues();
|
||||||
_workerStats.activeCount = 0;
|
_workerStats.activeCount = 0;
|
||||||
|
|
||||||
// initialize workers will reconfigure the workers and set the
|
// initialize workers will reconfigure the workers and set the
|
||||||
|
@ -396,7 +408,7 @@ int Conductor::_initializeWorkers(std::string const& suffix,
|
||||||
if (_masterContext && _masterContext->_vertexCount == 0) {
|
if (_masterContext && _masterContext->_vertexCount == 0) {
|
||||||
_masterContext->_vertexCount = vertexCount;
|
_masterContext->_vertexCount = vertexCount;
|
||||||
_masterContext->_edgeCount = edgeCount;
|
_masterContext->_edgeCount = edgeCount;
|
||||||
_masterContext->_aggregators = _aggregatorUsage.get();
|
_masterContext->_aggregators = _aggregators.get();
|
||||||
_masterContext->preApplication();
|
_masterContext->preApplication();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -517,7 +529,7 @@ int Conductor::_sendToAllDBServers(std::string const& suffix,
|
||||||
return nrGood == requests.size() ? TRI_ERROR_NO_ERROR : TRI_ERROR_FAILED;
|
return nrGood == requests.size() ? TRI_ERROR_NO_ERROR : TRI_ERROR_FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Conductor::_ensureCorrectness(VPackSlice body) {
|
void Conductor::_ensureUniqueResponse(VPackSlice body) {
|
||||||
// check if this is the only time we received this
|
// check if this is the only time we received this
|
||||||
ServerID sender = body.get(Utils::senderKey).copyString();
|
ServerID sender = body.get(Utils::senderKey).copyString();
|
||||||
if (_respondedServers.find(sender) != _respondedServers.end()) {
|
if (_respondedServers.find(sender) != _respondedServers.end()) {
|
||||||
|
|
|
@ -35,55 +35,61 @@ namespace arangodb {
|
||||||
class RestPregelHandler;
|
class RestPregelHandler;
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
|
|
||||||
enum ExecutionState { DEFAULT,// before calling start
|
enum ExecutionState {
|
||||||
RUNNING,// during normal operation
|
DEFAULT, // before calling start
|
||||||
DONE,// after everything is done
|
RUNNING, // during normal operation
|
||||||
CANCELED,// after an error or manual canceling
|
DONE, // after everything is done
|
||||||
RECOVERING// during recovery
|
CANCELED, // after an error or manual canceling
|
||||||
|
RECOVERING // during recovery
|
||||||
};
|
};
|
||||||
|
|
||||||
class MasterContext;
|
class MasterContext;
|
||||||
class AggregatorUsage;
|
class AggregatorHandler;
|
||||||
struct IAlgorithm;
|
struct IAlgorithm;
|
||||||
|
|
||||||
class Conductor {
|
class Conductor {
|
||||||
friend class arangodb::RestPregelHandler;
|
friend class arangodb::RestPregelHandler;
|
||||||
|
|
||||||
ExecutionState _state = ExecutionState::DEFAULT;
|
ExecutionState _state = ExecutionState::DEFAULT;
|
||||||
const VocbaseGuard _vocbaseGuard;
|
const VocbaseGuard _vocbaseGuard;
|
||||||
const uint64_t _executionNumber;
|
const uint64_t _executionNumber;
|
||||||
std::unique_ptr<IAlgorithm> _algorithm;
|
std::unique_ptr<IAlgorithm> _algorithm;
|
||||||
VPackBuilder _userParams;
|
VPackBuilder _userParams;
|
||||||
Mutex _callbackMutex; // prevents concurrent calls to finishedGlobalStep
|
Mutex _callbackMutex; // prevents concurrent calls to finishedWorkerStep
|
||||||
|
|
||||||
std::vector<std::shared_ptr<LogicalCollection>> _vertexCollections;
|
std::vector<std::shared_ptr<LogicalCollection>> _vertexCollections;
|
||||||
std::vector<std::shared_ptr<LogicalCollection>> _edgeCollections;
|
std::vector<std::shared_ptr<LogicalCollection>> _edgeCollections;
|
||||||
std::vector<ServerID> _dbServers;
|
std::vector<ServerID> _dbServers;
|
||||||
|
|
||||||
// initialized on startup
|
// initialized on startup
|
||||||
std::unique_ptr<AggregatorUsage> _aggregatorUsage;
|
std::unique_ptr<AggregatorHandler> _aggregators;
|
||||||
std::unique_ptr<MasterContext> _masterContext;
|
std::unique_ptr<MasterContext> _masterContext;
|
||||||
|
/// some tracking info
|
||||||
double _startTimeSecs = 0, _endTimeSecs = 0;
|
double _startTimeSecs = 0, _endTimeSecs = 0;
|
||||||
uint64_t _globalSuperstep = 0;
|
uint64_t _globalSuperstep = 0;
|
||||||
|
/// tracks the servers which responded, only used for stages where we expect a
|
||||||
|
/// unique response, not necessarily during the async mode
|
||||||
std::set<ServerID> _respondedServers;
|
std::set<ServerID> _respondedServers;
|
||||||
|
bool _asyncMode = false;
|
||||||
|
/// persistent tracking of active vertices, sent messages, runtimes
|
||||||
WorkerStats _workerStats;
|
WorkerStats _workerStats;
|
||||||
|
|
||||||
bool _startGlobalStep();
|
bool _startGlobalStep();
|
||||||
int _initializeWorkers(std::string const& suffix, VPackSlice additional);
|
int _initializeWorkers(std::string const& suffix, VPackSlice additional);
|
||||||
int _finalizeWorkers();
|
int _finalizeWorkers();
|
||||||
int _sendToAllDBServers(std::string const& suffix, VPackSlice const& message);
|
int _sendToAllDBServers(std::string const& suffix, VPackSlice const& message);
|
||||||
void _ensureCorrectness(VPackSlice body);
|
void _ensureUniqueResponse(VPackSlice body);
|
||||||
|
|
||||||
// === REST callbacks ===
|
// === REST callbacks ===
|
||||||
void finishedWorkerStartup(VPackSlice& data);
|
void finishedWorkerStartup(VPackSlice& data);
|
||||||
void finishedGlobalStep(VPackSlice& data);
|
void finishedWorkerStep(VPackSlice& data);
|
||||||
void finishedRecovery(VPackSlice& data);
|
void finishedRecovery(VPackSlice& data);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Conductor(uint64_t executionNumber, TRI_vocbase_t* vocbase,
|
Conductor(
|
||||||
std::vector<std::shared_ptr<LogicalCollection>> const& vertexCollections,
|
uint64_t executionNumber, TRI_vocbase_t* vocbase,
|
||||||
std::vector<std::shared_ptr<LogicalCollection>> const& edgeCollections);
|
std::vector<std::shared_ptr<LogicalCollection>> const& vertexCollections,
|
||||||
|
std::vector<std::shared_ptr<LogicalCollection>> const& edgeCollections);
|
||||||
~Conductor();
|
~Conductor();
|
||||||
|
|
||||||
void start(std::string const& algoName, VPackSlice userConfig);
|
void start(std::string const& algoName, VPackSlice userConfig);
|
||||||
|
@ -91,10 +97,11 @@ class Conductor {
|
||||||
void startRecovery();
|
void startRecovery();
|
||||||
|
|
||||||
ExecutionState getState() const { return _state; }
|
ExecutionState getState() const { return _state; }
|
||||||
WorkerStats workerStats() const {return _workerStats;}
|
WorkerStats workerStats() const { return _workerStats; }
|
||||||
uint64_t globalSuperstep() const {return _globalSuperstep;}
|
uint64_t globalSuperstep() const { return _globalSuperstep; }
|
||||||
double totalRuntimeSecs() {
|
double totalRuntimeSecs() {
|
||||||
return _endTimeSecs == 0 ? TRI_microtime() - _startTimeSecs : _endTimeSecs - _startTimeSecs;
|
return _endTimeSecs == 0 ? TRI_microtime() - _startTimeSecs
|
||||||
|
: _endTimeSecs - _startTimeSecs;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,7 @@ namespace arangodb {
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
|
|
||||||
typedef uint16_t prgl_shard_t;
|
typedef uint16_t prgl_shard_t;
|
||||||
|
|
||||||
/// @brief header entry for the edge file
|
/// @brief header entry for the edge file
|
||||||
template <typename E>
|
template <typename E>
|
||||||
class Edge {
|
class Edge {
|
||||||
|
@ -35,15 +35,11 @@ class Edge {
|
||||||
prgl_shard_t _targetShard;
|
prgl_shard_t _targetShard;
|
||||||
std::string _toKey;
|
std::string _toKey;
|
||||||
E _data;
|
E _data;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// EdgeEntry() : _nextEntryOffset(0), _dataSize(0), _vertexIDSize(0) {}
|
// EdgeEntry() : _nextEntryOffset(0), _dataSize(0), _vertexIDSize(0) {}
|
||||||
Edge(prgl_shard_t source,
|
Edge(prgl_shard_t source, prgl_shard_t target, std::string const& key)
|
||||||
prgl_shard_t target,
|
: _sourceShard(source), _targetShard(target), _toKey(key) {}
|
||||||
std::string const& key)
|
|
||||||
: _sourceShard(source),
|
|
||||||
_targetShard(target),
|
|
||||||
_toKey(key) {}
|
|
||||||
|
|
||||||
// size_t getSize() { return sizeof(EdgeEntry) + _vertexIDSize + _dataSize; }
|
// size_t getSize() { return sizeof(EdgeEntry) + _vertexIDSize + _dataSize; }
|
||||||
std::string const& toKey() const { return _toKey; }
|
std::string const& toKey() const { return _toKey; }
|
||||||
|
@ -51,19 +47,15 @@ class Edge {
|
||||||
inline E* data() {
|
inline E* data() {
|
||||||
return &_data; // static_cast<E>(this + sizeof(EdgeEntry) + _vertexIDSize);
|
return &_data; // static_cast<E>(this + sizeof(EdgeEntry) + _vertexIDSize);
|
||||||
}
|
}
|
||||||
inline prgl_shard_t sourceShard() const {
|
inline prgl_shard_t sourceShard() const { return _sourceShard; }
|
||||||
return _sourceShard;
|
inline prgl_shard_t targetShard() const { return _targetShard; }
|
||||||
}
|
|
||||||
inline prgl_shard_t targetShard() const {
|
|
||||||
return _targetShard;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class VertexEntry {
|
class VertexEntry {
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
friend class GraphStore;
|
friend class GraphStore;
|
||||||
|
|
||||||
const prgl_shard_t _shard;// TODO optimize and remove
|
const prgl_shard_t _shard; // TODO optimize and remove
|
||||||
const std::string _key;
|
const std::string _key;
|
||||||
size_t _vertexDataOffset; // size_t vertexID length
|
size_t _vertexDataOffset; // size_t vertexID length
|
||||||
size_t _edgeDataOffset;
|
size_t _edgeDataOffset;
|
||||||
|
@ -85,14 +77,14 @@ class VertexEntry {
|
||||||
inline size_t getSize() { return sizeof(VertexEntry); }
|
inline size_t getSize() { return sizeof(VertexEntry); }
|
||||||
inline bool active() const { return _active; }
|
inline bool active() const { return _active; }
|
||||||
inline void setActive(bool bb) { _active = bb; }
|
inline void setActive(bool bb) { _active = bb; }
|
||||||
|
|
||||||
inline prgl_shard_t shard() const {return _shard;}
|
inline prgl_shard_t shard() const { return _shard; }
|
||||||
inline std::string const& key() const { return _key; };
|
inline std::string const& key() const { return _key; };
|
||||||
/*std::string const& key() const {
|
/*std::string const& key() const {
|
||||||
return std::string(_key, _keySize);
|
return std::string(_key, _keySize);
|
||||||
};*/
|
};*/
|
||||||
};
|
};
|
||||||
|
|
||||||
// unused right now
|
// unused right now
|
||||||
/*class LinkedListIterator {
|
/*class LinkedListIterator {
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -70,7 +70,7 @@ class IntegerGraphFormat : public GraphFormat<int64_t, int64_t> {
|
||||||
int64_t readVertexData(const void* ptr) override { return *((int64_t*)ptr); }
|
int64_t readVertexData(const void* ptr) override { return *((int64_t*)ptr); }
|
||||||
int64_t readEdgeData(const void* ptr) override { return *((int64_t*)ptr); }
|
int64_t readEdgeData(const void* ptr) override { return *((int64_t*)ptr); }
|
||||||
|
|
||||||
size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr,
|
size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr,
|
||||||
size_t maxSize) override {
|
size_t maxSize) override {
|
||||||
arangodb::velocypack::Slice val = document.get(_sourceField);
|
arangodb::velocypack::Slice val = document.get(_sourceField);
|
||||||
*((int64_t*)targetPtr) = val.isInteger() ? val.getInt() : _vDefault;
|
*((int64_t*)targetPtr) = val.isInteger() ? val.getInt() : _vDefault;
|
||||||
|
@ -84,19 +84,19 @@ class IntegerGraphFormat : public GraphFormat<int64_t, int64_t> {
|
||||||
return sizeof(int64_t);
|
return sizeof(int64_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
|
void buildVertexDocument(arangodb::velocypack::Builder& b,
|
||||||
size_t size) override {
|
const void* targetPtr, size_t size) override {
|
||||||
b.add(_resultField, VPackValue(readVertexData(targetPtr)));
|
b.add(_resultField, VPackValue(readVertexData(targetPtr)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
|
void buildEdgeDocument(arangodb::velocypack::Builder& b,
|
||||||
size_t size) override {
|
const void* targetPtr, size_t size) override {
|
||||||
b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
|
b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class FloatGraphFormat : public GraphFormat<float, float> {
|
class FloatGraphFormat : public GraphFormat<float, float> {
|
||||||
protected:
|
protected:
|
||||||
const std::string _sourceField, _resultField;
|
const std::string _sourceField, _resultField;
|
||||||
const float _vDefault, _eDefault;
|
const float _vDefault, _eDefault;
|
||||||
|
|
||||||
|
@ -125,13 +125,13 @@ protected:
|
||||||
return sizeof(float);
|
return sizeof(float);
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
|
void buildVertexDocument(arangodb::velocypack::Builder& b,
|
||||||
size_t size) override {
|
const void* targetPtr, size_t size) override {
|
||||||
b.add(_resultField, VPackValue(readVertexData(targetPtr)));
|
b.add(_resultField, VPackValue(readVertexData(targetPtr)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
|
void buildEdgeDocument(arangodb::velocypack::Builder& b,
|
||||||
size_t size) override {
|
const void* targetPtr, size_t size) override {
|
||||||
b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
|
b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -145,7 +145,7 @@ protected:
|
||||||
const std::string _sourceField, _resultField;
|
const std::string _sourceField, _resultField;
|
||||||
const V _vDefault;
|
const V _vDefault;
|
||||||
const E _eDefault;
|
const E _eDefault;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
NumberGraphFormat(std::string const& source, std::string const& result,
|
NumberGraphFormat(std::string const& source, std::string const& result,
|
||||||
V vertexNull, E edgeNull)
|
V vertexNull, E edgeNull)
|
||||||
|
@ -153,10 +153,10 @@ public:
|
||||||
_resultField(result),
|
_resultField(result),
|
||||||
_vDefault(vertexNull),
|
_vDefault(vertexNull),
|
||||||
_eDefault(edgeNull) {}
|
_eDefault(edgeNull) {}
|
||||||
|
|
||||||
V readVertexData(void* ptr) override { return *((V*)ptr); }
|
V readVertexData(void* ptr) override { return *((V*)ptr); }
|
||||||
E readEdgeData(void* ptr) override { return *((E*)ptr); }
|
E readEdgeData(void* ptr) override { return *((E*)ptr); }
|
||||||
|
|
||||||
size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr,
|
size_t copyVertexData(arangodb::velocypack::Slice document, void* targetPtr,
|
||||||
size_t maxSize) override {
|
size_t maxSize) override {
|
||||||
arangodb::velocypack::Slice val = document.get(_sourceField);
|
arangodb::velocypack::Slice val = document.get(_sourceField);
|
||||||
|
@ -171,7 +171,7 @@ public:
|
||||||
}
|
}
|
||||||
return sizeof(V);
|
return sizeof(V);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t copyEdgeData(arangodb::velocypack::Slice document, void* targetPtr,
|
size_t copyEdgeData(arangodb::velocypack::Slice document, void* targetPtr,
|
||||||
size_t maxSize) override {
|
size_t maxSize) override {
|
||||||
arangodb::velocypack::Slice val = document.get(_sourceField);
|
arangodb::velocypack::Slice val = document.get(_sourceField);
|
||||||
|
@ -186,13 +186,15 @@ public:
|
||||||
}
|
}
|
||||||
return sizeof(E);
|
return sizeof(E);
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildVertexDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
|
void buildVertexDocument(arangodb::velocypack::Builder& b, const void*
|
||||||
|
targetPtr,
|
||||||
size_t size) override {
|
size_t size) override {
|
||||||
b.add(_resultField, VPackValue(readVertexData(targetPtr)));
|
b.add(_resultField, VPackValue(readVertexData(targetPtr)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildEdgeDocument(arangodb::velocypack::Builder& b, const void* targetPtr,
|
void buildEdgeDocument(arangodb::velocypack::Builder& b, const void*
|
||||||
|
targetPtr,
|
||||||
size_t size) override {
|
size_t size) override {
|
||||||
b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
|
b.add(_resultField, VPackValue(readEdgeData(targetPtr)));
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,16 +43,8 @@ using namespace arangodb;
|
||||||
using namespace arangodb::pregel;
|
using namespace arangodb::pregel;
|
||||||
|
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
GraphStore<V, E>::GraphStore(TRI_vocbase_t* vb, WorkerState const& state,
|
GraphStore<V, E>::GraphStore(TRI_vocbase_t* vb, GraphFormat<V, E>* graphFormat)
|
||||||
GraphFormat<V, E>* graphFormat)
|
: _vocbaseGuard(vb), _graphFormat(graphFormat) {}
|
||||||
: _vocbaseGuard(vb), _graphFormat(graphFormat) {
|
|
||||||
// _edgeCollection = ClusterInfo::instance()->getCollection(
|
|
||||||
// vb->name(), state->edgeCollectionPlanId());
|
|
||||||
|
|
||||||
loadShards(state);
|
|
||||||
LOG(INFO) << "Loaded " << _index.size() << "vertices and " << _edges.size()
|
|
||||||
<< " edges";
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
GraphStore<V, E>::~GraphStore() {
|
GraphStore<V, E>::~GraphStore() {
|
||||||
|
@ -61,22 +53,7 @@ GraphStore<V, E>::~GraphStore() {
|
||||||
|
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
void GraphStore<V, E>::loadShards(WorkerState const& state) {
|
void GraphStore<V, E>::loadShards(WorkerState const& state) {
|
||||||
std::vector<std::string> readColls, writeColls;
|
_createReadTransaction(state);
|
||||||
for (auto shard : state.localVertexShardIDs()) {
|
|
||||||
readColls.push_back(shard);
|
|
||||||
}
|
|
||||||
for (auto shard : state.localEdgeShardIDs()) {
|
|
||||||
readColls.push_back(shard);
|
|
||||||
}
|
|
||||||
double lockTimeout =
|
|
||||||
(double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);
|
|
||||||
_transaction = new ExplicitTransaction(
|
|
||||||
StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
|
|
||||||
writeColls, lockTimeout, false, false);
|
|
||||||
int res = _transaction->begin();
|
|
||||||
if (res != TRI_ERROR_NO_ERROR) {
|
|
||||||
THROW_ARANGO_EXCEPTION(res);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::map<CollectionID, std::vector<ShardID>> const& vertexMap =
|
std::map<CollectionID, std::vector<ShardID>> const& vertexMap =
|
||||||
state.vertexCollectionShards();
|
state.vertexCollectionShards();
|
||||||
|
@ -108,6 +85,50 @@ void GraphStore<V, E>::loadShards(WorkerState const& state) {
|
||||||
_cleanupTransactions();
|
_cleanupTransactions();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename V, typename E>
|
||||||
|
void GraphStore<V, E>::loadDocument(WorkerState const& state,
|
||||||
|
ShardID const& shard,
|
||||||
|
std::string const& _key) {
|
||||||
|
/*if (_readTrx == nullptr) {
|
||||||
|
_createReadTransaction(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
prgl_shard_t sourceShard = (prgl_shard_t)state.shardId(shard);
|
||||||
|
bool storeData = _graphFormat->storesVertexData();
|
||||||
|
|
||||||
|
VPackBuilder builder;
|
||||||
|
builder.openObject();
|
||||||
|
builder.add(StaticStrings::KeyString, VPackValue(_key));
|
||||||
|
builder.close();
|
||||||
|
|
||||||
|
OperationOptions options;
|
||||||
|
options.ignoreRevs = false;
|
||||||
|
|
||||||
|
TRI_voc_cid_t cid = _readTrx->addCollectionAtRuntime(shard);
|
||||||
|
_readTrx->orderDitch(cid); // will throw when it fails
|
||||||
|
OperationResult opResult = _readTrx->document(shard, builder.slice(),
|
||||||
|
options);
|
||||||
|
if (!opResult.successful()) {
|
||||||
|
_cleanupTransactions();
|
||||||
|
THROW_ARANGO_EXCEPTION(opResult.code);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
VertexEntry entry(sourceShard, _key);
|
||||||
|
if (storeData) {
|
||||||
|
V vertexData;
|
||||||
|
size_t size =
|
||||||
|
_graphFormat->copyVertexData(opResult.slice(), &vertexData, sizeof(V));
|
||||||
|
if (size > 0) {
|
||||||
|
entry._vertexDataOffset = _vertexData.size();
|
||||||
|
_vertexData.push_back(vertexData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::string documentId = _readTrx->extractIdString(opResult.slice());
|
||||||
|
_loadEdges(state, edgeShard, entry, documentId);
|
||||||
|
_index.push_back(entry);*/
|
||||||
|
}
|
||||||
|
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
RangeIterator<VertexEntry> GraphStore<V, E>::vertexIterator() {
|
RangeIterator<VertexEntry> GraphStore<V, E>::vertexIterator() {
|
||||||
return vertexIterator(0, _index.size());
|
return vertexIterator(0, _index.size());
|
||||||
|
@ -144,16 +165,36 @@ RangeIterator<Edge<E>> GraphStore<V, E>::edgeIterator(
|
||||||
return RangeIterator<Edge<E>>(_edges, entry->_edgeDataOffset, end);
|
return RangeIterator<Edge<E>>(_edges, entry->_edgeDataOffset, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename V, typename E>
|
||||||
|
void GraphStore<V, E>::_createReadTransaction(WorkerState const& state) {
|
||||||
|
std::vector<std::string> readColls, writeColls;
|
||||||
|
for (auto shard : state.localVertexShardIDs()) {
|
||||||
|
readColls.push_back(shard);
|
||||||
|
}
|
||||||
|
for (auto shard : state.localEdgeShardIDs()) {
|
||||||
|
readColls.push_back(shard);
|
||||||
|
}
|
||||||
|
double lockTimeout =
|
||||||
|
(double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);
|
||||||
|
_readTrx = new ExplicitTransaction(
|
||||||
|
StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
|
||||||
|
writeColls, lockTimeout, false, false);
|
||||||
|
int res = _readTrx->begin();
|
||||||
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
|
THROW_ARANGO_EXCEPTION(res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
void GraphStore<V, E>::_cleanupTransactions() {
|
void GraphStore<V, E>::_cleanupTransactions() {
|
||||||
if (_transaction) {
|
if (_readTrx) {
|
||||||
if (_transaction->getStatus() == TRI_TRANSACTION_RUNNING) {
|
if (_readTrx->getStatus() == TRI_TRANSACTION_RUNNING) {
|
||||||
if (_transaction->commit() != TRI_ERROR_NO_ERROR) {
|
if (_readTrx->commit() != TRI_ERROR_NO_ERROR) {
|
||||||
LOG(WARN) << "Pregel worker: Failed to commit on a read transaction";
|
LOG(WARN) << "Pregel worker: Failed to commit on a read transaction";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
delete _transaction;
|
delete _readTrx;
|
||||||
_transaction = nullptr;
|
_readTrx = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -164,18 +205,18 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
|
||||||
//_graphFormat->willUseCollection(vocbase, vertexShard, false);
|
//_graphFormat->willUseCollection(vocbase, vertexShard, false);
|
||||||
bool storeData = _graphFormat->storesVertexData();
|
bool storeData = _graphFormat->storesVertexData();
|
||||||
|
|
||||||
TRI_voc_cid_t cid = _transaction->addCollectionAtRuntime(vertexShard);
|
TRI_voc_cid_t cid = _readTrx->addCollectionAtRuntime(vertexShard);
|
||||||
_transaction->orderDitch(cid); // will throw when it fails
|
_readTrx->orderDitch(cid); // will throw when it fails
|
||||||
prgl_shard_t sourceShard = (prgl_shard_t)state.shardId(vertexShard);
|
prgl_shard_t sourceShard = (prgl_shard_t)state.shardId(vertexShard);
|
||||||
|
|
||||||
/*int res = _transaction->lockRead();
|
/*int res = _readTrx->lockRead();
|
||||||
if (res != TRI_ERROR_NO_ERROR) {
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
THROW_ARANGO_EXCEPTION_FORMAT(res, "while looking up vertices '%s'",
|
THROW_ARANGO_EXCEPTION_FORMAT(res, "while looking up vertices '%s'",
|
||||||
vertexShard.c_str());
|
vertexShard.c_str());
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
ManagedDocumentResult mmdr(_transaction);
|
ManagedDocumentResult mmdr(_readTrx);
|
||||||
std::unique_ptr<OperationCursor> cursor = _transaction->indexScan(
|
std::unique_ptr<OperationCursor> cursor = _readTrx->indexScan(
|
||||||
vertexShard, Transaction::CursorType::ALL, Transaction::IndexHandle(), {},
|
vertexShard, Transaction::CursorType::ALL, Transaction::IndexHandle(), {},
|
||||||
&mmdr, 0, UINT64_MAX, 1000, false);
|
&mmdr, 0, UINT64_MAX, 1000, false);
|
||||||
|
|
||||||
|
@ -192,13 +233,13 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
|
||||||
cursor->getMoreMptr(result, 1000);
|
cursor->getMoreMptr(result, 1000);
|
||||||
for (auto const& element : result) {
|
for (auto const& element : result) {
|
||||||
TRI_voc_rid_t revisionId = element.revisionId();
|
TRI_voc_rid_t revisionId = element.revisionId();
|
||||||
if (collection->readRevision(_transaction, mmdr, revisionId)) {
|
if (collection->readRevision(_readTrx, mmdr, revisionId)) {
|
||||||
VPackSlice document(mmdr.vpack());
|
VPackSlice document(mmdr.vpack());
|
||||||
if (document.isExternal()) {
|
if (document.isExternal()) {
|
||||||
document = document.resolveExternal();
|
document = document.resolveExternal();
|
||||||
}
|
}
|
||||||
|
|
||||||
//LOG(INFO) << "Loaded Vertex: " << document.toJson();
|
// LOG(INFO) << "Loaded Vertex: " << document.toJson();
|
||||||
std::string key = document.get(StaticStrings::KeyString).copyString();
|
std::string key = document.get(StaticStrings::KeyString).copyString();
|
||||||
|
|
||||||
VertexEntry entry(sourceShard, key);
|
VertexEntry entry(sourceShard, key);
|
||||||
|
@ -214,7 +255,7 @@ void GraphStore<V, E>::_loadVertices(WorkerState const& state,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string documentId = _transaction->extractIdString(document);
|
std::string documentId = _readTrx->extractIdString(document);
|
||||||
_loadEdges(state, edgeShard, entry, documentId);
|
_loadEdges(state, edgeShard, entry, documentId);
|
||||||
_index.push_back(entry);
|
_index.push_back(entry);
|
||||||
}
|
}
|
||||||
|
@ -230,10 +271,10 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
|
||||||
const bool storeData = _graphFormat->storesEdgeData();
|
const bool storeData = _graphFormat->storesEdgeData();
|
||||||
|
|
||||||
// Transaction* trx = readTransaction(shard);
|
// Transaction* trx = readTransaction(shard);
|
||||||
traverser::EdgeCollectionInfo info(_transaction, edgeShard, TRI_EDGE_OUT,
|
traverser::EdgeCollectionInfo info(_readTrx, edgeShard, TRI_EDGE_OUT,
|
||||||
StaticStrings::FromString, 0);
|
StaticStrings::FromString, 0);
|
||||||
|
|
||||||
ManagedDocumentResult mmdr(_transaction);
|
ManagedDocumentResult mmdr(_readTrx);
|
||||||
auto cursor = info.getEdges(documentID, &mmdr);
|
auto cursor = info.getEdges(documentID, &mmdr);
|
||||||
if (cursor->failed()) {
|
if (cursor->failed()) {
|
||||||
THROW_ARANGO_EXCEPTION_FORMAT(cursor->code,
|
THROW_ARANGO_EXCEPTION_FORMAT(cursor->code,
|
||||||
|
@ -253,7 +294,7 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
|
||||||
cursor->getMoreMptr(result, 1000);
|
cursor->getMoreMptr(result, 1000);
|
||||||
for (auto const& element : result) {
|
for (auto const& element : result) {
|
||||||
TRI_voc_rid_t revisionId = element.revisionId();
|
TRI_voc_rid_t revisionId = element.revisionId();
|
||||||
if (collection->readRevision(_transaction, mmdr, revisionId)) {
|
if (collection->readRevision(_readTrx, mmdr, revisionId)) {
|
||||||
VPackSlice document(mmdr.vpack());
|
VPackSlice document(mmdr.vpack());
|
||||||
if (document.isExternal()) {
|
if (document.isExternal()) {
|
||||||
document = document.resolveExternal();
|
document = document.resolveExternal();
|
||||||
|
@ -262,7 +303,7 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
|
||||||
// ====== actual loading ======
|
// ====== actual loading ======
|
||||||
vertexEntry._edgeCount += 1;
|
vertexEntry._edgeCount += 1;
|
||||||
|
|
||||||
//LOG(INFO) << "Loaded Edge: " << document.toJson();
|
// LOG(INFO) << "Loaded Edge: " << document.toJson();
|
||||||
std::string toValue =
|
std::string toValue =
|
||||||
document.get(StaticStrings::ToString).copyString();
|
document.get(StaticStrings::ToString).copyString();
|
||||||
|
|
||||||
|
@ -301,28 +342,6 @@ void GraphStore<V, E>::_loadEdges(WorkerState const& state,
|
||||||
}*/
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/*template <typename V, typename E>
|
|
||||||
SingleCollectionTransaction* GraphStore<V, E>::writeTransaction(ShardID const&
|
|
||||||
shard) {
|
|
||||||
|
|
||||||
auto it = _transactions.find(shard);
|
|
||||||
if (it != _transactions.end()) {
|
|
||||||
return it->second;
|
|
||||||
} else {
|
|
||||||
auto trx = std::make_unique<SingleCollectionTransaction>(
|
|
||||||
StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()),
|
|
||||||
shard,
|
|
||||||
TRI_TRANSACTION_WRITE);
|
|
||||||
int res = trx->begin();
|
|
||||||
if (res != TRI_ERROR_NO_ERROR) {
|
|
||||||
THROW_ARANGO_EXCEPTION_FORMAT(res, "during transaction of shard '%s'",
|
|
||||||
shard.c_str());
|
|
||||||
}
|
|
||||||
_transactions[shard] = trx.get();
|
|
||||||
return trx.release();
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
void GraphStore<V, E>::storeResults(WorkerState const& state) {
|
void GraphStore<V, E>::storeResults(WorkerState const& state) {
|
||||||
std::vector<std::string> readColls, writeColls;
|
std::vector<std::string> readColls, writeColls;
|
||||||
|
@ -337,10 +356,10 @@ void GraphStore<V, E>::storeResults(WorkerState const& state) {
|
||||||
//}
|
//}
|
||||||
double lockTimeout =
|
double lockTimeout =
|
||||||
(double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);
|
(double)(TRI_TRANSACTION_DEFAULT_LOCK_TIMEOUT / 1000000ULL);
|
||||||
_transaction = new ExplicitTransaction(
|
ExplicitTransaction writeTrx(
|
||||||
StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
|
StandaloneTransactionContext::Create(_vocbaseGuard.vocbase()), readColls,
|
||||||
writeColls, lockTimeout, false, false);
|
writeColls, lockTimeout, false, false);
|
||||||
int res = _transaction->begin();
|
int res = writeTrx.begin();
|
||||||
if (res != TRI_ERROR_NO_ERROR) {
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
THROW_ARANGO_EXCEPTION(res);
|
THROW_ARANGO_EXCEPTION(res);
|
||||||
}
|
}
|
||||||
|
@ -356,14 +375,17 @@ void GraphStore<V, E>::storeResults(WorkerState const& state) {
|
||||||
_graphFormat->buildVertexDocument(b, data, sizeof(V));
|
_graphFormat->buildVertexDocument(b, data, sizeof(V));
|
||||||
b.close();
|
b.close();
|
||||||
|
|
||||||
OperationResult result = _transaction->update(shard, b.slice(), options);
|
OperationResult result = writeTrx.update(shard, b.slice(), options);
|
||||||
if (result.code != TRI_ERROR_NO_ERROR) {
|
if (result.code != TRI_ERROR_NO_ERROR) {
|
||||||
THROW_ARANGO_EXCEPTION(result.code);
|
THROW_ARANGO_EXCEPTION(result.code);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO loop over edges
|
// TODO loop over edges
|
||||||
}
|
}
|
||||||
_cleanupTransactions();
|
res = writeTrx.finish(res);
|
||||||
|
if (res != TRI_ERROR_NO_ERROR) {
|
||||||
|
THROW_ARANGO_EXCEPTION(res);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template class arangodb::pregel::GraphStore<int64_t, int64_t>;
|
template class arangodb::pregel::GraphStore<int64_t, int64_t>;
|
||||||
|
|
|
@ -37,7 +37,7 @@ namespace arangodb {
|
||||||
class Transaction;
|
class Transaction;
|
||||||
class LogicalCollection;
|
class LogicalCollection;
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
|
|
||||||
class WorkerState;
|
class WorkerState;
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
struct GraphFormat;
|
struct GraphFormat;
|
||||||
|
@ -47,11 +47,10 @@ struct GraphFormat;
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template <typename V, typename E>
|
template <typename V, typename E>
|
||||||
class GraphStore {
|
class GraphStore {
|
||||||
|
|
||||||
VocbaseGuard _vocbaseGuard;
|
VocbaseGuard _vocbaseGuard;
|
||||||
const std::unique_ptr<GraphFormat<V, E>> _graphFormat;
|
const std::unique_ptr<GraphFormat<V, E>> _graphFormat;
|
||||||
Transaction *_transaction;// temporary transaction
|
Transaction* _readTrx; // temporary transaction
|
||||||
|
|
||||||
// int _indexFd, _vertexFd, _edgeFd;
|
// int _indexFd, _vertexFd, _edgeFd;
|
||||||
// void *_indexMapping, *_vertexMapping, *_edgeMapping;
|
// void *_indexMapping, *_vertexMapping, *_edgeMapping;
|
||||||
// size_t _indexSize, _vertexSize, _edgeSize;
|
// size_t _indexSize, _vertexSize, _edgeSize;
|
||||||
|
@ -65,25 +64,23 @@ class GraphStore {
|
||||||
std::set<ShardID> _loadedShards;
|
std::set<ShardID> _loadedShards;
|
||||||
size_t _localVerticeCount;
|
size_t _localVerticeCount;
|
||||||
size_t _localEdgeCount;
|
size_t _localEdgeCount;
|
||||||
|
|
||||||
|
void _createReadTransaction(WorkerState const& state);
|
||||||
void _cleanupTransactions();
|
void _cleanupTransactions();
|
||||||
void _loadVertices(WorkerState const& state,
|
void _loadVertices(WorkerState const& state, ShardID const& vertexShard,
|
||||||
ShardID const& vertexShard,
|
|
||||||
ShardID const& edgeShard);
|
ShardID const& edgeShard);
|
||||||
void _loadEdges(WorkerState const& state,
|
void _loadEdges(WorkerState const& state, ShardID const& shard,
|
||||||
ShardID const& shard,
|
VertexEntry& vertexEntry, std::string const& documentID);
|
||||||
VertexEntry& vertexEntry,
|
|
||||||
std::string const& documentID);
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GraphStore(TRI_vocbase_t* vocbase, WorkerState const& state,
|
GraphStore(TRI_vocbase_t* vocbase, GraphFormat<V, E>* graphFormat);
|
||||||
GraphFormat<V, E>* graphFormat);
|
|
||||||
~GraphStore();
|
~GraphStore();
|
||||||
|
|
||||||
void loadShards(WorkerState const& state);
|
void loadShards(WorkerState const& state);
|
||||||
inline size_t vertexCount() {
|
void loadDocument(WorkerState const& state, ShardID const& shard,
|
||||||
return _index.size();
|
std::string const& _key);
|
||||||
}
|
|
||||||
|
inline size_t vertexCount() { return _index.size(); }
|
||||||
RangeIterator<VertexEntry> vertexIterator();
|
RangeIterator<VertexEntry> vertexIterator();
|
||||||
RangeIterator<VertexEntry> vertexIterator(size_t start, size_t count);
|
RangeIterator<VertexEntry> vertexIterator(size_t start, size_t count);
|
||||||
RangeIterator<Edge<E>> edgeIterator(VertexEntry const* entry);
|
RangeIterator<Edge<E>> edgeIterator(VertexEntry const* entry);
|
||||||
|
@ -91,7 +88,7 @@ class GraphStore {
|
||||||
void* mutableVertexData(VertexEntry const* entry);
|
void* mutableVertexData(VertexEntry const* entry);
|
||||||
V copyVertexData(VertexEntry const* entry);
|
V copyVertexData(VertexEntry const* entry);
|
||||||
void replaceVertexData(VertexEntry const* entry, void* data, size_t size);
|
void replaceVertexData(VertexEntry const* entry, void* data, size_t size);
|
||||||
|
|
||||||
/// Write results to database
|
/// Write results to database
|
||||||
void storeResults(WorkerState const& state);
|
void storeResults(WorkerState const& state);
|
||||||
};
|
};
|
||||||
|
|
|
@ -30,6 +30,9 @@
|
||||||
#include <velocypack/Iterator.h>
|
#include <velocypack/Iterator.h>
|
||||||
#include <velocypack/velocypack-aliases.h>
|
#include <velocypack/velocypack-aliases.h>
|
||||||
|
|
||||||
|
//#include <libcuckoo/city_hasher.hh>
|
||||||
|
//#include <libcuckoo/cuckoohash_map.hh>
|
||||||
|
|
||||||
using namespace arangodb;
|
using namespace arangodb;
|
||||||
using namespace arangodb::pregel;
|
using namespace arangodb::pregel;
|
||||||
|
|
||||||
|
@ -87,7 +90,7 @@ void ArrayInCache<M>::mergeCache(InCache<M> const* otherCache) {
|
||||||
|
|
||||||
// cannot call setDirect since it locks
|
// cannot call setDirect since it locks
|
||||||
for (auto const& pair : other->_shardMap) {
|
for (auto const& pair : other->_shardMap) {
|
||||||
HMap &vertexMap = _shardMap[pair.first];
|
HMap& vertexMap = _shardMap[pair.first];
|
||||||
for (auto& vertexMessage : pair.second) {
|
for (auto& vertexMessage : pair.second) {
|
||||||
std::vector<M>& a = vertexMap[vertexMessage.first];
|
std::vector<M>& a = vertexMap[vertexMessage.first];
|
||||||
std::vector<M> const& b = vertexMessage.second;
|
std::vector<M> const& b = vertexMessage.second;
|
||||||
|
@ -119,7 +122,6 @@ void ArrayInCache<M>::clear() {
|
||||||
_shardMap.clear();
|
_shardMap.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename M>
|
template <typename M>
|
||||||
void ArrayInCache<M>::erase(prgl_shard_t shard, std::string const& key) {
|
void ArrayInCache<M>::erase(prgl_shard_t shard, std::string const& key) {
|
||||||
MUTEX_LOCKER(guard, this->_writeLock);
|
MUTEX_LOCKER(guard, this->_writeLock);
|
||||||
|
@ -134,6 +136,19 @@ void CombiningInCache<M>::setDirect(prgl_shard_t shard, std::string const& key,
|
||||||
M const& newValue) {
|
M const& newValue) {
|
||||||
MUTEX_LOCKER(guard, this->_writeLock);
|
MUTEX_LOCKER(guard, this->_writeLock);
|
||||||
|
|
||||||
|
/*cuckoohash_map<int, std::string, CityHasher<int>> Table;
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
Table[i] = "hello"+std::to_string(i);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 101; i++) {
|
||||||
|
std::string out;
|
||||||
|
if (Table.find(i, out)) {
|
||||||
|
LOG(INFO) << i << " " << out;
|
||||||
|
} else {
|
||||||
|
LOG(INFO) << i << " NOT FOUND";
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
this->_receivedMessageCount++;
|
this->_receivedMessageCount++;
|
||||||
HMap& vertexMap = _shardMap[shard];
|
HMap& vertexMap = _shardMap[shard];
|
||||||
auto vmsg = vertexMap.find(key);
|
auto vmsg = vertexMap.find(key);
|
||||||
|
|
|
@ -44,7 +44,6 @@ processing */
|
||||||
template <typename M>
|
template <typename M>
|
||||||
class InCache {
|
class InCache {
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
mutable Mutex _writeLock;
|
mutable Mutex _writeLock;
|
||||||
size_t _receivedMessageCount = 0;
|
size_t _receivedMessageCount = 0;
|
||||||
MessageFormat<M> const* _format;
|
MessageFormat<M> const* _format;
|
||||||
|
@ -53,12 +52,12 @@ class InCache {
|
||||||
: _receivedMessageCount(0), _format(format) {}
|
: _receivedMessageCount(0), _format(format) {}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual ~InCache() {};
|
virtual ~InCache(){};
|
||||||
|
|
||||||
MessageFormat<M> const* format() const {return _format;}
|
MessageFormat<M> const* format() const { return _format; }
|
||||||
void parseMessages(VPackSlice messages);
|
void parseMessages(VPackSlice messages);
|
||||||
size_t receivedMessageCount() const { return _receivedMessageCount; }
|
size_t receivedMessageCount() const { return _receivedMessageCount; }
|
||||||
|
|
||||||
/// @brief internal method to direclty set the messages for a vertex. Only
|
/// @brief internal method to direclty set the messages for a vertex. Only
|
||||||
/// valid with already combined messages
|
/// valid with already combined messages
|
||||||
virtual void setDirect(prgl_shard_t shard, std::string const& vertexId,
|
virtual void setDirect(prgl_shard_t shard, std::string const& vertexId,
|
||||||
|
@ -66,7 +65,8 @@ class InCache {
|
||||||
virtual void mergeCache(InCache<M> const* otherCache) = 0;
|
virtual void mergeCache(InCache<M> const* otherCache) = 0;
|
||||||
/// @brief get messages for vertex id. (Don't use keys from _from or _to
|
/// @brief get messages for vertex id. (Don't use keys from _from or _to
|
||||||
/// directly, they contain the collection name)
|
/// directly, they contain the collection name)
|
||||||
virtual MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) = 0;
|
virtual MessageIterator<M> getMessages(prgl_shard_t shard,
|
||||||
|
std::string const& key) = 0;
|
||||||
/// clear cache
|
/// clear cache
|
||||||
virtual void clear() = 0;
|
virtual void clear() = 0;
|
||||||
virtual void erase(prgl_shard_t shard, std::string const& key) = 0;
|
virtual void erase(prgl_shard_t shard, std::string const& key) = 0;
|
||||||
|
@ -83,7 +83,8 @@ class ArrayInCache : public InCache<M> {
|
||||||
void setDirect(prgl_shard_t shard, std::string const& vertexId,
|
void setDirect(prgl_shard_t shard, std::string const& vertexId,
|
||||||
M const& data) override;
|
M const& data) override;
|
||||||
void mergeCache(InCache<M> const* otherCache) override;
|
void mergeCache(InCache<M> const* otherCache) override;
|
||||||
MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) override;
|
MessageIterator<M> getMessages(prgl_shard_t shard,
|
||||||
|
std::string const& key) override;
|
||||||
void clear() override;
|
void clear() override;
|
||||||
void erase(prgl_shard_t shard, std::string const& key) override;
|
void erase(prgl_shard_t shard, std::string const& key) override;
|
||||||
};
|
};
|
||||||
|
@ -91,21 +92,22 @@ class ArrayInCache : public InCache<M> {
|
||||||
template <typename M>
|
template <typename M>
|
||||||
class CombiningInCache : public InCache<M> {
|
class CombiningInCache : public InCache<M> {
|
||||||
typedef std::unordered_map<std::string, M> HMap;
|
typedef std::unordered_map<std::string, M> HMap;
|
||||||
|
|
||||||
MessageCombiner<M> const* _combiner;
|
MessageCombiner<M> const* _combiner;
|
||||||
std::map<prgl_shard_t, HMap> _shardMap;
|
std::map<prgl_shard_t, HMap> _shardMap;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CombiningInCache(MessageFormat<M> const* format,
|
CombiningInCache(MessageFormat<M> const* format,
|
||||||
MessageCombiner<M> const* combiner)
|
MessageCombiner<M> const* combiner)
|
||||||
: InCache<M>(format), _combiner(combiner) {}
|
: InCache<M>(format), _combiner(combiner) {}
|
||||||
|
|
||||||
MessageCombiner<M> const* combiner() const {return _combiner;}
|
MessageCombiner<M> const* combiner() const { return _combiner; }
|
||||||
|
|
||||||
void setDirect(prgl_shard_t shard, std::string const& vertexId,
|
void setDirect(prgl_shard_t shard, std::string const& vertexId,
|
||||||
M const& data) override;
|
M const& data) override;
|
||||||
void mergeCache(InCache<M> const* otherCache) override;
|
void mergeCache(InCache<M> const* otherCache) override;
|
||||||
MessageIterator<M> getMessages(prgl_shard_t shard, std::string const& key) override;
|
MessageIterator<M> getMessages(prgl_shard_t shard,
|
||||||
|
std::string const& key) override;
|
||||||
void clear() override;
|
void clear() override;
|
||||||
void erase(prgl_shard_t shard, std::string const& key) override;
|
void erase(prgl_shard_t shard, std::string const& key) override;
|
||||||
};
|
};
|
||||||
|
|
|
@ -31,7 +31,7 @@ class MessageIterator {
|
||||||
M const* _data;
|
M const* _data;
|
||||||
size_t _current = 0;
|
size_t _current = 0;
|
||||||
const size_t _size = 1;
|
const size_t _size = 1;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MessageIterator() : _data(nullptr), _current(0), _size(0) {}
|
MessageIterator() : _data(nullptr), _current(0), _size(0) {}
|
||||||
|
|
||||||
|
@ -74,22 +74,21 @@ class MessageIterator {
|
||||||
|
|
||||||
size_t size() const { return _size; }
|
size_t size() const { return _size; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class RangeIterator {
|
class RangeIterator {
|
||||||
private:
|
private:
|
||||||
// void *_begin, *_end, *_current;
|
// void *_begin, *_end, *_current;
|
||||||
std::vector<T>& _vector;
|
std::vector<T>& _vector;
|
||||||
size_t _begin, _end, _current;
|
size_t _begin, _end, _current;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef RangeIterator<T> iterator;
|
typedef RangeIterator<T> iterator;
|
||||||
typedef const RangeIterator<T> const_iterator;
|
typedef const RangeIterator<T> const_iterator;
|
||||||
|
|
||||||
RangeIterator(std::vector<T>& v, size_t begin, size_t end)
|
RangeIterator(std::vector<T>& v, size_t begin, size_t end)
|
||||||
: _vector(v), _begin(begin), _end(end), _current(begin) {}
|
: _vector(v), _begin(begin), _end(end), _current(begin) {}
|
||||||
|
|
||||||
iterator begin() { return RangeIterator(_vector, _begin, _end); }
|
iterator begin() { return RangeIterator(_vector, _begin, _end); }
|
||||||
const_iterator begin() const { return RangeIterator(_vector, _begin, _end); }
|
const_iterator begin() const { return RangeIterator(_vector, _begin, _end); }
|
||||||
iterator end() {
|
iterator end() {
|
||||||
|
@ -102,31 +101,31 @@ public:
|
||||||
it._current = it._end;
|
it._current = it._end;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
// prefix ++
|
// prefix ++
|
||||||
RangeIterator& operator++() {
|
RangeIterator& operator++() {
|
||||||
_current++;
|
_current++;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
// postfix ++
|
// postfix ++
|
||||||
RangeIterator<T>& operator++(int) {
|
RangeIterator<T>& operator++(int) {
|
||||||
RangeIterator<T> result(*this);
|
RangeIterator<T> result(*this);
|
||||||
++(*this);
|
++(*this);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
T* operator*() const {
|
T* operator*() const {
|
||||||
T* el = _vector.data();
|
T* el = _vector.data();
|
||||||
return _current != _end ? el + _current : nullptr;
|
return _current != _end ? el + _current : nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator!=(RangeIterator<T> const& other) const {
|
bool operator!=(RangeIterator<T> const& other) const {
|
||||||
return _current != other._current;
|
return _current != other._current;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t size() const { return _end - _begin; }
|
size_t size() const { return _end - _begin; }
|
||||||
|
|
||||||
/*EdgeIterator(void* beginPtr, void* endPtr)
|
/*EdgeIterator(void* beginPtr, void* endPtr)
|
||||||
: _begin(beginPtr), _end(endPtr), _current(_begin) {}
|
: _begin(beginPtr), _end(endPtr), _current(_begin) {}
|
||||||
iterator begin() { return EdgeIterator(_begin, _end); }
|
iterator begin() { return EdgeIterator(_begin, _end); }
|
||||||
|
@ -141,19 +140,18 @@ public:
|
||||||
it._current = it._end;
|
it._current = it._end;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
// prefix ++
|
// prefix ++
|
||||||
EdgeIterator<E>& operator++() {
|
EdgeIterator<E>& operator++() {
|
||||||
EdgeEntry<E>* entry = static_cast<EdgeEntry<E>>(_current);
|
EdgeEntry<E>* entry = static_cast<EdgeEntry<E>>(_current);
|
||||||
_current += entry->getSize();
|
_current += entry->getSize();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
EdgeEntry<E>* operator*() const {
|
EdgeEntry<E>* operator*() const {
|
||||||
return _current != _end ? static_cast<EdgeEntry<E>>(_current) : nullptr;
|
return _current != _end ? static_cast<EdgeEntry<E>>(_current) : nullptr;
|
||||||
}*/
|
}*/
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -25,9 +25,9 @@
|
||||||
|
|
||||||
#include <velocypack/Slice.h>
|
#include <velocypack/Slice.h>
|
||||||
#include <velocypack/velocypack-aliases.h>
|
#include <velocypack/velocypack-aliases.h>
|
||||||
#include "AggregatorUsage.h"
|
|
||||||
#include "Basics/Common.h"
|
#include "Basics/Common.h"
|
||||||
#include "Utils.h"
|
#include "Pregel/AggregatorHandler.h"
|
||||||
|
#include "Pregel/Utils.h"
|
||||||
|
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
|
@ -36,7 +36,7 @@ class MasterContext {
|
||||||
friend class Conductor;
|
friend class Conductor;
|
||||||
|
|
||||||
uint64_t _vertexCount, _edgeCount;
|
uint64_t _vertexCount, _edgeCount;
|
||||||
AggregatorUsage* _aggregators;
|
AggregatorHandler* _aggregators;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -50,21 +50,21 @@ class MasterContext {
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void preApplication(){};
|
virtual void preApplication(){};
|
||||||
|
|
||||||
/// @brief called before supersteps
|
/// @brief called before supersteps
|
||||||
/// @return true to continue the computation
|
/// @return true to continue the computation
|
||||||
virtual bool preGlobalSuperstep(uint64_t gss) {return true;};
|
virtual bool preGlobalSuperstep(uint64_t gss) { return true; };
|
||||||
/// @brief called after supersteps
|
/// @brief called after supersteps
|
||||||
/// @return true to continue the computation
|
/// @return true to continue the computation
|
||||||
virtual bool postGlobalSuperstep(uint64_t gss) {return true;};
|
virtual bool postGlobalSuperstep(uint64_t gss) { return true; };
|
||||||
virtual void postApplication(){};
|
virtual void postApplication(){};
|
||||||
|
|
||||||
/// should indicate if compensation is supposed to start by returning true
|
/// should indicate if compensation is supposed to start by returning true
|
||||||
virtual bool preCompensation(uint64_t gss) {return true;}
|
virtual bool preCompensation(uint64_t gss) { return true; }
|
||||||
/// should indicate if compensation is finished, by returning false.
|
/// should indicate if compensation is finished, by returning false.
|
||||||
/// otherwise workers will be called again with the aggregated values
|
/// otherwise workers will be called again with the aggregated values
|
||||||
virtual bool postCompensation(uint64_t gss) {return false;}
|
virtual bool postCompensation(uint64_t gss) { return false; }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MasterContext(VPackSlice params){};
|
MasterContext(VPackSlice params){};
|
||||||
|
|
||||||
|
|
|
@ -36,8 +36,7 @@ struct MessageCombiner {
|
||||||
|
|
||||||
struct IntegerMinCombiner : public MessageCombiner<int64_t> {
|
struct IntegerMinCombiner : public MessageCombiner<int64_t> {
|
||||||
IntegerMinCombiner() {}
|
IntegerMinCombiner() {}
|
||||||
void combine(int64_t& firstValue,
|
void combine(int64_t& firstValue, int64_t const& secondValue) const override {
|
||||||
int64_t const& secondValue) const override {
|
|
||||||
if (firstValue > secondValue) {
|
if (firstValue > secondValue) {
|
||||||
firstValue = secondValue;
|
firstValue = secondValue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ struct MessageFormat {
|
||||||
virtual void unwrapValue(VPackSlice body, M& value) const = 0;
|
virtual void unwrapValue(VPackSlice body, M& value) const = 0;
|
||||||
virtual void addValue(VPackBuilder& arrayBuilder, M const& val) const = 0;
|
virtual void addValue(VPackBuilder& arrayBuilder, M const& val) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct IntegerMessageFormat : public MessageFormat<int64_t> {
|
struct IntegerMessageFormat : public MessageFormat<int64_t> {
|
||||||
IntegerMessageFormat() {}
|
IntegerMessageFormat() {}
|
||||||
void unwrapValue(VPackSlice s, int64_t& value) const override {
|
void unwrapValue(VPackSlice s, int64_t& value) const override {
|
||||||
|
@ -58,7 +58,7 @@ struct FloatMessageFormat : public MessageFormat<float> {
|
||||||
arrayBuilder.add(VPackValue(val));
|
arrayBuilder.add(VPackValue(val));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
template <typename M>
|
template <typename M>
|
||||||
struct NumberMessageFormat : public MessageFormat<M> {
|
struct NumberMessageFormat : public MessageFormat<M> {
|
||||||
|
|
|
@ -60,7 +60,7 @@ void ArrayOutCache<M>::appendMessage(prgl_shard_t shard, std::string const& key,
|
||||||
M const& data) {
|
M const& data) {
|
||||||
if (this->_state->isLocalVertexShard(shard)) {
|
if (this->_state->isLocalVertexShard(shard)) {
|
||||||
this->_localCache->setDirect(shard, key, data);
|
this->_localCache->setDirect(shard, key, data);
|
||||||
//LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
|
// LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
|
||||||
this->_sendMessages++;
|
this->_sendMessages++;
|
||||||
} else {
|
} else {
|
||||||
_shardMap[shard][key].push_back(data);
|
_shardMap[shard][key].push_back(data);
|
||||||
|
@ -149,17 +149,17 @@ void CombiningOutCache<M>::appendMessage(prgl_shard_t shard,
|
||||||
M const& data) {
|
M const& data) {
|
||||||
if (this->_state->isLocalVertexShard(shard)) {
|
if (this->_state->isLocalVertexShard(shard)) {
|
||||||
this->_localCache->setDirect(shard, key, data);
|
this->_localCache->setDirect(shard, key, data);
|
||||||
//LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
|
// LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
|
||||||
this->_sendMessages++;
|
this->_sendMessages++;
|
||||||
} else {
|
} else {
|
||||||
std::unordered_map<std::string, M>& vertexMap = _shardMap[shard];
|
std::unordered_map<std::string, M>& vertexMap = _shardMap[shard];
|
||||||
auto it = vertexMap.find(key);
|
auto it = vertexMap.find(key);
|
||||||
if (it != vertexMap.end()) { // more than one message
|
if (it != vertexMap.end()) { // more than one message
|
||||||
_combiner->combine(vertexMap[key], data);
|
_combiner->combine(vertexMap[key], data);
|
||||||
} else { // first message for this vertex
|
} else { // first message for this vertex
|
||||||
vertexMap.emplace(key, data);
|
vertexMap.emplace(key, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->_containedMessages++ > this->_batchSize) {
|
if (this->_containedMessages++ > this->_batchSize) {
|
||||||
flushMessages();
|
flushMessages();
|
||||||
}
|
}
|
||||||
|
@ -181,7 +181,7 @@ void CombiningOutCache<M>::flushMessages() {
|
||||||
VPackOptions options = VPackOptions::Defaults;
|
VPackOptions options = VPackOptions::Defaults;
|
||||||
options.buildUnindexedArrays = true;
|
options.buildUnindexedArrays = true;
|
||||||
options.buildUnindexedObjects = true;
|
options.buildUnindexedObjects = true;
|
||||||
|
|
||||||
VPackBuilder package(&options);
|
VPackBuilder package(&options);
|
||||||
package.openObject();
|
package.openObject();
|
||||||
package.add(Utils::messagesKey, VPackValue(VPackValueType::Array));
|
package.add(Utils::messagesKey, VPackValue(VPackValueType::Array));
|
||||||
|
|
|
@ -27,10 +27,10 @@
|
||||||
#include "Cluster/ClusterInfo.h"
|
#include "Cluster/ClusterInfo.h"
|
||||||
#include "VocBase/voc-types.h"
|
#include "VocBase/voc-types.h"
|
||||||
|
|
||||||
|
#include "Pregel/GraphStore.h"
|
||||||
#include "Pregel/MessageCombiner.h"
|
#include "Pregel/MessageCombiner.h"
|
||||||
#include "Pregel/MessageFormat.h"
|
#include "Pregel/MessageFormat.h"
|
||||||
#include "Pregel/WorkerState.h"
|
#include "Pregel/WorkerState.h"
|
||||||
#include "Pregel/GraphStore.h"
|
|
||||||
|
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
|
@ -45,66 +45,70 @@ class CombiningInCache;
|
||||||
|
|
||||||
template <typename M>
|
template <typename M>
|
||||||
class ArrayInCache;
|
class ArrayInCache;
|
||||||
|
|
||||||
template <typename M>
|
template <typename M>
|
||||||
class OutCache {
|
class OutCache {
|
||||||
protected:
|
protected:
|
||||||
WorkerState const* _state;
|
WorkerState const* _state;
|
||||||
MessageFormat<M> const* _format;
|
MessageFormat<M> const* _format;
|
||||||
InCache<M>* _localCache;
|
InCache<M>* _localCache;
|
||||||
std::string _baseUrl;
|
std::string _baseUrl;
|
||||||
uint32_t _batchSize = 1000;
|
uint32_t _batchSize = 1000;
|
||||||
|
|
||||||
/// @brief current number of vertices stored
|
/// @brief current number of vertices stored
|
||||||
size_t _containedMessages = 0;
|
size_t _containedMessages = 0;
|
||||||
size_t _sendMessages = 0;
|
size_t _sendMessages = 0;
|
||||||
bool shouldFlushCache();
|
bool shouldFlushCache();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
OutCache(WorkerState* state, InCache<M>* cache);
|
OutCache(WorkerState* state, InCache<M>* cache);
|
||||||
virtual ~OutCache() {};
|
virtual ~OutCache(){};
|
||||||
|
|
||||||
size_t sendMessageCount() const { return _sendMessages; }
|
size_t sendMessageCount() const { return _sendMessages; }
|
||||||
uint32_t batchSize() const {return _batchSize;}
|
uint32_t batchSize() const { return _batchSize; }
|
||||||
void setBatchSize(uint32_t bs) {_batchSize = bs;}
|
void setBatchSize(uint32_t bs) { _batchSize = bs; }
|
||||||
|
|
||||||
virtual void clear() = 0;
|
virtual void clear() = 0;
|
||||||
virtual void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) = 0;
|
virtual void appendMessage(prgl_shard_t shard, std::string const& key,
|
||||||
|
M const& data) = 0;
|
||||||
virtual void flushMessages() = 0;
|
virtual void flushMessages() = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename M>
|
template <typename M>
|
||||||
class ArrayOutCache : public OutCache<M> {
|
class ArrayOutCache : public OutCache<M> {
|
||||||
/// @brief two stage map: shard -> vertice -> message
|
/// @brief two stage map: shard -> vertice -> message
|
||||||
std::unordered_map<prgl_shard_t,
|
std::unordered_map<prgl_shard_t,
|
||||||
std::unordered_map<std::string, std::vector<M>>> _shardMap;
|
std::unordered_map<std::string, std::vector<M>>>
|
||||||
|
_shardMap;
|
||||||
public:
|
|
||||||
|
public:
|
||||||
ArrayOutCache(WorkerState* state, InCache<M>* cache)
|
ArrayOutCache(WorkerState* state, InCache<M>* cache)
|
||||||
: OutCache<M>(state, cache) {}
|
: OutCache<M>(state, cache) {}
|
||||||
~ArrayOutCache();
|
~ArrayOutCache();
|
||||||
|
|
||||||
void clear() override;
|
void clear() override;
|
||||||
void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) override;
|
void appendMessage(prgl_shard_t shard, std::string const& key,
|
||||||
|
M const& data) override;
|
||||||
void flushMessages() override;
|
void flushMessages() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename M>
|
template <typename M>
|
||||||
class CombiningOutCache : public OutCache<M> {
|
class CombiningOutCache : public OutCache<M> {
|
||||||
MessageCombiner<M> const* _combiner;
|
MessageCombiner<M> const* _combiner;
|
||||||
|
|
||||||
/// @brief two stage map: shard -> vertice -> message
|
/// @brief two stage map: shard -> vertice -> message
|
||||||
std::unordered_map<prgl_shard_t, std::unordered_map<std::string, M>> _shardMap;
|
std::unordered_map<prgl_shard_t, std::unordered_map<std::string, M>>
|
||||||
|
_shardMap;
|
||||||
public:
|
|
||||||
|
public:
|
||||||
CombiningOutCache(WorkerState* state, CombiningInCache<M>* cache);
|
CombiningOutCache(WorkerState* state, CombiningInCache<M>* cache);
|
||||||
~CombiningOutCache();
|
~CombiningOutCache();
|
||||||
|
|
||||||
void clear() override;
|
void clear() override;
|
||||||
void appendMessage(prgl_shard_t shard, std::string const& key, M const& data) override;
|
void appendMessage(prgl_shard_t shard, std::string const& key,
|
||||||
|
M const& data) override;
|
||||||
void flushMessages() override;
|
void flushMessages() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -57,10 +57,10 @@ class PregelFeature final : public application_features::ApplicationFeature {
|
||||||
|
|
||||||
void cleanup(uint64_t executionNumber);
|
void cleanup(uint64_t executionNumber);
|
||||||
void cleanupAll();
|
void cleanupAll();
|
||||||
|
|
||||||
basics::ThreadPool* threadPool() {return _threadPool.get();}
|
basics::ThreadPool* threadPool() { return _threadPool.get(); }
|
||||||
RecoveryManager* recoveryManager() {return _recoveryManager.get();}
|
RecoveryManager* recoveryManager() { return _recoveryManager.get(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Mutex _mutex;
|
Mutex _mutex;
|
||||||
std::unique_ptr<RecoveryManager> _recoveryManager;
|
std::unique_ptr<RecoveryManager> _recoveryManager;
|
||||||
|
|
|
@ -88,27 +88,26 @@ void RecoveryManager::_monitorShard(CollectionID const& cid,
|
||||||
ShardID const& shard) {
|
ShardID const& shard) {
|
||||||
std::function<bool(VPackSlice const& result)> listener =
|
std::function<bool(VPackSlice const& result)> listener =
|
||||||
[this, shard](VPackSlice const& result) {
|
[this, shard](VPackSlice const& result) {
|
||||||
MUTEX_LOCKER(guard, _lock);// we are editing _primaryServers
|
MUTEX_LOCKER(guard, _lock); // we are editing _primaryServers
|
||||||
|
|
||||||
auto const& conductors = _listeners.find(shard);
|
auto const& conductors = _listeners.find(shard);
|
||||||
if (conductors == _listeners.end()) {
|
if (conductors == _listeners.end()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.isArray()) {
|
if (result.isArray()) {
|
||||||
|
|
||||||
if (result.length() > 0) {
|
if (result.length() > 0) {
|
||||||
ServerID nextPrimary = result.at(0).copyString();
|
ServerID nextPrimary = result.at(0).copyString();
|
||||||
auto const& currentPrimary = _primaryServers.find(shard);
|
auto const& currentPrimary = _primaryServers.find(shard);
|
||||||
if (currentPrimary != _primaryServers.end()
|
if (currentPrimary != _primaryServers.end() &&
|
||||||
&& currentPrimary->second != nextPrimary) {
|
currentPrimary->second != nextPrimary) {
|
||||||
_primaryServers[shard] = nextPrimary;
|
_primaryServers[shard] = nextPrimary;
|
||||||
for (Conductor *cc : conductors->second) {
|
for (Conductor* cc : conductors->second) {
|
||||||
cc->startRecovery();
|
cc->startRecovery();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (Conductor *cc : conductors->second) {
|
for (Conductor* cc : conductors->second) {
|
||||||
cc->cancel();
|
cc->cancel();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,56 +23,57 @@
|
||||||
#ifndef ARANGODB_PREGEL_RECOVERY_H
|
#ifndef ARANGODB_PREGEL_RECOVERY_H
|
||||||
#define ARANGODB_PREGEL_RECOVERY_H 1
|
#define ARANGODB_PREGEL_RECOVERY_H 1
|
||||||
|
|
||||||
#include "Basics/Mutex.h"
|
|
||||||
#include "Cluster/ClusterInfo.h"
|
|
||||||
#include "Agency/AgencyComm.h"
|
|
||||||
#include "Agency/AgencyCallbackRegistry.h"
|
|
||||||
#include <velocypack/velocypack-aliases.h>
|
#include <velocypack/velocypack-aliases.h>
|
||||||
#include <velocypack/vpack.h>
|
#include <velocypack/vpack.h>
|
||||||
|
#include "Agency/AgencyCallbackRegistry.h"
|
||||||
|
#include "Agency/AgencyComm.h"
|
||||||
|
#include "Basics/Mutex.h"
|
||||||
|
#include "Cluster/ClusterInfo.h"
|
||||||
|
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
|
|
||||||
template<typename V, typename E>
|
template <typename V, typename E>
|
||||||
class GraphStore;
|
class GraphStore;
|
||||||
class Conductor;
|
class Conductor;
|
||||||
|
|
||||||
class RecoveryManager {
|
class RecoveryManager {
|
||||||
|
|
||||||
Mutex _lock;
|
Mutex _lock;
|
||||||
AgencyComm _agency;
|
AgencyComm _agency;
|
||||||
AgencyCallbackRegistry *_agencyCallbackRegistry;//weak
|
AgencyCallbackRegistry* _agencyCallbackRegistry; // weak
|
||||||
|
|
||||||
std::map<ShardID, std::set<Conductor*>> _listeners;
|
std::map<ShardID, std::set<Conductor*>> _listeners;
|
||||||
std::map<ShardID, ServerID> _primaryServers;
|
std::map<ShardID, ServerID> _primaryServers;
|
||||||
std::map<ShardID, std::shared_ptr<AgencyCallback>> _agencyCallbacks;
|
std::map<ShardID, std::shared_ptr<AgencyCallback>> _agencyCallbacks;
|
||||||
|
|
||||||
void _monitorShard(CollectionID const& cid, ShardID const& shard);
|
void _monitorShard(CollectionID const& cid, ShardID const& shard);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
RecoveryManager(AgencyCallbackRegistry *registry);
|
RecoveryManager(AgencyCallbackRegistry* registry);
|
||||||
~RecoveryManager();
|
~RecoveryManager();
|
||||||
|
|
||||||
void monitorCollections(std::vector<std::shared_ptr<LogicalCollection>> const& collections, Conductor*);
|
void monitorCollections(
|
||||||
|
std::vector<std::shared_ptr<LogicalCollection>> const& collections,
|
||||||
|
Conductor*);
|
||||||
void stopMonitoring(Conductor*);
|
void stopMonitoring(Conductor*);
|
||||||
int filterGoodServers(std::vector<ServerID> const& servers, std::vector<ServerID> &goodServers);
|
int filterGoodServers(std::vector<ServerID> const& servers,
|
||||||
//bool allServersAvailable(std::vector<ServerID> const& dbServers);
|
std::vector<ServerID>& goodServers);
|
||||||
|
// bool allServersAvailable(std::vector<ServerID> const& dbServers);
|
||||||
};
|
};
|
||||||
|
|
||||||
class RecoveryWorker {
|
class RecoveryWorker {
|
||||||
friend class RestPregelHandler;
|
friend class RestPregelHandler;
|
||||||
|
|
||||||
std::map<ShardID, ServerID> _secondaries;
|
std::map<ShardID, ServerID> _secondaries;
|
||||||
ServerID const* secondaryForShard(ShardID const& shard) {return nullptr;}
|
ServerID const* secondaryForShard(ShardID const& shard) { return nullptr; }
|
||||||
|
|
||||||
//receivedBackupData(VPackSlice slice);
|
// receivedBackupData(VPackSlice slice);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
template<typename V, typename E>
|
template <typename V, typename E>
|
||||||
void replicateGraphData(GraphStore<V,E> *graphStore) {}
|
void replicateGraphData(GraphStore<V, E>* graphStore) {}
|
||||||
|
|
||||||
void reloadPlanData() {_secondaries.clear();}
|
void reloadPlanData() { _secondaries.clear(); }
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,23 +23,23 @@
|
||||||
#ifndef ARANGODB_PREGEL_STATISTICS_H
|
#ifndef ARANGODB_PREGEL_STATISTICS_H
|
||||||
#define ARANGODB_PREGEL_STATISTICS_H 1
|
#define ARANGODB_PREGEL_STATISTICS_H 1
|
||||||
|
|
||||||
#include <velocypack/Slice.h>
|
|
||||||
#include <velocypack/Builder.h>
|
#include <velocypack/Builder.h>
|
||||||
|
#include <velocypack/Slice.h>
|
||||||
#include <velocypack/velocypack-aliases.h>
|
#include <velocypack/velocypack-aliases.h>
|
||||||
#include "Pregel/Utils.h"
|
#include "Pregel/Utils.h"
|
||||||
|
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
namespace pregel {
|
namespace pregel {
|
||||||
|
|
||||||
|
|
||||||
struct WorkerStats {
|
struct WorkerStats {
|
||||||
size_t activeCount = 0;
|
size_t activeCount = 0;
|
||||||
size_t sendCount = 0;
|
size_t sendCount = 0;
|
||||||
size_t receivedCount = 0;
|
size_t receivedCount = 0;
|
||||||
double superstepRuntimeSecs= 0;
|
double superstepRuntimeSecs = 0;
|
||||||
|
|
||||||
WorkerStats() {}
|
WorkerStats() {}
|
||||||
WorkerStats(size_t a, size_t s, size_t r) : activeCount(a), sendCount(s), receivedCount(r) {}
|
WorkerStats(size_t a, size_t s, size_t r)
|
||||||
|
: activeCount(a), sendCount(s), receivedCount(r) {}
|
||||||
|
|
||||||
void accumulate(WorkerStats const& other) {
|
void accumulate(WorkerStats const& other) {
|
||||||
activeCount += other.activeCount;
|
activeCount += other.activeCount;
|
||||||
|
@ -47,7 +47,7 @@ struct WorkerStats {
|
||||||
receivedCount += other.receivedCount;
|
receivedCount += other.receivedCount;
|
||||||
superstepRuntimeSecs += other.superstepRuntimeSecs;
|
superstepRuntimeSecs += other.superstepRuntimeSecs;
|
||||||
}
|
}
|
||||||
|
|
||||||
void accumulate(VPackSlice statValues) {
|
void accumulate(VPackSlice statValues) {
|
||||||
VPackSlice p = statValues.get(Utils::activeCountKey);
|
VPackSlice p = statValues.get(Utils::activeCountKey);
|
||||||
if (p.isInteger()) {
|
if (p.isInteger()) {
|
||||||
|
@ -73,13 +73,17 @@ struct WorkerStats {
|
||||||
b.add(Utils::receivedCountKey, VPackValue(receivedCount));
|
b.add(Utils::receivedCountKey, VPackValue(receivedCount));
|
||||||
b.add(Utils::superstepRuntimeKey, VPackValue(superstepRuntimeSecs));
|
b.add(Utils::superstepRuntimeKey, VPackValue(superstepRuntimeSecs));
|
||||||
}
|
}
|
||||||
|
|
||||||
void reset() {
|
void reset() {
|
||||||
activeCount = 0;
|
activeCount = 0;
|
||||||
sendCount = 0;
|
sendCount = 0;
|
||||||
receivedCount = 0;
|
receivedCount = 0;
|
||||||
superstepRuntimeSecs = 0;
|
superstepRuntimeSecs = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isDone() {
|
||||||
|
return activeCount == 0 && sendCount == receivedCount;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,7 @@ std::string const Utils::startExecutionPath = "startExecution";
|
||||||
std::string const Utils::finishedStartupPath = "finishedStartup";
|
std::string const Utils::finishedStartupPath = "finishedStartup";
|
||||||
std::string const Utils::prepareGSSPath = "prepareGSS";
|
std::string const Utils::prepareGSSPath = "prepareGSS";
|
||||||
std::string const Utils::startGSSPath = "startGSS";
|
std::string const Utils::startGSSPath = "startGSS";
|
||||||
std::string const Utils::finishedGSSPath = "finishedGSS";
|
std::string const Utils::finishedWorkerStepPath = "finishedStep";
|
||||||
std::string const Utils::cancelGSSPath = "cancelGSS";
|
std::string const Utils::cancelGSSPath = "cancelGSS";
|
||||||
std::string const Utils::messagesPath = "messages";
|
std::string const Utils::messagesPath = "messages";
|
||||||
std::string const Utils::finalizeExecutionPath = "finalizeExecution";
|
std::string const Utils::finalizeExecutionPath = "finalizeExecution";
|
||||||
|
@ -56,6 +56,7 @@ std::string const Utils::globalShardListKey = "globalShardList";
|
||||||
std::string const Utils::totalVertexCount = "vertexCount";
|
std::string const Utils::totalVertexCount = "vertexCount";
|
||||||
std::string const Utils::totalEdgeCount = "edgeCount";
|
std::string const Utils::totalEdgeCount = "edgeCount";
|
||||||
std::string const Utils::asyncMode = "async";
|
std::string const Utils::asyncMode = "async";
|
||||||
|
std::string const Utils::gssDone = "gssDone";
|
||||||
|
|
||||||
std::string const Utils::coordinatorIdKey = "coordinatorId";
|
std::string const Utils::coordinatorIdKey = "coordinatorId";
|
||||||
std::string const Utils::algorithmKey = "algorithm";
|
std::string const Utils::algorithmKey = "algorithm";
|
||||||
|
@ -73,7 +74,6 @@ std::string const Utils::receivedCountKey = "receivedCount";
|
||||||
std::string const Utils::sendCountKey = "sendCount";
|
std::string const Utils::sendCountKey = "sendCount";
|
||||||
std::string const Utils::superstepRuntimeKey = "superstepRuntime";
|
std::string const Utils::superstepRuntimeKey = "superstepRuntime";
|
||||||
|
|
||||||
|
|
||||||
std::string const Utils::userParametersKey = "userparams";
|
std::string const Utils::userParametersKey = "userparams";
|
||||||
|
|
||||||
std::string Utils::baseUrl(std::string dbName) {
|
std::string Utils::baseUrl(std::string dbName) {
|
||||||
|
|
|
@ -45,7 +45,7 @@ class Utils {
|
||||||
static std::string const finishedStartupPath;
|
static std::string const finishedStartupPath;
|
||||||
static std::string const prepareGSSPath;
|
static std::string const prepareGSSPath;
|
||||||
static std::string const startGSSPath;
|
static std::string const startGSSPath;
|
||||||
static std::string const finishedGSSPath;
|
static std::string const finishedWorkerStepPath;
|
||||||
static std::string const cancelGSSPath;
|
static std::string const cancelGSSPath;
|
||||||
static std::string const messagesPath;
|
static std::string const messagesPath;
|
||||||
static std::string const finalizeExecutionPath;
|
static std::string const finalizeExecutionPath;
|
||||||
|
@ -63,6 +63,7 @@ class Utils {
|
||||||
static std::string const totalVertexCount;
|
static std::string const totalVertexCount;
|
||||||
static std::string const totalEdgeCount;
|
static std::string const totalEdgeCount;
|
||||||
static std::string const asyncMode;
|
static std::string const asyncMode;
|
||||||
|
static std::string const gssDone;
|
||||||
|
|
||||||
static std::string const globalSuperstepKey;
|
static std::string const globalSuperstepKey;
|
||||||
static std::string const messagesKey;
|
static std::string const messagesKey;
|
||||||
|
@ -70,14 +71,13 @@ class Utils {
|
||||||
static std::string const recoveryMethodKey;
|
static std::string const recoveryMethodKey;
|
||||||
static std::string const compensate;
|
static std::string const compensate;
|
||||||
static std::string const rollback;
|
static std::string const rollback;
|
||||||
|
|
||||||
static std::string const storeResultsKey;
|
static std::string const storeResultsKey;
|
||||||
static std::string const aggregatorValuesKey;
|
static std::string const aggregatorValuesKey;
|
||||||
static std::string const activeCountKey;
|
static std::string const activeCountKey;
|
||||||
static std::string const receivedCountKey;
|
static std::string const receivedCountKey;
|
||||||
static std::string const sendCountKey;
|
static std::string const sendCountKey;
|
||||||
static std::string const superstepRuntimeKey;
|
static std::string const superstepRuntimeKey;
|
||||||
|
|
||||||
|
|
||||||
// User parameters
|
// User parameters
|
||||||
static std::string const userParametersKey;
|
static std::string const userParametersKey;
|
||||||
|
@ -88,11 +88,9 @@ class Utils {
|
||||||
static int64_t countDocuments(TRI_vocbase_t* vocbase,
|
static int64_t countDocuments(TRI_vocbase_t* vocbase,
|
||||||
std::string const& collection);
|
std::string const& collection);
|
||||||
static std::shared_ptr<LogicalCollection> resolveCollection(
|
static std::shared_ptr<LogicalCollection> resolveCollection(
|
||||||
std::string const& database,
|
std::string const& database, std::string const& collectionName,
|
||||||
std::string const& collectionName,
|
|
||||||
std::map<std::string, std::string> const& collectionPlanIdMap);
|
std::map<std::string, std::string> const& collectionPlanIdMap);
|
||||||
static void resolveShard(LogicalCollection* info,
|
static void resolveShard(LogicalCollection* info, std::string const& shardKey,
|
||||||
std::string const& shardKey,
|
|
||||||
std::string const& vertexKey,
|
std::string const& vertexKey,
|
||||||
std::string& responsibleShard);
|
std::string& responsibleShard);
|
||||||
};
|
};
|
||||||
|
|
|
@ -35,7 +35,7 @@ namespace pregel {
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
class Worker;
|
class Worker;
|
||||||
class Aggregator;
|
class Aggregator;
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
class VertexContext {
|
class VertexContext {
|
||||||
friend class Worker<V, E, M>;
|
friend class Worker<V, E, M>;
|
||||||
|
@ -43,17 +43,16 @@ class VertexContext {
|
||||||
uint64_t _gss = 0;
|
uint64_t _gss = 0;
|
||||||
WorkerContext* _context;
|
WorkerContext* _context;
|
||||||
GraphStore<V, E>* _graphStore;
|
GraphStore<V, E>* _graphStore;
|
||||||
const AggregatorUsage* _conductorAggregators;
|
const AggregatorHandler* _conductorAggregators;
|
||||||
AggregatorUsage* _workerAggregators;
|
AggregatorHandler* _workerAggregators;
|
||||||
VertexEntry* _vertexEntry;
|
VertexEntry* _vertexEntry;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline const T* getAggregatedValue(std::string const& name) {
|
inline const T* getAggregatedValue(std::string const& name) {
|
||||||
return (const T*)_conductorAggregators->getAggregatedValue(name);
|
return (const T*)_conductorAggregators->getAggregatedValue(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void aggregate(std::string const& name, const T* valuePtr) {
|
inline void aggregate(std::string const& name, const T* valuePtr) {
|
||||||
_workerAggregators->aggregate(name, valuePtr);
|
_workerAggregators->aggregate(name, valuePtr);
|
||||||
|
@ -61,47 +60,48 @@ class VertexContext {
|
||||||
|
|
||||||
inline WorkerContext const* context() { return _context; }
|
inline WorkerContext const* context() { return _context; }
|
||||||
|
|
||||||
template<typename T>
|
template <typename T>
|
||||||
T* mutableVertexData() {
|
T* mutableVertexData() {
|
||||||
return (T*) _graphStore->mutableVertexData(_vertexEntry);
|
return (T*)_graphStore->mutableVertexData(_vertexEntry);
|
||||||
}
|
}
|
||||||
|
|
||||||
V vertexData() { return _graphStore->copyVertexData(_vertexEntry); }
|
V vertexData() { return _graphStore->copyVertexData(_vertexEntry); }
|
||||||
|
|
||||||
RangeIterator<Edge<E>> getEdges() { return _graphStore->edgeIterator(_vertexEntry); }
|
RangeIterator<Edge<E>> getEdges() {
|
||||||
|
return _graphStore->edgeIterator(_vertexEntry);
|
||||||
|
}
|
||||||
|
|
||||||
/// store data, will potentially move the data around
|
/// store data, will potentially move the data around
|
||||||
void setVertexData(void const* ptr, size_t size) {
|
void setVertexData(void const* ptr, size_t size) {
|
||||||
_graphStore->replaceVertexData(_vertexEntry, (void*)ptr, size);
|
_graphStore->replaceVertexData(_vertexEntry, (void*)ptr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void voteHalt() {_vertexEntry->setActive(false); }
|
void voteHalt() { _vertexEntry->setActive(false); }
|
||||||
void voteActive() {_vertexEntry->setActive(true);}
|
void voteActive() { _vertexEntry->setActive(true); }
|
||||||
|
|
||||||
inline uint64_t globalSuperstep() const { return _gss; }
|
inline uint64_t globalSuperstep() const { return _gss; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
class VertexComputation : public VertexContext<V, E, M> {
|
class VertexComputation : public VertexContext<V, E, M> {
|
||||||
friend class Worker<V, E, M>;
|
friend class Worker<V, E, M>;
|
||||||
OutCache<M>* _outgoing;
|
OutCache<M>* _outgoing;
|
||||||
public:
|
|
||||||
|
public:
|
||||||
void sendMessage(Edge<E> const* edge, M const& data) {
|
void sendMessage(Edge<E> const* edge, M const& data) {
|
||||||
_outgoing->appendMessage(edge->targetShard(), edge->toKey(), data);
|
_outgoing->appendMessage(edge->targetShard(), edge->toKey(), data);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void compute(MessageIterator<M> const& messages) = 0;
|
virtual void compute(MessageIterator<M> const& messages) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
class VertexCompensation : public VertexContext<V, E, M> {
|
class VertexCompensation : public VertexContext<V, E, M> {
|
||||||
friend class Worker<V, E, M>;
|
friend class Worker<V, E, M>;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual void compensate(bool inLostPartition) = 0;
|
virtual void compensate(bool inLostPartition) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -47,12 +47,15 @@ template <typename V, typename E, typename M>
|
||||||
Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
|
Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
|
||||||
VPackSlice initConfig)
|
VPackSlice initConfig)
|
||||||
: _running(true), _state(vocbase->name(), initConfig), _algorithm(algo) {
|
: _running(true), _state(vocbase->name(), initConfig), _algorithm(algo) {
|
||||||
|
|
||||||
VPackSlice userParams = initConfig.get(Utils::userParametersKey);
|
VPackSlice userParams = initConfig.get(Utils::userParametersKey);
|
||||||
_workerContext.reset(algo->workerContext(userParams));
|
_workerContext.reset(algo->workerContext(userParams));
|
||||||
_messageFormat.reset(algo->messageFormat());
|
_messageFormat.reset(algo->messageFormat());
|
||||||
_messageCombiner.reset(algo->messageCombiner());
|
_messageCombiner.reset(algo->messageCombiner());
|
||||||
_conductorAggregators.reset(new AggregatorUsage(algo));
|
_conductorAggregators.reset(new AggregatorHandler(algo));
|
||||||
_workerAggregators.reset(new AggregatorUsage(algo));
|
_workerAggregators.reset(new AggregatorHandler(algo));
|
||||||
|
_graphStore.reset(new GraphStore<V, E>(vocbase, _algorithm->inputFormat()));
|
||||||
|
|
||||||
if (_messageCombiner) {
|
if (_messageCombiner) {
|
||||||
_readCache.reset(
|
_readCache.reset(
|
||||||
new CombiningInCache<M>(_messageFormat.get(), _messageCombiner.get()));
|
new CombiningInCache<M>(_messageFormat.get(), _messageCombiner.get()));
|
||||||
|
@ -70,24 +73,24 @@ Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
|
||||||
// of time. Therefore this is performed asynchronous
|
// of time. Therefore this is performed asynchronous
|
||||||
ThreadPool* pool = PregelFeature::instance()->threadPool();
|
ThreadPool* pool = PregelFeature::instance()->threadPool();
|
||||||
pool->enqueue([this, vocbase, vc, ec] {
|
pool->enqueue([this, vocbase, vc, ec] {
|
||||||
_graphStore.reset(
|
_graphStore->loadShards(this->_state);
|
||||||
new GraphStore<V, E>(vocbase, _state, _algorithm->inputFormat()));
|
|
||||||
|
// execute the user defined startup code
|
||||||
if (_workerContext) {
|
if (_workerContext) {
|
||||||
_workerContext->_conductorAggregators = _conductorAggregators.get();
|
_workerContext->_conductorAggregators = _conductorAggregators.get();
|
||||||
_workerContext->_workerAggregators = _workerAggregators.get();
|
_workerContext->_workerAggregators = _workerAggregators.get();
|
||||||
_workerContext->_vertexCount = vc;
|
_workerContext->_vertexCount = vc;
|
||||||
_workerContext->_edgeCount = ec;
|
_workerContext->_edgeCount = ec;
|
||||||
_workerContext->preApplication();
|
_workerContext->preApplication();
|
||||||
|
|
||||||
VPackBuilder package;
|
|
||||||
package.openObject();
|
|
||||||
package.add(Utils::senderKey,
|
|
||||||
VPackValue(ServerState::instance()->getId()));
|
|
||||||
package.add(Utils::executionNumberKey,
|
|
||||||
VPackValue(_state.executionNumber()));
|
|
||||||
package.close();
|
|
||||||
_callConductor(Utils::finishedStartupPath, package.slice());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VPackBuilder package;
|
||||||
|
package.openObject();
|
||||||
|
package.add(Utils::senderKey, VPackValue(ServerState::instance()->getId()));
|
||||||
|
package.add(Utils::executionNumberKey,
|
||||||
|
VPackValue(_state.executionNumber()));
|
||||||
|
package.close();
|
||||||
|
_callConductor(Utils::finishedStartupPath, package.slice());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,13 +123,13 @@ void Worker<V, E, M>::prepareGlobalStep(VPackSlice data) {
|
||||||
// clean up message caches, intialize gss
|
// clean up message caches, intialize gss
|
||||||
_state._globalSuperstep = gss;
|
_state._globalSuperstep = gss;
|
||||||
_swapIncomingCaches(); // write cache becomes the readable cache
|
_swapIncomingCaches(); // write cache becomes the readable cache
|
||||||
// parse aggregated values from conductor
|
_workerAggregators->resetValues();
|
||||||
_conductorAggregators->resetValues();
|
_conductorAggregators->resetValues();
|
||||||
|
// parse aggregated values from conductor
|
||||||
VPackSlice aggValues = data.get(Utils::aggregatorValuesKey);
|
VPackSlice aggValues = data.get(Utils::aggregatorValuesKey);
|
||||||
if (aggValues.isObject()) {
|
if (aggValues.isObject()) {
|
||||||
_conductorAggregators->aggregateValues(aggValues);
|
_conductorAggregators->aggregateValues(aggValues);
|
||||||
}
|
}
|
||||||
_workerAggregators->resetValues();
|
|
||||||
_superstepStats.reset(); // don't forget to reset before the superstep
|
_superstepStats.reset(); // don't forget to reset before the superstep
|
||||||
// execute context
|
// execute context
|
||||||
if (_workerContext != nullptr) {
|
if (_workerContext != nullptr) {
|
||||||
|
@ -136,7 +139,7 @@ void Worker<V, E, M>::prepareGlobalStep(VPackSlice data) {
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
void Worker<V, E, M>::receivedMessages(VPackSlice data) {
|
void Worker<V, E, M>::receivedMessages(VPackSlice data) {
|
||||||
//LOG(INFO) << "Worker received some messages: " << data.toJson();
|
// LOG(INFO) << "Worker received some messages: " << data.toJson();
|
||||||
|
|
||||||
VPackSlice gssSlice = data.get(Utils::globalSuperstepKey);
|
VPackSlice gssSlice = data.get(Utils::globalSuperstepKey);
|
||||||
VPackSlice messageSlice = data.get(Utils::messagesKey);
|
VPackSlice messageSlice = data.get(Utils::messagesKey);
|
||||||
|
@ -231,7 +234,7 @@ void Worker<V, E, M>::_executeGlobalStep(
|
||||||
outCache.reset(new ArrayOutCache<M>(&_state, inCache.get()));
|
outCache.reset(new ArrayOutCache<M>(&_state, inCache.get()));
|
||||||
}
|
}
|
||||||
|
|
||||||
AggregatorUsage workerAggregator(_algorithm.get());
|
AggregatorHandler workerAggregator(_algorithm.get());
|
||||||
|
|
||||||
// TODO look if we can avoid instantiating this
|
// TODO look if we can avoid instantiating this
|
||||||
std::unique_ptr<VertexComputation<V, E, M>> vertexComputation(
|
std::unique_ptr<VertexComputation<V, E, M>> vertexComputation(
|
||||||
|
@ -250,9 +253,9 @@ void Worker<V, E, M>::_executeGlobalStep(
|
||||||
vertexComputation->compute(messages);
|
vertexComputation->compute(messages);
|
||||||
if (vertexEntry->active()) {
|
if (vertexEntry->active()) {
|
||||||
activeCount++;
|
activeCount++;
|
||||||
}/* else {
|
} /* else {
|
||||||
LOG(INFO) << vertexEntry->key() << " vertex has halted";
|
LOG(INFO) << vertexEntry->key() << " vertex has halted";
|
||||||
}*/
|
}*/
|
||||||
}
|
}
|
||||||
// TODO delete read messages immediatly
|
// TODO delete read messages immediatly
|
||||||
// technically messages to non-existing vertices trigger
|
// technically messages to non-existing vertices trigger
|
||||||
|
@ -280,7 +283,7 @@ void Worker<V, E, M>::_executeGlobalStep(
|
||||||
|
|
||||||
// called at the end of a worker thread, needs mutex
|
// called at the end of a worker thread, needs mutex
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
void Worker<V, E, M>::_workerThreadDone(AggregatorUsage* threadAggregators,
|
void Worker<V, E, M>::_workerThreadDone(AggregatorHandler* threadAggregators,
|
||||||
WorkerStats const& threadStats) {
|
WorkerStats const& threadStats) {
|
||||||
MUTEX_LOCKER(guard, _threadMutex); // only one thread at a time
|
MUTEX_LOCKER(guard, _threadMutex); // only one thread at a time
|
||||||
|
|
||||||
|
@ -314,18 +317,22 @@ void Worker<V, E, M>::_workerThreadDone(AggregatorUsage* threadAggregators,
|
||||||
_workerAggregators->serializeValues(package);
|
_workerAggregators->serializeValues(package);
|
||||||
package.close();
|
package.close();
|
||||||
}
|
}
|
||||||
_superstepStats.serializeValues(package); // add stats
|
if (_superstepStats.isDone()) {
|
||||||
|
_superstepStats.serializeValues(package); // add stats
|
||||||
|
package.add(Utils::gssDone, VPackValue(true));
|
||||||
|
}
|
||||||
package.close();
|
package.close();
|
||||||
|
_workerAggregators->resetValues();
|
||||||
|
|
||||||
// TODO ask how to implement message sending without waiting for a response
|
// TODO ask how to implement message sending without waiting for a response
|
||||||
// ============ Call Coordinator ============
|
// ============ Call Coordinator ============
|
||||||
_callConductor(Utils::finishedGSSPath, package.slice());
|
_callConductor(Utils::finishedWorkerStepPath, package.slice());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
void Worker<V, E, M>::finalizeExecution(VPackSlice body) {
|
void Worker<V, E, M>::finalizeExecution(VPackSlice body) {
|
||||||
// Only expect serial calls from the conductor.
|
// Only expect serial calls from the conductor.
|
||||||
//Lock to prevent malicous activity
|
// Lock to prevent malicous activity
|
||||||
MUTEX_LOCKER(guard, _conductorMutex);
|
MUTEX_LOCKER(guard, _conductorMutex);
|
||||||
_running = false;
|
_running = false;
|
||||||
|
|
||||||
|
@ -356,7 +363,7 @@ void Worker<V, E, M>::finalizeExecution(VPackSlice body) {
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
void Worker<V, E, M>::startRecovery(VPackSlice data) {
|
void Worker<V, E, M>::startRecovery(VPackSlice data) {
|
||||||
MUTEX_LOCKER(guard, _conductorMutex);
|
MUTEX_LOCKER(guard, _conductorMutex);
|
||||||
|
|
||||||
_running = true;
|
_running = true;
|
||||||
VPackSlice method = data.get(Utils::recoveryMethodKey);
|
VPackSlice method = data.get(Utils::recoveryMethodKey);
|
||||||
if (method.compareString(Utils::compensate) == 0) {
|
if (method.compareString(Utils::compensate) == 0) {
|
||||||
|
@ -372,8 +379,8 @@ void Worker<V, E, M>::startRecovery(VPackSlice data) {
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
void Worker<V, E, M>::compensateStep(VPackSlice data) {
|
void Worker<V, E, M>::compensateStep(VPackSlice data) {
|
||||||
MUTEX_LOCKER(guard, _conductorMutex);
|
MUTEX_LOCKER(guard, _conductorMutex);
|
||||||
|
|
||||||
_conductorAggregators->resetValues();
|
_conductorAggregators->resetValues();
|
||||||
VPackSlice aggValues = data.get(Utils::aggregatorValuesKey);
|
VPackSlice aggValues = data.get(Utils::aggregatorValuesKey);
|
||||||
if (aggValues.isObject()) {
|
if (aggValues.isObject()) {
|
||||||
|
|
|
@ -25,11 +25,11 @@
|
||||||
|
|
||||||
#include "Basics/Common.h"
|
#include "Basics/Common.h"
|
||||||
#include "Basics/Mutex.h"
|
#include "Basics/Mutex.h"
|
||||||
#include "Pregel/AggregatorUsage.h"
|
#include "Pregel/AggregatorHandler.h"
|
||||||
#include "Pregel/Algorithm.h"
|
#include "Pregel/Algorithm.h"
|
||||||
|
#include "Pregel/Statistics.h"
|
||||||
#include "Pregel/WorkerContext.h"
|
#include "Pregel/WorkerContext.h"
|
||||||
#include "Pregel/WorkerState.h"
|
#include "Pregel/WorkerState.h"
|
||||||
#include "Pregel/Statistics.h"
|
|
||||||
|
|
||||||
struct TRI_vocbase_t;
|
struct TRI_vocbase_t;
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
|
@ -40,7 +40,7 @@ class IWorker {
|
||||||
public:
|
public:
|
||||||
virtual ~IWorker(){};
|
virtual ~IWorker(){};
|
||||||
virtual void prepareGlobalStep(VPackSlice data) = 0;
|
virtual void prepareGlobalStep(VPackSlice data) = 0;
|
||||||
virtual void startGlobalStep(VPackSlice data) = 0; // called by coordinator
|
virtual void startGlobalStep(VPackSlice data) = 0; // called by coordinator
|
||||||
virtual void cancelGlobalStep(VPackSlice data) = 0; // called by coordinator
|
virtual void cancelGlobalStep(VPackSlice data) = 0; // called by coordinator
|
||||||
virtual void receivedMessages(VPackSlice data) = 0;
|
virtual void receivedMessages(VPackSlice data) = 0;
|
||||||
virtual void finalizeExecution(VPackSlice data) = 0;
|
virtual void finalizeExecution(VPackSlice data) = 0;
|
||||||
|
@ -53,58 +53,62 @@ class GraphStore;
|
||||||
|
|
||||||
template <typename M>
|
template <typename M>
|
||||||
class InCache;
|
class InCache;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class RangeIterator;
|
class RangeIterator;
|
||||||
class VertexEntry;
|
class VertexEntry;
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
class VertexContext;
|
class VertexContext;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename V, typename E, typename M>
|
template <typename V, typename E, typename M>
|
||||||
class Worker : public IWorker {
|
class Worker : public IWorker {
|
||||||
//friend class arangodb::RestPregelHandler;
|
// friend class arangodb::RestPregelHandler;
|
||||||
|
|
||||||
bool _running = true;
|
bool _running = true;
|
||||||
WorkerState _state;
|
WorkerState _state;
|
||||||
WorkerStats _workerStats;
|
WorkerStats _workerStats;
|
||||||
uint64_t _expectedGSS = 0;
|
uint64_t _expectedGSS = 0;
|
||||||
std::unique_ptr<Algorithm<V, E, M>> _algorithm;
|
std::unique_ptr<Algorithm<V, E, M>> _algorithm;
|
||||||
std::unique_ptr<WorkerContext> _workerContext;
|
std::unique_ptr<WorkerContext> _workerContext;
|
||||||
Mutex _conductorMutex;// locks callbak methods
|
Mutex _conductorMutex; // locks callbak methods
|
||||||
mutable Mutex _threadMutex;// locks _workerThreadDone
|
mutable Mutex _threadMutex; // locks _workerThreadDone
|
||||||
|
|
||||||
// only valid while recovering to determine the offset
|
// only valid while recovering to determine the offset
|
||||||
// where new vertices were inserted
|
// where new vertices were inserted
|
||||||
size_t _preRecoveryTotal;
|
size_t _preRecoveryTotal;
|
||||||
|
|
||||||
|
std::unique_ptr<AggregatorHandler> _conductorAggregators;
|
||||||
|
std::unique_ptr<AggregatorHandler> _workerAggregators;
|
||||||
std::unique_ptr<GraphStore<V, E>> _graphStore;
|
std::unique_ptr<GraphStore<V, E>> _graphStore;
|
||||||
std::unique_ptr<InCache<M>> _readCache, _writeCache, _nextPhase;
|
|
||||||
std::unique_ptr<AggregatorUsage> _conductorAggregators;
|
|
||||||
std::unique_ptr<AggregatorUsage> _workerAggregators;
|
|
||||||
std::unique_ptr<MessageFormat<M>> _messageFormat;
|
std::unique_ptr<MessageFormat<M>> _messageFormat;
|
||||||
std::unique_ptr<MessageCombiner<M>> _messageCombiner;
|
std::unique_ptr<MessageCombiner<M>> _messageCombiner;
|
||||||
|
// from previous or current superstep
|
||||||
|
std::unique_ptr<InCache<M>> _readCache;
|
||||||
|
// for the current or next superstep
|
||||||
|
std::unique_ptr<InCache<M>> _writeCache;
|
||||||
|
// intended for the next superstep phase
|
||||||
|
std::unique_ptr<InCache<M>> _nextPhase;
|
||||||
|
|
||||||
WorkerStats _superstepStats;
|
WorkerStats _superstepStats;
|
||||||
size_t _runningThreads;
|
size_t _runningThreads;
|
||||||
|
|
||||||
void _swapIncomingCaches() {
|
void _swapIncomingCaches() {
|
||||||
_readCache.swap(_writeCache);
|
_readCache.swap(_writeCache);
|
||||||
_writeCache->clear();
|
_writeCache->clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void _initializeVertexContext(VertexContext<V, E, M> *ctx);
|
void _initializeVertexContext(VertexContext<V, E, M>* ctx);
|
||||||
void _executeGlobalStep(RangeIterator<VertexEntry> &vertexIterator);
|
void _executeGlobalStep(RangeIterator<VertexEntry>& vertexIterator);
|
||||||
void _workerThreadDone(AggregatorUsage *threadAggregators,
|
void _workerThreadDone(AggregatorHandler* threadAggregators,
|
||||||
WorkerStats const& threadStats);
|
WorkerStats const& threadStats);
|
||||||
void _callConductor(std::string path, VPackSlice message);
|
void _callConductor(std::string path, VPackSlice message);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algorithm,
|
Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algorithm,
|
||||||
VPackSlice params);
|
VPackSlice params);
|
||||||
~Worker();
|
~Worker();
|
||||||
|
|
||||||
// ====== called by rest handler =====
|
// ====== called by rest handler =====
|
||||||
void prepareGlobalStep(VPackSlice data) override;
|
void prepareGlobalStep(VPackSlice data) override;
|
||||||
void startGlobalStep(VPackSlice data) override;
|
void startGlobalStep(VPackSlice data) override;
|
||||||
|
|
|
@ -25,8 +25,8 @@
|
||||||
|
|
||||||
#include <velocypack/Slice.h>
|
#include <velocypack/Slice.h>
|
||||||
#include <velocypack/velocypack-aliases.h>
|
#include <velocypack/velocypack-aliases.h>
|
||||||
#include "Pregel/AggregatorUsage.h"
|
|
||||||
#include "Basics/Common.h"
|
#include "Basics/Common.h"
|
||||||
|
#include "Pregel/AggregatorHandler.h"
|
||||||
#include "Pregel/Utils.h"
|
#include "Pregel/Utils.h"
|
||||||
|
|
||||||
namespace arangodb {
|
namespace arangodb {
|
||||||
|
@ -37,8 +37,8 @@ class WorkerContext {
|
||||||
friend class Worker;
|
friend class Worker;
|
||||||
|
|
||||||
uint64_t _vertexCount, _edgeCount;
|
uint64_t _vertexCount, _edgeCount;
|
||||||
const AggregatorUsage* _conductorAggregators;
|
const AggregatorHandler* _conductorAggregators;
|
||||||
AggregatorUsage* _workerAggregators;
|
AggregatorHandler* _workerAggregators;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -55,9 +55,9 @@ class WorkerContext {
|
||||||
virtual void preGlobalSuperstep(uint64_t gss){};
|
virtual void preGlobalSuperstep(uint64_t gss){};
|
||||||
virtual void postGlobalSuperstep(uint64_t gss){};
|
virtual void postGlobalSuperstep(uint64_t gss){};
|
||||||
virtual void postApplication(){};
|
virtual void postApplication(){};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
WorkerContext(VPackSlice params) {};
|
WorkerContext(VPackSlice params){};
|
||||||
|
|
||||||
inline uint64_t vertexCount() const { return _vertexCount; }
|
inline uint64_t vertexCount() const { return _vertexCount; }
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,7 @@ WorkerState::WorkerState(DatabaseID dbname, VPackSlice params)
|
||||||
VPackSlice execNum = params.get(Utils::executionNumberKey);
|
VPackSlice execNum = params.get(Utils::executionNumberKey);
|
||||||
VPackSlice collectionPlanIdMap = params.get(Utils::collectionPlanIdMapKey);
|
VPackSlice collectionPlanIdMap = params.get(Utils::collectionPlanIdMapKey);
|
||||||
VPackSlice globalShards = params.get(Utils::globalShardListKey);
|
VPackSlice globalShards = params.get(Utils::globalShardListKey);
|
||||||
|
//VPackSlice userParams = params.get(Utils::userParametersKey);
|
||||||
if (!coordID.isString() || !edgeShardMap.isObject() ||
|
if (!coordID.isString() || !edgeShardMap.isObject() ||
|
||||||
!vertexShardMap.isObject() || !execNum.isInteger() ||
|
!vertexShardMap.isObject() || !execNum.isInteger() ||
|
||||||
!collectionPlanIdMap.isObject() || !globalShards.isArray()) {
|
!collectionPlanIdMap.isObject() || !globalShards.isArray()) {
|
||||||
|
|
|
@ -23,8 +23,8 @@
|
||||||
#ifndef ARANGODB_PREGEL_WORKER_STATE_H
|
#ifndef ARANGODB_PREGEL_WORKER_STATE_H
|
||||||
#define ARANGODB_PREGEL_WORKER_STATE_H 1
|
#define ARANGODB_PREGEL_WORKER_STATE_H 1
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <velocypack/velocypack-aliases.h>
|
#include <velocypack/velocypack-aliases.h>
|
||||||
|
#include <algorithm>
|
||||||
#include "Basics/Common.h"
|
#include "Basics/Common.h"
|
||||||
#include "Cluster/ClusterInfo.h"
|
#include "Cluster/ClusterInfo.h"
|
||||||
|
|
||||||
|
@ -48,25 +48,30 @@ class WorkerState {
|
||||||
inline uint64_t executionNumber() const { return _executionNumber; }
|
inline uint64_t executionNumber() const { return _executionNumber; }
|
||||||
|
|
||||||
inline uint64_t globalSuperstep() const { return _globalSuperstep; }
|
inline uint64_t globalSuperstep() const { return _globalSuperstep; }
|
||||||
|
|
||||||
inline bool asynchronousMode() const {return _asynchronousMode;}
|
inline uint64_t localSuperstep() const { return _localSuperstep; }
|
||||||
|
|
||||||
|
inline bool asynchronousMode() const { return _asynchronousMode; }
|
||||||
|
|
||||||
inline std::string const& coordinatorId() const { return _coordinatorId; }
|
inline std::string const& coordinatorId() const { return _coordinatorId; }
|
||||||
|
|
||||||
inline std::string const& database() const { return _database; }
|
inline std::string const& database() const { return _database; }
|
||||||
|
|
||||||
inline std::map<CollectionID, std::vector<ShardID>> const& vertexCollectionShards() const {
|
inline std::map<CollectionID, std::vector<ShardID>> const&
|
||||||
|
vertexCollectionShards() const {
|
||||||
return _vertexCollectionShards;
|
return _vertexCollectionShards;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::map<CollectionID, std::vector<ShardID>> const& edgeCollectionShards() const {
|
inline std::map<CollectionID, std::vector<ShardID>> const&
|
||||||
|
edgeCollectionShards() const {
|
||||||
return _edgeCollectionShards;
|
return _edgeCollectionShards;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::map<CollectionID, std::string> const& collectionPlanIdMap() const {
|
inline std::map<CollectionID, std::string> const& collectionPlanIdMap()
|
||||||
|
const {
|
||||||
return _collectionPlanIdMap;
|
return _collectionPlanIdMap;
|
||||||
};
|
};
|
||||||
|
|
||||||
// same content on every worker, has to stay equal!!!!
|
// same content on every worker, has to stay equal!!!!
|
||||||
inline std::vector<ShardID> const& globalShardIDs() const {
|
inline std::vector<ShardID> const& globalShardIDs() const {
|
||||||
return _globalShardIDs;
|
return _globalShardIDs;
|
||||||
|
@ -83,30 +88,33 @@ class WorkerState {
|
||||||
return _localEdgeShardIDs;
|
return _localEdgeShardIDs;
|
||||||
};
|
};
|
||||||
inline size_t shardId(ShardID const& responsibleShard) const {
|
inline size_t shardId(ShardID const& responsibleShard) const {
|
||||||
auto it = std::find(_globalShardIDs.begin(), _globalShardIDs.end(), responsibleShard);
|
auto it = std::find(_globalShardIDs.begin(), _globalShardIDs.end(),
|
||||||
return it != _globalShardIDs.end() ? it - _globalShardIDs.begin() : (uint16_t)-1;
|
responsibleShard);
|
||||||
|
return it != _globalShardIDs.end() ? it - _globalShardIDs.begin()
|
||||||
|
: (uint16_t)-1;
|
||||||
}
|
}
|
||||||
// index in globalShardIDs
|
// index in globalShardIDs
|
||||||
inline bool isLocalVertexShard(size_t shardIndex) const {
|
inline bool isLocalVertexShard(size_t shardIndex) const {
|
||||||
// TODO cache this? prob small
|
// TODO cache this? prob small
|
||||||
ShardID const& shard = _globalShardIDs[shardIndex];
|
ShardID const& shard = _globalShardIDs[shardIndex];
|
||||||
return std::find(_localVertexShardIDs.begin(), _localVertexShardIDs.end(), shard)
|
return std::find(_localVertexShardIDs.begin(), _localVertexShardIDs.end(),
|
||||||
!= _localVertexShardIDs.end();
|
shard) != _localVertexShardIDs.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint64_t _executionNumber = 0;
|
uint64_t _executionNumber = 0;
|
||||||
uint64_t _globalSuperstep = 0;
|
uint64_t _globalSuperstep = 0;
|
||||||
|
uint64_t _localSuperstep = 0;
|
||||||
bool _asynchronousMode = false;
|
bool _asynchronousMode = false;
|
||||||
// uint64_t _numWorkerThreads = 1;
|
|
||||||
|
|
||||||
std::string _coordinatorId;
|
std::string _coordinatorId;
|
||||||
std::string _database;
|
std::string _database;
|
||||||
|
|
||||||
std::vector<ShardID> _globalShardIDs;
|
std::vector<ShardID> _globalShardIDs;
|
||||||
std::vector<ShardID> _localVertexShardIDs, _localEdgeShardIDs;
|
std::vector<ShardID> _localVertexShardIDs, _localEdgeShardIDs;
|
||||||
|
|
||||||
std::map<CollectionID, std::vector<ShardID>> _vertexCollectionShards, _edgeCollectionShards;
|
std::map<CollectionID, std::vector<ShardID>> _vertexCollectionShards,
|
||||||
|
_edgeCollectionShards;
|
||||||
std::map<std::string, std::string> _collectionPlanIdMap;
|
std::map<std::string, std::string> _collectionPlanIdMap;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,7 @@ module.exports = function (gname, filename) {
|
||||||
|
|
||||||
graph[eColl].save(vColl+"/"+parts[0],
|
graph[eColl].save(vColl+"/"+parts[0],
|
||||||
vColl+"/"+parts[1],
|
vColl+"/"+parts[1],
|
||||||
{_vertex:parts[0], value:-1});
|
{_vertex:parts[0]});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
|
@ -118,10 +118,10 @@ RestStatus RestPregelHandler::execute() {
|
||||||
if (exe) {
|
if (exe) {
|
||||||
exe->receivedMessages(body);
|
exe->receivedMessages(body);
|
||||||
}
|
}
|
||||||
} else if (suffix[0] == Utils::finishedGSSPath) {
|
} else if (suffix[0] == Utils::finishedWorkerStepPath) {
|
||||||
Conductor *exe = PregelFeature::instance()->conductor(executionNumber);
|
Conductor *exe = PregelFeature::instance()->conductor(executionNumber);
|
||||||
if (exe) {
|
if (exe) {
|
||||||
exe->finishedGlobalStep(body);
|
exe->finishedWorkerStep(body);
|
||||||
} else {
|
} else {
|
||||||
LOG(ERR) << "Conductor not found: " << executionNumber;
|
LOG(ERR) << "Conductor not found: " << executionNumber;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue