From 3c0463b684e172bae514f21cc7ecf1d589f0b0f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20Gra=CC=88tzer?=
Date: Tue, 30 May 2017 10:27:20 +0200
Subject: [PATCH 1/6] Added arangoimp documentation

---
 CHANGELOG                                          |  1 +
 .../Books/Manual/Administration/Arangoimp.md       |  7 +++++++
 .../Books/Manual/ReleaseNotes/NewFeatures32.md     | 13 ++++++++++++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index a76a07dd17..84dd8920db 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -32,6 +32,7 @@ devel
 
 * added distinction between hasUser and authorized within Foxx
   (cluster internal requests are authorized requests but don't have a user)
 
+* arangoimp now has a `—threads`option to enable parallel imports of data
 
 v3.2.alpha4 (2017-04-25)

diff --git a/Documentation/Books/Manual/Administration/Arangoimp.md b/Documentation/Books/Manual/Administration/Arangoimp.md
index 29f62ae006..cf72654913 100644
--- a/Documentation/Books/Manual/Administration/Arangoimp.md
+++ b/Documentation/Books/Manual/Administration/Arangoimp.md
@@ -92,6 +92,13 @@ Please note that by default, _arangoimp_ will import data into the specified
 collection in the default database (*_system*).
 To specify a different database, use the *--server.database*
 option when invoking _arangoimp_.
 
+The tool also supports parallel imports, with multiple threads. This will only
+bring a notable performance increase with the new rocksdb engine. To specify
+the number of parallel threads use the `--threads` option to specfiy the number.
+
+  > arangoimp --threads 4 --file "data.json" --type json --collection "users"
+
+
 
 ### JSON input file formats

diff --git a/Documentation/Books/Manual/ReleaseNotes/NewFeatures32.md b/Documentation/Books/Manual/ReleaseNotes/NewFeatures32.md
index 46d4e0b8c2..84464337d7 100644
--- a/Documentation/Books/Manual/ReleaseNotes/NewFeatures32.md
+++ b/Documentation/Books/Manual/ReleaseNotes/NewFeatures32.md
@@ -250,9 +250,18 @@ Foxx
 uploaded file will be used as the service's main entry point.
 
-Pregel
+Distributed Graph Processing
 ------
 
+* We added support for executing distributed graph algorithms aka `Pregel`.
+* Users can run arbitrary algorithms on an entire graph, including in cluster mode.
+* We implemented a number of algorithms for various well-known graph measures:
+  * Connected Components
+  * PageRank
+  * Shortest Paths
+  * Centrality Measures (Centrality and Betweenness)
+  * Community Detection (via Label Propagation, Speakers-Listeners Label Propagation or DMID)
+* Users can contribute their own algorithms
 
 AQL
 ---
 
@@ -344,6 +353,8 @@ Client tools
   `--translate` works for CSV and TSV inputs only.
 
+* added `--threads` option to arangoimp to specify the number of parallel import threads
+
 * changed default value for client tools option `--server.max-packet-size`
   from 128 MB to 256 MB. this allows transferring bigger result sets from the
   server without the client tools rejecting them as invalid.
 
From bc9aba57540fb36968ac311ce22e337dc3d0fd12 Mon Sep 17 00:00:00 2001
From: Kaveh Vahedipour
Date: Tue, 30 May 2017 10:42:02 +0200
Subject: [PATCH 2/6] fixing glitch in dbservers aardvark route

---
 js/apps/system/_admin/aardvark/APP/cluster.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/js/apps/system/_admin/aardvark/APP/cluster.js b/js/apps/system/_admin/aardvark/APP/cluster.js
index 74b149ffc2..b166aaf594 100644
--- a/js/apps/system/_admin/aardvark/APP/cluster.js
+++ b/js/apps/system/_admin/aardvark/APP/cluster.js
@@ -55,7 +55,7 @@ if (cluster.isCluster()) {
     res.json(list.map(n => {
       var r = { "id": n.serverId, "name": n.serverName, "role": "primary" };
       r.status = "ok";
-      const endpoint = global.ArangoClusterInfo.getServerEndpoint(n);
+      const endpoint = global.ArangoClusterInfo.getServerEndpoint(r.id);
       const proto = endpoint.substr(0, 6);
       if (proto === "tcp://") {
         r.protocol = "http";

From 74e4512b02b9fd844d810c69d2719e1f818ca055 Mon Sep 17 00:00:00 2001
From: jsteemann
Date: Tue, 30 May 2017 10:49:36 +0200
Subject: [PATCH 3/6] updated threads documentation

---
 .../Books/Manual/Administration/Arangoimp.md | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Documentation/Books/Manual/Administration/Arangoimp.md b/Documentation/Books/Manual/Administration/Arangoimp.md
index cf72654913..3047c51e65 100644
--- a/Documentation/Books/Manual/Administration/Arangoimp.md
+++ b/Documentation/Books/Manual/Administration/Arangoimp.md
@@ -92,13 +92,17 @@ Please note that by default, _arangoimp_ will import data into the specified
 collection in the default database (*_system*).
 To specify a different database, use the *--server.database*
 option when invoking _arangoimp_.
 
-The tool also supports parallel imports, with multiple threads. This will only
-bring a notable performance increase with the new rocksdb engine. To specify
-the number of parallel threads use the `--threads` option to specfiy the number.
-
-  > arangoimp --threads 4 --file "data.json" --type json --collection "users"
+The tool also supports parallel imports, with multiple threads. Using multiple
+threads may provide a speedup, especially when using the RocksDB storage engine.
+To specify the number of parallel threads use the `--threads` option:
 
+  > arangoimp --threads 4 --file "data.json" --type json --collection "users"
 
+Note that using multiple threads may lead to a non-sequential import of the input
+data. Data that appears later in the input file may be imported earlier than data
+that appears earlier in the input file. This is normally not a problem but may cause
+issues when there are data dependencies or duplicates in the import data. In
+this case, the number of threads should be set to 1.
 
 ### JSON input file formats
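
The ordering caveat documented in patch 3 is easy to demonstrate in isolation. Below is a minimal C++ sketch, not arangoimp's actual implementation: an imaginary input file is split into four consecutive chunks, each handed to a worker thread, and the chunks finish in a different order from run to run. This is precisely why rows from late in the file can land in the collection before rows from early in the file once `--threads` is greater than 1.

```cpp
// Toy model of a chunked, multi-threaded import. The per-chunk sleep stands
// in for variable server-side processing time; everything else is generic.
#include <chrono>
#include <iostream>
#include <mutex>
#include <random>
#include <thread>
#include <vector>

int main() {
  std::mutex outputLock;             // serializes access to std::cout
  std::vector<std::thread> workers;

  for (int chunk = 0; chunk < 4; ++chunk) {   // 4 consecutive input chunks
    workers.emplace_back([chunk, &outputLock]() {
      std::mt19937 rng(std::random_device{}());
      std::uniform_int_distribution<int> delay(10, 100);
      std::this_thread::sleep_for(std::chrono::milliseconds(delay(rng)));

      std::lock_guard<std::mutex> guard(outputLock);
      std::cout << "chunk " << chunk << " imported\n";  // order varies per run
    });
  }
  for (auto& t : workers) {
    t.join();  // wait for all chunks before exiting
  }
  return 0;
}
```

With `--threads 1` there is only one worker, so completion order necessarily matches input order, which is why the documentation recommends it for data with dependencies or duplicates.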

From 6bb6d8ba75643ecd4a30a47948085766750a437c Mon Sep 17 00:00:00 2001
From: jsteemann
Date: Tue, 30 May 2017 10:50:02 +0200
Subject: [PATCH 4/6] updated CHANGELOG

---
 CHANGELOG | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index 84dd8920db..ea1b935130 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -32,7 +32,8 @@ devel
 
 * added distinction between hasUser and authorized within Foxx
   (cluster internal requests are authorized requests but don't have a user)
 
-* arangoimp now has a `—threads`option to enable parallel imports of data
+
+* arangoimp now has a `--threads` option to enable parallel imports of data
 
 v3.2.alpha4 (2017-04-25)

From 64c0b03a99efd8c1ab8316e704d7709dbf37b596 Mon Sep 17 00:00:00 2001
From: Jan Christoph Uhde
Date: Tue, 30 May 2017 09:48:27 +0200
Subject: [PATCH 5/6] Add missing changes to geo index

---
 arangod/RocksDBEngine/RocksDBCommon.h         | 20 ++++++--
 arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp | 46 +++----------
 tests/RocksDBEngine/TypeConversionTest.cpp    |  5 +-
 3 files changed, 26 insertions(+), 45 deletions(-)

diff --git a/arangod/RocksDBEngine/RocksDBCommon.h b/arangod/RocksDBEngine/RocksDBCommon.h
index 040d62d5f6..c5be720511 100644
--- a/arangod/RocksDBEngine/RocksDBCommon.h
+++ b/arangod/RocksDBEngine/RocksDBCommon.h
@@ -78,18 +78,32 @@ class Methods;
 }
 
 namespace rocksutils {
 
+//// to persistent
 template <typename T>
 typename std::enable_if<std::is_integral<T>::value,void>::type
-toPersistent(T in, char* out){
+toPersistent(T in, char*& out){
   using TT = typename std::decay<T>::type;
   std::memcpy(out, &in, sizeof(TT));
   out += sizeof(TT);
 }
 
+//// from persistent
 template <typename T,
           typename std::enable_if<std::is_integral<typename std::decay<T>::type>::value, int>::type = 0
          >
-typename std::decay<T>::type fromPersistent(char const* in){
+typename std::decay<T>::type fromPersistent(char const*& in){
+  using TT = typename std::decay<T>::type;
+  TT out;
+  std::memcpy(&out, in, sizeof(TT));
+  in += sizeof(TT);
+  return out;
+}
+
+//we need this overload or the template will match
+template <typename T,
+          typename std::enable_if<std::is_integral<typename std::decay<T>::type>::value, int>::type = 1
+         >
+typename std::decay<T>::type fromPersistent(char *& in){
   using TT = typename std::decay<T>::type;
   TT out;
   std::memcpy(&out, in, sizeof(TT));
@@ -133,7 +147,7 @@ void uint16ToPersistent(std::string& out, uint16_t value);
 
 RocksDBTransactionState* toRocksTransactionState(transaction::Methods* trx);
 RocksDBMethods* toRocksMethods(transaction::Methods* trx);
-  
+
 rocksdb::TransactionDB* globalRocksDB();
 RocksDBEngine* globalRocksEngine();
 arangodb::Result globalRocksDBPut(
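
The `//we need this overload or the template will match` comment in this hunk is terse, so here is a self-contained sketch (re-derived from the hunk above, simplified) of why the second `fromPersistent` overload exists: a `char*` lvalue cannot bind to a `char const*&` parameter, so without the extra overload any caller holding a non-const cursor would fail to compile. The `= 1` default merely gives the second template a distinct signature.

```cpp
#include <cstring>
#include <iostream>
#include <type_traits>

// Reads an integral T from the buffer and advances the caller's cursor,
// so no manual "in += sizeof(T)" bookkeeping is needed at the call site.
template <typename T,
          typename std::enable_if<
              std::is_integral<typename std::decay<T>::type>::value, int>::type = 0>
typename std::decay<T>::type fromPersistent(char const*& in) {
  using TT = typename std::decay<T>::type;
  TT out;
  std::memcpy(&out, in, sizeof(TT));
  in += sizeof(TT);
  return out;
}

// Overload for non-const buffers: a char* lvalue is not reference-compatible
// with char const*&, so overload resolution needs this second entry point.
template <typename T,
          typename std::enable_if<
              std::is_integral<typename std::decay<T>::type>::value, int>::type = 1>
typename std::decay<T>::type fromPersistent(char*& in) {
  using TT = typename std::decay<T>::type;
  TT out;
  std::memcpy(&out, in, sizeof(TT));
  in += sizeof(TT);
  return out;
}

int main() {
  char buf[2 * sizeof(int)];
  int a = 42, b = 7;
  std::memcpy(buf, &a, sizeof(int));
  std::memcpy(buf + sizeof(int), &b, sizeof(int));

  char* cursor = &buf[0];                    // non-const: needs the 2nd overload
  int first = fromPersistent<int>(cursor);   // 42; cursor advanced 4 bytes
  int second = fromPersistent<int>(cursor);  // 7
  std::cout << first << " " << second << "\n";
  return 0;
}
```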
diff --git a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp
index 740ec38a1e..0f269300d9 100644
--- a/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp
+++ b/arangod/RocksDBEngine/RocksDBGeoIndexImpl.cpp
@@ -267,94 +267,72 @@ typedef struct {
 namespace arangodb {
 namespace rocksdbengine {
 
-GeoCoordinate& fromPersistent(char const* in, GeoCoordinate& out){
+static GeoCoordinate& fromPersistent(char const* in, GeoCoordinate& out){
   const char* start = in;
 
   //convert latituide and longitute to uint64 for network transfer / storage
   uint64_t fromStorage = rocksutils::fromPersistent<uint64_t>(start);
-  start += sizeof(uint64_t);
   out.latitude = rocksutils::intToDouble(fromStorage);
 
   fromStorage = rocksutils::fromPersistent<uint64_t>(start);
-  start += sizeof(uint64_t);
   out.longitude = rocksutils::intToDouble(fromStorage);
 
   out.data = rocksutils::fromPersistent<uint64_t>(start);
-  start += sizeof(uint64_t);
 
   return out;
 }
 
-void toPersistent(GeoCoordinate& in, char* out){
+static void toPersistent(GeoCoordinate& in, char* out){
   char* start = out;
 
   uint64_t toStorage = rocksutils::doubleToInt(in.latitude);
   rocksutils::toPersistent(toStorage, start);
-  start += sizeof(in.latitude);
-
+
   toStorage = rocksutils::doubleToInt(in.longitude);
   rocksutils::toPersistent(toStorage, start);
-  start += sizeof(in.longitude);
 
   rocksutils::toPersistent(in.data, start);
-  start += sizeof(in.data);
 }
 
-GeoPot& fromPersistent(char const* in, GeoPot& out){
+static GeoPot& fromPersistent(char const* in, GeoPot& out){
   const char* start = in;
 
   out.LorLeaf = rocksutils::fromPersistent<int32_t>(start);
-  start += sizeof(int32_t);
   out.RorPoints = rocksutils::fromPersistent<int32_t>(start);
-  start += sizeof(int32_t);
   out.middle = rocksutils::fromPersistent<GeoString>(start);
-  start += sizeof(GeoString);
 
   for(std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++){
     out.maxdist[i] = rocksutils::fromPersistent<GeoFix>(start);
-    start += sizeof(GeoFix);
   }
 
   out.start = rocksutils::fromPersistent<GeoString>(start);
-  start += sizeof(GeoString);
   out.end = rocksutils::fromPersistent<GeoString>(start);
-  start += sizeof(GeoString);
   out.level = rocksutils::fromPersistent<int32_t>(start);
-  start += sizeof(int32_t);
 
   for(std::size_t i = 0; i < GeoIndexFIXEDPOINTS; i++){
     out.points[i] = rocksutils::fromPersistent<int32_t>(start);
-    start += sizeof(int32_t);
   }
   return out;
 }
 
-void toPersistent(GeoPot const& in, char* out){
+static void toPersistent(GeoPot const& in, char* out){
   char* start = out;
 
   rocksutils::toPersistent(in.LorLeaf, start);
-  start += sizeof(int32_t);
   rocksutils::toPersistent(in.RorPoints, start);
-  start += sizeof(int32_t);
   rocksutils::toPersistent(in.middle, start);
-  start += sizeof(GeoString);
 
   for(std::size_t i = 0; i< GeoIndexFIXEDPOINTS; i++){
     rocksutils::toPersistent(in.maxdist[i], start);
-    start += sizeof(GeoFix);
   }
 
   rocksutils::toPersistent(in.start, start);
-  start += sizeof(GeoString);
   rocksutils::toPersistent(in.end, start);
-  start += sizeof(GeoString);
   rocksutils::toPersistent(in.level, start);
-  start += sizeof(int32_t);
 
   for(std::size_t i = 0; i< GeoIndexFIXEDPOINTS; i++){
     rocksutils::toPersistent(in.points[i], start);
-    start += sizeof(int32_t);
   }
 }
@@ -413,7 +391,6 @@ void SlotRead(GeoIx * gix, int slot, GeoCoordinate * gc /*out param*/)
   std::string slotValue;
   RocksRead(gix, key, &slotValue);
   fromPersistent(slotValue.data(),*gc);
-  //memcpy(gc, slotValue.data(), slotValue.size());
 }
 void SlotWrite(GeoIx * gix,int slot, GeoCoordinate * gc)
 {
@@ -424,11 +401,6 @@ void SlotWrite(GeoIx * gix,int slot, GeoCoordinate * gc)
 
   GeoCoordinate test;
   fromPersistent(&data[0],test);
-  // RocksWrite(gix, key, rocksdb::Slice((char*)gc, sizeof(GeoCoordinate)));
-
-  TRI_ASSERT(test.longitude == gc->longitude);
-  TRI_ASSERT(test.latitude == gc->latitude);
-  TRI_ASSERT(test.data == gc->data);
 }
 
 void PotRead(GeoIx * gix, int pot, GeoPot * gp)
@@ -438,19 +410,13 @@ void PotRead(GeoIx * gix, int pot, GeoPot * gp)
   RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false);
   std::string potValue;
   RocksRead(gix, key, &potValue);
   TRI_ASSERT(potValue.size() == sizeof(GeoPot));
   fromPersistent(potValue.data(), *gp);
-  //memcpy(gp, potValue.data(), potValue.size());
 }
-  
+
 void PotWrite(GeoIx * gix, int pot, GeoPot * gp) {
   RocksDBKey key = RocksDBKey::GeoIndexValue(gix->objectId, pot, false);
   char data[sizeof (GeoPot)];
   toPersistent(*gp, &data[0]);
   RocksWrite(gix, key, rocksdb::Slice(&data[0], sizeof(GeoPot)));
-  //RocksWrite(gix, key, rocksdb::Slice((char*)gp, sizeof(GeoPot)));
-
-  GeoPot test;
-  fromPersistent(&data[0],test);
-  TRI_ASSERT(test.level == gp->level);
 }

diff --git a/tests/RocksDBEngine/TypeConversionTest.cpp b/tests/RocksDBEngine/TypeConversionTest.cpp
index 9d605973b0..94e523b723 100644
--- a/tests/RocksDBEngine/TypeConversionTest.cpp
+++ b/tests/RocksDBEngine/TypeConversionTest.cpp
@@ -48,8 +48,9 @@ template <typename T>
 void doFromToTest(T num){
   T x = num , y;
   char s[sizeof(x)];
-  toPersistent(x,&s[0]);
-  y = fromPersistent<T>(&s[0]);
+  char* p = &s[0];
+  toPersistent(x,p);
+  y = fromPersistent<T>(p);
   CHECK((x == y));
 }
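
Patch 5's call sites become much shorter because every helper now advances the cursor itself. The following sketch reproduces the GeoCoordinate round trip end to end under stated assumptions: `doubleToInt`/`intToDouble` mimic the rocksutils helpers of the same name as bit-preserving copies (the hunk's comment says latitude and longitude are converted to uint64 for network transfer / storage), but their real implementations are not shown in the diff and may differ.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

struct Coord { double latitude; double longitude; uint64_t data; };

// Bit-preserving double <-> uint64_t conversion (NOT a numeric cast); this is
// an assumption about what rocksutils::doubleToInt/intToDouble do.
static uint64_t doubleToInt(double d) { uint64_t i; std::memcpy(&i, &d, sizeof d); return i; }
static double intToDouble(uint64_t i) { double d; std::memcpy(&d, &i, sizeof i); return d; }

// Write/read one uint64_t and advance the caller's cursor, mirroring the
// char*& / char const*& signatures introduced in RocksDBCommon.h.
static void writeU64(uint64_t v, char*& out) { std::memcpy(out, &v, sizeof v); out += sizeof v; }
static uint64_t readU64(char const*& in) { uint64_t v; std::memcpy(&v, in, sizeof v); in += sizeof v; return v; }

int main() {
  Coord in{52.52, 13.405, 12345};
  char buf[3 * sizeof(uint64_t)];

  char* w = &buf[0];  // no manual "w += sizeof(...)" after each field
  writeU64(doubleToInt(in.latitude), w);
  writeU64(doubleToInt(in.longitude), w);
  writeU64(in.data, w);

  char const* r = &buf[0];
  Coord out{};
  out.latitude = intToDouble(readU64(r));   // fields read back in write order
  out.longitude = intToDouble(readU64(r));
  out.data = readU64(r);

  assert(out.latitude == in.latitude);
  assert(out.longitude == in.longitude);
  assert(out.data == in.data);
  return 0;
}
```

This is also what the updated TypeConversionTest exercises: the test now declares `char* p = &s[0];` once and lets `toPersistent`/`fromPersistent` move `p`, instead of recomputing `&s[0]` for every call.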

From 6abbe3fcc7a1ff3aad74f12c446efed8bc72d4b5 Mon Sep 17 00:00:00 2001
From: Kaveh Vahedipour
Date: Tue, 30 May 2017 11:15:45 +0200
Subject: [PATCH 6/6] fix windows compile warnings with seeding RNG through
 integer precision

---
 lib/Random/RandomGenerator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Random/RandomGenerator.cpp b/lib/Random/RandomGenerator.cpp
index f5086224c7..9df2d9f534 100644
--- a/lib/Random/RandomGenerator.cpp
+++ b/lib/Random/RandomGenerator.cpp
@@ -60,7 +60,7 @@ unsigned long RandomDevice::seed() {
   auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(
       std::chrono::high_resolution_clock::now().time_since_epoch()).count();
 
-  return dev + tid + now;
+  return reinterpret_cast<unsigned long>(dev + tid + now);
 }
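
One portability note on this last hunk: `reinterpret_cast` between integral types is not a standard conversion (conforming compilers accept it mainly for pointer/integer reinterpretation), so the conventional spelling of "narrow this 64-bit sum to unsigned long on purpose" is a `static_cast`. The sketch below shows the same seed-mixing idea with that spelling; the definitions of `dev` and `tid` are not visible in the hunk, so the ones used here (a `std::random_device` draw and a hashed thread id) are assumptions.

```cpp
#include <chrono>
#include <functional>
#include <random>
#include <thread>

// Mix device entropy, the current thread id and a high-resolution timestamp
// into one seed, then narrow explicitly so MSVC's C4244/C4267-style
// precision-loss warnings stay quiet (unsigned long is 32 bits on Windows).
unsigned long makeSeed() {
  unsigned long dev = std::random_device{}();                    // assumed
  std::size_t tid =
      std::hash<std::thread::id>{}(std::this_thread::get_id());  // assumed
  auto now = std::chrono::duration_cast<std::chrono::nanoseconds>(
                 std::chrono::high_resolution_clock::now().time_since_epoch())
                 .count();
  return static_cast<unsigned long>(dev + tid + now);  // intentional narrowing
}

int main() {
  std::mt19937 rng(makeSeed());  // e.g. seed a Mersenne Twister with it
  (void)rng();
  return 0;
}
```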