
All necessities supported

Simon Grätzer 2016-12-17 14:09:41 +01:00
parent d1a47f5b29
commit f084bd7b12
4 changed files with 91 additions and 43 deletions

View File

@@ -40,12 +40,12 @@ template <typename M>
OutCache<M>::OutCache(WorkerConfig* state, InCache<M>* cache)
: _state(state), _format(cache->format()), _localCache(cache) {
_baseUrl = Utils::baseUrl(_state->database());
_gss = _state->globalSuperstep();
}
template <typename M>
void OutCache<M>::sendNextGSS(bool np) {
_gss = _state->globalSuperstep() + (np ? 1 : 0);
OutCache<M>::OutCache(WorkerConfig* state, InCache<M>* cache, InCache<M>* nextGSS)
: _state(state), _format(cache->format()), _localCache(cache), _localCacheNextGSS(nextGSS) {
_baseUrl = Utils::baseUrl(_state->database());
}
// ================= ArrayOutCache ==================
@@ -65,9 +65,13 @@ template <typename M>
void ArrayOutCache<M>::appendMessage(prgl_shard_t shard, std::string const& key,
M const& data) {
if (this->_state->isLocalVertexShard(shard)) {
this->_localCache->setDirect(shard, key, data);
// LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
this->_sendMessages++;
if (this->_sendToNextGSS) {
this->_localCacheNextGSS->setDirect(shard, key, data);
this->_sendCountNextGSS++;
} else {
this->_localCache->setDirect(shard, key, data);
this->_sendCount++;
}
} else {
_shardMap[shard][key].push_back(data);
if (this->_containedMessages++ > this->_batchSize) {
@@ -79,6 +83,10 @@ void ArrayOutCache<M>::appendMessage(prgl_shard_t shard, std::string const& key,
template <typename M>
void ArrayOutCache<M>::flushMessages() {
LOG(INFO) << "Beginning to send messages to other machines";
uint64_t gss = this->_state->globalSuperstep();
if (this->_sendToNextGSS) {
gss += 1;
}
std::vector<ClusterCommRequest> requests;
for (auto const& it : _shardMap) {
@@ -98,7 +106,11 @@ void ArrayOutCache<M>::flushMessages() {
package.add(VPackValue(vertexMessagePair.first));
for (M const& val : vertexMessagePair.second) {
this->_format->addValue(package, val);
this->_sendMessages++;
if (this->_sendToNextGSS) {
this->_sendCountNextGSS++;
} else {
this->_sendCount++;
}
}
package.close();
}
@@ -106,7 +118,7 @@ void ArrayOutCache<M>::flushMessages() {
package.add(Utils::senderKey, VPackValue(ServerState::instance()->getId()));
package.add(Utils::executionNumberKey,
VPackValue(this->_state->executionNumber()));
package.add(Utils::globalSuperstepKey, VPackValue(this->_gss));
package.add(Utils::globalSuperstepKey, VPackValue(gss));
package.close();
// add a request
ShardID const& shardId = this->_state->globalShardIDs()[shard];
@@ -137,6 +149,12 @@ CombiningOutCache<M>::CombiningOutCache(WorkerConfig* state,
CombiningInCache<M>* cache)
: OutCache<M>(state, cache), _combiner(cache->combiner()) {}
template <typename M>
CombiningOutCache<M>::CombiningOutCache(WorkerConfig* state,
CombiningInCache<M>* cache,
InCache<M> *nextPhase)
: OutCache<M>(state, cache, nextPhase), _combiner(cache->combiner()) {}
template <typename M>
CombiningOutCache<M>::~CombiningOutCache() {
clear();
@@ -153,9 +171,13 @@ void CombiningOutCache<M>::appendMessage(prgl_shard_t shard,
std::string const& key,
M const& data) {
if (this->_state->isLocalVertexShard(shard)) {
this->_localCache->setDirect(shard, key, data);
// LOG(INFO) << "Worker: Got messages for myself " << key << " <- " << data;
this->_sendMessages++;
if (this->_sendToNextGSS) {
this->_localCacheNextGSS->setDirect(shard, key, data);
this->_sendCountNextGSS++;
} else {
this->_localCache->setDirect(shard, key, data);
this->_sendCount++;
}
} else {
std::unordered_map<std::string, M>& vertexMap = _shardMap[shard];
auto it = vertexMap.find(key);
@@ -174,6 +196,10 @@ void CombiningOutCache<M>::appendMessage(prgl_shard_t shard,
template <typename M>
void CombiningOutCache<M>::flushMessages() {
LOG(INFO) << "Beginning to send messages to other machines";
uint64_t gss = this->_state->globalSuperstep();
if (this->_sendToNextGSS) {
gss += 1;
}
std::vector<ClusterCommRequest> requests;
for (auto const& it : _shardMap) {
@@ -194,13 +220,17 @@ void CombiningOutCache<M>::flushMessages() {
package.add(VPackValue(shard));
package.add(VPackValue(vertexMessagePair.first));
this->_format->addValue(package, vertexMessagePair.second);
this->_sendMessages++;
if (this->_sendToNextGSS) {
this->_sendCountNextGSS++;
} else {
this->_sendCount++;
}
}
package.close();
package.add(Utils::senderKey, VPackValue(ServerState::instance()->getId()));
package.add(Utils::executionNumberKey,
VPackValue(this->_state->executionNumber()));
package.add(Utils::globalSuperstepKey, VPackValue(this->_gss));
package.add(Utils::globalSuperstepKey, VPackValue(gss));
package.close();
// add a request
ShardID const& shardId = this->_state->globalShardIDs()[shard];
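A note on the pattern recurring throughout this file: when _sendToNextGSS is set, locally delivered messages go into a second in-cache reserved for superstep gss + 1, and each path keeps its own counter; remote messages are still batched per shard and flushed with the target superstep number. A minimal self-contained sketch of the local routing, using hypothetical simplified types (std::map standing in for the real InCache<M>, no VelocyPack or networking):

#include <cstddef>
#include <map>
#include <string>
#include <vector>

using Message = double;
// stand-in for InCache<M>: vertex key -> received messages
using LocalCache = std::map<std::string, std::vector<Message>>;

class OutCacheSketch {
  LocalCache* _localCache;         // read during the current superstep
  LocalCache* _localCacheNextGSS;  // deferred until superstep gss + 1
  bool _sendToNextGSS = false;
  size_t _sendCount = 0;
  size_t _sendCountNextGSS = 0;

 public:
  OutCacheSketch(LocalCache* current, LocalCache* next)
      : _localCache(current), _localCacheNextGSS(next) {}

  void sendToNextGSS(bool np) { _sendToNextGSS = np; }
  size_t sendCount() const { return _sendCount; }
  size_t sendCountNextGSS() const { return _sendCountNextGSS; }

  // local messages bypass the network; the flag only decides which
  // superstep's cache receives them
  void appendLocalMessage(std::string const& key, Message const& data) {
    if (_sendToNextGSS) {
      (*_localCacheNextGSS)[key].push_back(data);
      _sendCountNextGSS++;
    } else {
      (*_localCache)[key].push_back(data);
      _sendCount++;
    }
  }
};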

View File

@@ -52,23 +52,27 @@ class OutCache {
WorkerConfig const* _state;
MessageFormat<M> const* _format;
InCache<M>* _localCache;
InCache<M>* _localCacheNextGSS = nullptr;
std::string _baseUrl;
uint32_t _batchSize = 1000;
uint64_t _gss;
bool _sendToNextGSS = false;
/// @brief current number of vertices stored
size_t _containedMessages = 0;
size_t _sendMessages = 0;
size_t _sendCount = 0;
size_t _sendCountNextGSS = 0;
bool shouldFlushCache();
public:
OutCache(WorkerConfig* state, InCache<M>* cache);
OutCache(WorkerConfig* state, InCache<M>* cache, InCache<M>* nextGSSCache);
virtual ~OutCache(){};
size_t sendMessageCount() const { return _sendMessages; }
size_t sendCount() const { return _sendCount; }
size_t sendCountNextGSS() const { return _sendCountNextGSS; }
uint32_t batchSize() const { return _batchSize; }
void setBatchSize(uint32_t bs) { _batchSize = bs; }
void sendNextGSS(bool np);
void sendToNextGSS(bool np) { _sendToNextGSS = np; }
virtual void clear() = 0;
virtual void appendMessage(prgl_shard_t shard, std::string const& key,
@@ -86,6 +90,8 @@ class ArrayOutCache : public OutCache<M> {
public:
ArrayOutCache(WorkerConfig* state, InCache<M>* cache)
: OutCache<M>(state, cache) {}
ArrayOutCache(WorkerConfig* state, InCache<M>* cache, InCache<M>* nextGSSCache)
: OutCache<M>(state, cache, nextGSSCache) {}
~ArrayOutCache();
void clear() override;
@@ -103,7 +109,11 @@ class CombiningOutCache : public OutCache<M> {
_shardMap;
public:
CombiningOutCache(WorkerConfig* state, CombiningInCache<M>* cache);
CombiningOutCache(WorkerConfig* state,
CombiningInCache<M>* cache);
CombiningOutCache(WorkerConfig* state,
CombiningInCache<M>* cache,
InCache<M> *nextPhase);
~CombiningOutCache();
void clear() override;

View File

@@ -55,6 +55,7 @@ Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
_conductorAggregators.reset(new AggregatorHandler(algo));
_workerAggregators.reset(new AggregatorHandler(algo));
_graphStore.reset(new GraphStore<V, E>(vocbase, _algorithm->inputFormat()));
_nextGSSSendMessageCount = 0;
if (_messageCombiner) {
_readCache = new CombiningInCache<M>(_messageFormat.get(), _messageCombiner.get());
_writeCache =
@@ -93,19 +94,16 @@ Worker<V, E, M>::Worker(TRI_vocbase_t* vocbase, Algorithm<V, E, M>* algo,
});
}
/*template <typename M>
GSSContext::~GSSContext() {}*/
template <typename V, typename E, typename M>
Worker<V, E, M>::~Worker() {
LOG(INFO) << "Called ~Worker()";
_state = WorkerState::DONE;
if (_readCache) {
delete _readCache;
}
if (_writeCache) {
delete _writeCache;
}
if (_nextPhase) {
delete _nextPhase;
}
delete _readCache;
delete _writeCache;
delete _writeCacheNextGSS;
}
template <typename V, typename E, typename M>
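The unguarded deletes above rely on a standard guarantee: applying delete to a null pointer is a well-defined no-op in C++, so the removed if-checks were redundant. For illustration:

int* p = nullptr;
delete p;  // well-defined: does nothing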
@@ -146,14 +144,14 @@ void Worker<V, E, M>::prepareGlobalStep(VPackSlice data) {
if (_config.asynchronousMode()) {
TRI_ASSERT(_readCache->receivedMessageCount() == 0);
TRI_ASSERT(_writeCache->receivedMessageCount() == 0);
std::swap(_readCache, _nextPhase);
std::swap(_readCache, _writeCacheNextGSS);
_writeCache->clear();
_requestedNextGSS = false;// only relevant for async
} else {
TRI_ASSERT(_writeCache->receivedMessageCount() == 0);
std::swap(_readCache, _writeCache);
_writeCache->clear();
}
_requestedNextGSS = false;// only relevant for async
// execute context
if (_workerContext != nullptr) {
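The swaps above are the superstep handover. A sketch of the rotation under the semantics suggested by this hunk (Cache is any type with a clear() member; names are simplified and hypothetical):

#include <utility>

template <typename Cache>
void rotateCaches(Cache*& readCache, Cache*& writeCache,
                  Cache*& writeCacheNextGSS, bool asynchronousMode) {
  if (asynchronousMode) {
    // messages parked for gss + 1 while gss ran become the new read set
    std::swap(readCache, writeCacheNextGSS);
  } else {
    // classic double buffering: last superstep's writes are now read
    std::swap(readCache, writeCache);
  }
  writeCache->clear();  // reuse for the superstep that is about to start
}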
@@ -164,6 +162,9 @@ void Worker<V, E, M>::prepareGlobalStep(VPackSlice data) {
template <typename V, typename E, typename M>
void Worker<V, E, M>::receivedMessages(VPackSlice data) {
// LOG(INFO) << "Worker received some messages: " << data.toJson();
if (_state != WorkerState::COMPUTING) {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "Cannot receive messages while computing");
}
VPackSlice gssSlice = data.get(Utils::globalSuperstepKey);
VPackSlice messageSlice = data.get(Utils::messagesKey);
@@ -183,7 +184,7 @@
}
}
} else if (_config.asynchronousMode() && gss == _config._globalSuperstep+1) {
_nextPhase->parseMessages(messageSlice);
_writeCacheNextGSS->parseMessages(messageSlice);
} else {
THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER,
"Superstep out of sync");
@@ -265,21 +266,24 @@ void Worker<V, E, M>::_processVertices(
// thread local caches
std::unique_ptr<InCache<M>> inCache;
std::unique_ptr<OutCache<M>> outCache, nextOutCache;
std::unique_ptr<OutCache<M>> outCache;
if (_messageCombiner) {
inCache.reset(
new CombiningInCache<M>(_messageFormat.get(), _messageCombiner.get()));
outCache.reset(
new CombiningOutCache<M>(&_config, (CombiningInCache<M>*)inCache.get()));
if (_config.asynchronousMode()) {
nextOutCache.reset(new CombiningOutCache<M>(&_config,
(CombiningInCache<M>*)inCache.get()));
outCache.reset(new CombiningOutCache<M>(&_config,
(CombiningInCache<M>*)inCache.get(),
_writeCacheNextGSS));
} else {
outCache.reset(new CombiningOutCache<M>(&_config,
(CombiningInCache<M>*)inCache.get()));
}
} else {
inCache.reset(new ArrayInCache<M>(_messageFormat.get()));
outCache.reset(new ArrayOutCache<M>(&_config, inCache.get()));
if (_config.asynchronousMode()) {
nextOutCache.reset(new ArrayOutCache<M>(&_config, inCache.get()));
outCache.reset(new ArrayOutCache<M>(&_config, inCache.get(), _writeCacheNextGSS));
} else {
outCache.reset(new ArrayOutCache<M>(&_config, inCache.get()));
}
}
@@ -292,7 +296,7 @@ void Worker<V, E, M>::_processVertices(
vertexComputation->_workerAggregators = &workerAggregator;
vertexComputation->_cache = outCache.get();
if (_config.asynchronousMode()) {
outCache->sendNextGSS(_requestedNextGSS);
outCache->sendToNextGSS(_requestedNextGSS);
}
size_t activeCount = 0;
@@ -321,8 +325,8 @@
// ==================== send messages to other shards ====================
outCache->flushMessages();
if (!_requestedNextGSS && vertexComputation->_nextPhase) {
MUTEX_LOCKER(guard2, _commandMutex);
_requestedNextGSS = true;
_nextGSSSendMessageCount += outCache->sendCountNextGSS();
}
// merge thread local messages, _writeCache does locking
@@ -333,7 +337,7 @@
WorkerStats stats;
stats.activeCount = activeCount;
stats.sendCount = outCache->sendMessageCount();
stats.sendCount = outCache->sendCount();
stats.superstepRuntimeSecs = TRI_microtime() - start;
_finishedProcessing(vertexComputation->_workerAggregators, stats);
}
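Each processing thread reports its out-cache counters when its batch of vertices finishes. Since _nextGSSSendMessageCount is declared std::atomic<uint32_t> in the header, the += above is a single atomic read-modify-write, independent of the mutex that guards _requestedNextGSS. A minimal illustration, with hypothetical free-standing names:

#include <atomic>
#include <cstdint>

std::atomic<uint32_t> nextGSSSendMessageCount{0};

void reportThreadLocalCount(uint32_t sendCountNextGSS) {
  // atomic fetch-and-add; safe to call from many threads concurrently
  nextGSSSendMessageCount += sendCountNextGSS;
}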

View File

@@ -23,6 +23,7 @@
#ifndef ARANGODB_PREGEL_WORKER_H
#define ARANGODB_PREGEL_WORKER_H 1
#include <atomic>
#include "Basics/Common.h"
#include "Basics/Mutex.h"
#include "Pregel/AggregatorHandler.h"
@@ -47,7 +48,7 @@ class IWorker {
virtual void startRecovery(VPackSlice data) = 0;
virtual void compensateStep(VPackSlice data) = 0;
};
template <typename V, typename E>
class GraphStore;
@@ -86,19 +87,22 @@ class Worker : public IWorker {
// only valid while recovering to determine the offset
// where new vertices were inserted
size_t _preRecoveryTotal;
/// During async mode this should keep track of the sent messages
std::unique_ptr<AggregatorHandler> _conductorAggregators;
std::unique_ptr<AggregatorHandler> _workerAggregators;
std::unique_ptr<GraphStore<V, E>> _graphStore;
std::unique_ptr<MessageFormat<M>> _messageFormat;
std::unique_ptr<MessageCombiner<M>> _messageCombiner;
// from previous or current superstep
InCache<M> *_readCache = nullptr;
// for the current or next superstep
InCache<M> *_writeCache = nullptr;
// intended for the next superstep phase
InCache<M> *_nextPhase = nullptr;
bool _requestedNextGSS = true;
InCache<M> *_writeCacheNextGSS = nullptr;
std::atomic<uint32_t> _nextGSSSendMessageCount;
std::atomic<bool> _requestedNextGSS;
WorkerStats _superstepStats;
size_t _runningThreads;
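One caveat on the new atomic members: a default-constructed std::atomic is not zero-initialized (before C++20 its value is indeterminate), which is presumably why the worker constructor assigns _nextGSSSendMessageCount = 0 explicitly. Brace-initializing at the declaration would give the same guarantee, e.g. (a sketch, not the committed code):

std::atomic<uint32_t> _nextGSSSendMessageCount{0};
std::atomic<bool> _requestedNextGSS{false};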