1
0
Fork 0
arangodb/arangod/Pregel/OutgoingCache.cpp

258 lines
8.6 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Simon Grätzer
////////////////////////////////////////////////////////////////////////////////
#include "OutgoingCache.h"
#include "IncomingCache.h"
#include "Utils.h"
#include "WorkerConfig.h"
#include "Basics/MutexLocker.h"
#include "Basics/StaticStrings.h"
#include "Cluster/ClusterComm.h"
#include "VocBase/LogicalCollection.h"
#include <velocypack/Iterator.h>
#include <velocypack/velocypack-aliases.h>
using namespace arangodb;
using namespace arangodb::pregel;
/// Builds an outgoing cache bound to a single incoming cache.
///
/// @param state worker configuration; its database is used to derive the
///        base URL for outgoing requests
/// @param cache incoming cache stored as the local cache; it is dereferenced
///        here to obtain the message format, so it must not be null
template <typename M>
OutCache<M>::OutCache(WorkerConfig* state, InCache<M>* cache)
    : _state(state),
      _format(cache->format()),
      _localCache(cache) {
  // The base URL depends only on the database, so compute it once up front.
  this->_baseUrl = Utils::baseUrl(this->_state->database());
}
/// Builds an outgoing cache bound to two incoming caches: one for the
/// current global superstep and one for the following one.
///
/// @param state worker configuration; its database is used to derive the
///        base URL for outgoing requests
/// @param cache incoming cache stored as the local cache; it is dereferenced
///        here to obtain the message format, so it must not be null
/// @param nextGSS incoming cache stored as the local cache for the next
///        global superstep
template <typename M>
OutCache<M>::OutCache(WorkerConfig* state, InCache<M>* cache, InCache<M>* nextGSS)
    : _state(state),
      _format(cache->format()),
      _localCache(cache),
      _localCacheNextGSS(nextGSS) {
  // The base URL depends only on the database, so compute it once up front.
  this->_baseUrl = Utils::baseUrl(this->_state->database());
}
// ================= ArrayOutCache ==================
/// Discards whatever is still buffered when the cache goes away; nothing is
/// sent from the destructor.
template <typename M>
ArrayOutCache<M>::~ArrayOutCache() {
  this->clear();
}
/// Empties the per-shard message buffer and resets the buffered-message
/// counter to zero.
template <typename M>
void ArrayOutCache<M>::clear() {
  this->_containedMessages = 0;
  _shardMap.clear();
}
/// Queues one message for the vertex `key` in `shard`.
///
/// Messages for a shard that is not local are buffered per shard and vertex;
/// once the buffered count has exceeded the batch size, the buffer is flushed.
/// Messages for a local shard bypass the buffer and are written straight into
/// the matching incoming cache (current or next global superstep).
///
/// @param shard target shard of the receiving vertex
/// @param key key of the receiving vertex
/// @param data message payload
template <typename M>
void ArrayOutCache<M>::appendMessage(prgl_shard_t shard, std::string const& key,
                                     M const& data) {
  if (!this->_state->isLocalVertexShard(shard)) {
    // Remote receiver: append to the list kept for this vertex.
    auto& messagesByVertex = _shardMap[shard];
    messagesByVertex[key].push_back(data);

    // The comparison uses the count from before this message was added.
    auto const bufferedBefore = this->_containedMessages++;
    if (bufferedBefore > this->_batchSize) {
      flushMessages();
    }
    return;
  }

  // Local receiver: deliver directly and account for it as sent.
  if (!this->_sendToNextGSS) {
    this->_localCache->setDirect(shard, key, data);
    this->_sendCount++;
    return;
  }
  this->_localCacheNextGSS->setDirect(shard, key, data);
  this->_sendCountNextGSS++;
}
/// Posts all buffered messages to their destination shards and then empties
/// the buffer.
///
/// One request is built per shard that has buffered messages. Its body is a
/// JSON object containing:
///  - the message array, holding one nested array per vertex laid out as
///    `[shard, vertexKey, value, value, ...]`,
///  - the id of the sending server,
///  - the execution number,
///  - the global superstep the messages are addressed to.
///
/// The send counters are increased by one per serialized value. Responses
/// with status CL_COMM_RECEIVED are logged; other statuses are not inspected
/// here.
template <typename M>
void ArrayOutCache<M>::flushMessages() {
  LOG(INFO) << "Beginning to send messages to other machines";

  // Messages flagged for the next superstep are addressed to gss + 1.
  uint64_t gss = this->_state->globalSuperstep();
  if (this->_sendToNextGSS) {
    gss += 1;
  }

  std::vector<ClusterCommRequest> requests;
  // At most one request per buffered shard.
  requests.reserve(_shardMap.size());

  for (auto const& it : _shardMap) {
    prgl_shard_t shard = it.first;
    std::unordered_map<std::string, std::vector<M>> const& vertexMessageMap =
        it.second;
    if (vertexMessageMap.size() == 0) {
      continue;
    }

    VPackBuilder package;
    package.openObject();
    package.add(Utils::messagesKey, VPackValue(VPackValueType::Array));
    for (auto const& vertexMessagePair : vertexMessageMap) {
      // One nested array per vertex: shard, key, then all of its values.
      package.add(VPackValue(VPackValueType::Array));
      package.add(VPackValue(shard));
      package.add(VPackValue(vertexMessagePair.first));
      for (M const& val : vertexMessagePair.second) {
        this->_format->addValue(package, val);
        if (this->_sendToNextGSS) {
          this->_sendCountNextGSS++;
        } else {
          this->_sendCount++;
        }
      }
      package.close();  // per-vertex array
    }
    package.close();  // message array
    package.add(Utils::senderKey, VPackValue(ServerState::instance()->getId()));
    package.add(Utils::executionNumberKey,
                VPackValue(this->_state->executionNumber()));
    package.add(Utils::globalSuperstepKey, VPackValue(gss));
    package.close();  // outer object

    // add a request
    ShardID const& shardId = this->_state->globalShardIDs()[shard];
    // Serialize once; the same string is used as request body and in the log.
    auto body = std::make_shared<std::string>(package.toJson());
    requests.emplace_back("shard:" + shardId, rest::RequestType::POST,
                          this->_baseUrl + Utils::messagesPath, body);
    LOG(INFO) << "Worker: Sending data to other Shard: " << shardId
              << ". Message: " << *body;
  }

  size_t nrDone = 0;
  // 120 is the timeout handed to ClusterComm (presumably seconds — confirm
  // against the ClusterComm API).
  ClusterComm::instance()->performRequests(requests, 120, nrDone,
                                           LogTopic("Pregel message transfer"));
  // readResults(requests);
  for (auto const& req : requests) {
    auto& res = req.result;
    if (res.status == CL_COMM_RECEIVED) {
      LOG(INFO) << res.answer->payload().toJson();
    }
  }
  this->clear();
}
// ================= CombiningOutCache ==================
/// Builds a combining outgoing cache on top of a combining incoming cache.
///
/// @param state worker configuration, forwarded to the base class
/// @param cache combining incoming cache, forwarded to the base class; its
///        combiner is reused for this cache (dereferenced, must not be null)
template <typename M>
CombiningOutCache<M>::CombiningOutCache(WorkerConfig* state,
                                        CombiningInCache<M>* cache)
    : OutCache<M>(state, cache),
      _combiner(cache->combiner()) {
}
/// Builds a combining outgoing cache with an additional incoming cache for
/// the next global superstep.
///
/// @param state worker configuration, forwarded to the base class
/// @param cache combining incoming cache, forwarded to the base class; its
///        combiner is reused for this cache (dereferenced, must not be null)
/// @param nextPhase incoming cache forwarded to the base class as the cache
///        for the next global superstep
template <typename M>
CombiningOutCache<M>::CombiningOutCache(WorkerConfig* state,
                                        CombiningInCache<M>* cache,
                                        InCache<M>* nextPhase)
    : OutCache<M>(state, cache, nextPhase),
      _combiner(cache->combiner()) {
}
/// Discards whatever is still buffered when the cache goes away; nothing is
/// sent from the destructor.
template <typename M>
CombiningOutCache<M>::~CombiningOutCache() {
  this->clear();
}
/// Empties the per-shard message buffer and resets the buffered-message
/// counter to zero.
template <typename M>
void CombiningOutCache<M>::clear() {
  this->_containedMessages = 0;
  _shardMap.clear();
}
/// Queues one message for the vertex `key` in `shard`, merging it with an
/// already buffered message for the same vertex if there is one.
///
/// Messages for a local shard bypass the buffer and are written straight into
/// the matching incoming cache (current or next global superstep). Messages
/// for other shards are kept as a single combined value per vertex.
///
/// The buffered-message counter only grows when a new vertex entry is
/// created: merging into an existing entry does not enlarge the buffer, so it
/// must not bring the cache closer to a flush.
///
/// @param shard target shard of the receiving vertex
/// @param key key of the receiving vertex
/// @param data message payload
template <typename M>
void CombiningOutCache<M>::appendMessage(prgl_shard_t shard,
                                         std::string const& key,
                                         M const& data) {
  if (this->_state->isLocalVertexShard(shard)) {
    if (this->_sendToNextGSS) {
      this->_localCacheNextGSS->setDirect(shard, key, data);
      this->_sendCountNextGSS++;
    } else {
      this->_localCache->setDirect(shard, key, data);
      this->_sendCount++;
    }
  } else {
    std::unordered_map<std::string, M>& vertexMap = _shardMap[shard];
    auto it = vertexMap.find(key);
    if (it != vertexMap.end()) {  // more than one message
      // Reuse the iterator instead of hashing the key a second time.
      _combiner->combine(it->second, data);
    } else {  // first message for this vertex
      vertexMap.emplace(key, data);
      // Only a new entry grows the buffer, so only then check the threshold.
      if (this->_containedMessages++ > this->_batchSize) {
        flushMessages();
      }
    }
  }
}
/// Posts all buffered (already combined) messages to their destination
/// shards and then empties the buffer.
///
/// One request is built per shard that has buffered messages. Its body is a
/// JSON object containing:
///  - the message array, a flat sequence of `shard, vertexKey, value`
///    triples (one triple per vertex),
///  - the id of the sending server,
///  - the execution number,
///  - the global superstep the messages are addressed to.
///
/// The send counters are increased by one per serialized triple. Responses
/// are handed to Utils::printResponses.
template <typename M>
void CombiningOutCache<M>::flushMessages() {
  LOG(INFO) << "Beginning to send messages to other machines";

  // Messages flagged for the next superstep are addressed to gss + 1.
  uint64_t gss = this->_state->globalSuperstep();
  if (this->_sendToNextGSS) {
    gss += 1;
  }

  // The builder options are identical for every package, so set them up once.
  // `options` outlives every builder that points at it.
  VPackOptions options = VPackOptions::Defaults;
  options.buildUnindexedArrays = true;
  options.buildUnindexedObjects = true;

  std::vector<ClusterCommRequest> requests;
  // At most one request per buffered shard.
  requests.reserve(_shardMap.size());

  for (auto const& it : _shardMap) {
    prgl_shard_t shard = it.first;
    std::unordered_map<std::string, M> const& vertexMessageMap = it.second;
    if (vertexMessageMap.size() == 0) {
      continue;
    }

    VPackBuilder package(&options);
    package.openObject();
    package.add(Utils::messagesKey, VPackValue(VPackValueType::Array));
    for (auto const& vertexMessagePair : vertexMessageMap) {
      // Flat layout: shard, key, combined value.
      package.add(VPackValue(shard));
      package.add(VPackValue(vertexMessagePair.first));
      this->_format->addValue(package, vertexMessagePair.second);
      if (this->_sendToNextGSS) {
        this->_sendCountNextGSS++;
      } else {
        this->_sendCount++;
      }
    }
    package.close();  // message array
    package.add(Utils::senderKey, VPackValue(ServerState::instance()->getId()));
    package.add(Utils::executionNumberKey,
                VPackValue(this->_state->executionNumber()));
    package.add(Utils::globalSuperstepKey, VPackValue(gss));
    package.close();  // outer object

    // add a request
    ShardID const& shardId = this->_state->globalShardIDs()[shard];
    // Serialize once; the same string is used as request body and in the log.
    auto body = std::make_shared<std::string>(package.toJson());
    requests.emplace_back("shard:" + shardId, rest::RequestType::POST,
                          this->_baseUrl + Utils::messagesPath, body);
    LOG(INFO) << "Worker: Sending data to other Shard: " << shardId
              << ". Message: " << *body;
  }

  size_t nrDone = 0;
  // 180 is the timeout handed to ClusterComm (presumably seconds — confirm
  // against the ClusterComm API).
  ClusterComm::instance()->performRequests(requests, 180, nrDone,
                                           LogTopic("Pregel message transfer"));
  Utils::printResponses(requests);
  this->clear();
}
// Explicit instantiations of the cache templates for the supported message
// types (int64_t and float). The member definitions above live in this
// translation unit, so a further message type presumably needs a matching
// line here for each of the three class templates — verify against the header.
template class arangodb::pregel::OutCache<int64_t>;
template class arangodb::pregel::OutCache<float>;
template class arangodb::pregel::ArrayOutCache<int64_t>;
template class arangodb::pregel::ArrayOutCache<float>;
template class arangodb::pregel::CombiningOutCache<int64_t>;
template class arangodb::pregel::CombiningOutCache<float>;