1
0
Fork 0
arangodb/arangod/Pregel/Algos/SLPA.cpp

169 lines
5.5 KiB
C++

////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
/// http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Simon Grätzer
////////////////////////////////////////////////////////////////////////////////
#include "SLPA.h"
#include <cmath>
#include "Cluster/ClusterInfo.h"
#include "Cluster/ServerState.h"
#include "Pregel/Aggregator.h"
#include "Pregel/Algorithm.h"
#include "Pregel/GraphStore.h"
#include "Pregel/IncomingCache.h"
#include "Pregel/MasterContext.h"
#include "Pregel/VertexComputation.h"
#include "Random/RandomGenerator.h"
using namespace arangodb;
using namespace arangodb::pregel;
using namespace arangodb::pregel::algos;
static const uint64_t STABILISATION_ROUNDS = 20;
struct SLPAComputation : public VertexComputation<SLPAValue, int8_t, uint64_t> {
SLPAComputation() {}
uint64_t mostFrequent(MessageIterator<uint64_t> const& messages) {
TRI_ASSERT(messages.size() > 0);
if (messages.size() == 1) {
return **messages;
}
// most frequent value
size_t i = 0;
std::vector<uint64_t> all(messages.size());
for (uint64_t const* msg : messages) {
all[i++] = *msg;
}
std::sort(all.begin(), all.end());
uint64_t maxValue = all[0];
uint64_t currentValue = all[0];
int currentCounter = 1;
int maxCounter = 1;
for (i = 1; i < all.size(); i++) {
if (currentValue == all[i]) {
currentCounter++;
if (maxCounter < currentCounter) {
maxCounter = currentCounter;
maxValue = currentValue;
}
} else {
currentCounter = 1;
currentValue = all[i];
}
}
if (maxCounter == 1) {
return all[0];
}
return maxValue;
}
void compute(MessageIterator<uint64_t> const& messages) override {
SLPAValue* val = mutableVertexData();
if (globalSuperstep() == 0) {
val->memory.emplace(val->nodeId, 1);
} else if (messages.size() > 0) {
// listen to our neighbours
uint64_t newCommunity = mostFrequent(messages);
auto it = val->memory.find(newCommunity);
if (it == val->memory.end()) {
val->memory.emplace(newCommunity, 1);
} else {
it->second++;
}
}
// Normally the SLPA algo only lets one vertex by one speak sequentially,
// which is not really well parallizable. Additionally I figure
// since a speaker only speaks to neighbours and the speaker order is random
// we can get away with letting nodes speak in turn
bool speak = val->nodeId % 2 == globalSuperstep() % 2;
if (speak) {
// speak to our neighbours
float random = RandomGenerator::interval(UINT32_MAX);
float randomDoubleValue = random / (float)UINT32_MAX;
float cumulativeSum = 0;
// Randomly select a label with probability proportional to the
// occurrence frequency of this label in its memory
uint64_t numCommunities = globalSuperstep();//val->memory.size();
for (std::pair<uint64_t, uint64_t> const& e : val->memory) {
cumulativeSum = cumulativeSum + ((float)e.second)/ numCommunities;
if(cumulativeSum >= randomDoubleValue) {
sendMessageToAllEdges(e.first);
}
}
sendMessageToAllEdges(val->nodeId);
}
}
};
VertexComputation<SLPAValue, int8_t, uint64_t>*
SLPA::createComputation(WorkerConfig const* config) const {
return new SLPAComputation();
}
struct SLPAGraphFormat : public GraphFormat<SLPAValue, int8_t> {
std::string _resultField;
uint64_t vertexIdRange = 0;
explicit SLPAGraphFormat(std::string const& result) : _resultField(result) {}
size_t estimatedVertexSize() const override { return sizeof(LPValue); };
size_t estimatedEdgeSize() const override { return 0; };
void willLoadVertices(uint64_t count) override {
// if we aren't running in a cluster it doesn't matter
if (arangodb::ServerState::instance()->isRunningInCluster()) {
arangodb::ClusterInfo* ci = arangodb::ClusterInfo::instance();
if (ci) {
vertexIdRange = ci->uniqid(count);
}
}
}
size_t copyVertexData(std::string const& documentId,
arangodb::velocypack::Slice document, SLPAValue* value,
size_t maxSize) override {
value->nodeId = vertexIdRange++;
return sizeof(SLPAValue);
}
size_t copyEdgeData(arangodb::velocypack::Slice document, int8_t* targetPtr,
size_t maxSize) override {
return 0;
}
bool buildVertexDocument(arangodb::velocypack::Builder& b, const SLPAValue* ptr,
size_t size) const override {
//b.add(_resultField, VPackValue(ptr->currentCommunity));
return true;
}
bool buildEdgeDocument(arangodb::velocypack::Builder& b, const int8_t* ptr,
size_t size) const override {
return false;
}
};
GraphFormat<SLPAValue, int8_t>* SLPA::inputFormat() const {
return new SLPAGraphFormat(_resultField);
}