////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 ArangoDB GmbH, Cologne, Germany
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is ArangoDB GmbH, Cologne, Germany
///
/// @author Simon Grätzer
////////////////////////////////////////////////////////////////////////////////

#include "GraphStore.h"

#include "Basics/Common.h"
#include "Basics/MutexLocker.h"
#include "Pregel/CommonFormats.h"
#include "Pregel/IndexHelpers.h"
#include "Pregel/PregelFeature.h"
#include "Pregel/TypedBuffer.h"
#include "Pregel/Utils.h"
#include "Pregel/WorkerConfig.h"
#include "Scheduler/Scheduler.h"
#include "Scheduler/SchedulerFeature.h"
#include "Transaction/Context.h"
#include "Transaction/Helpers.h"
#include "Transaction/Methods.h"
#include "Transaction/StandaloneContext.h"
#include "Utils/CollectionNameResolver.h"
#include "Utils/SingleCollectionTransaction.h"
#include "Utils/OperationCursor.h"
#include "Utils/OperationOptions.h"
#include "VocBase/LogicalCollection.h"
#include "VocBase/ManagedDocumentResult.h"
#include "VocBase/ticks.h"
#include "VocBase/vocbase.h"

#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif

#include <algorithm>
#include <memory>

using namespace arangodb;
using namespace arangodb::pregel;

template <typename V, typename E>
GraphStore<V, E>::GraphStore(TRI_vocbase_t& vb, GraphFormat<V, E>* graphFormat)
    : _vocbaseGuard(vb),
      _graphFormat(graphFormat),
      _localVertexCount(0),
      _localEdgeCount(0),
      _runningThreads(0) {}

template <typename V, typename E>
GraphStore<V, E>::~GraphStore() {
  _destroyed = true;
}

static const char* shardError =
    "Collections need to have the same number of shards,"
    " use distributeShardsLike";

template <typename V, typename E>
void GraphStore<V, E>::loadShards(WorkerConfig* config,
                                  std::function<void()> const& cb) {
  _config = config;
  TRI_ASSERT(_runningThreads == 0);
  LOG_TOPIC("27f1e", DEBUG, Logger::PREGEL)
      << "Using " << config->localVertexShardIDs().size()
      << " threads to load data. memory-mapping is turned "
      << (config->useMemoryMaps() ? "on" : "off");

  // hold the current position where the ith vertex shard can
  // start to write its data. At the end the offset should equal the
  // sum of the counts of all ith edge shards

  // Contains the shards located on this db server in the right order,
  // assuming edges are sharded after _from and vertices after _key;
  // then every ith vertex shard has the corresponding edges in
  // the ith edge shard
  std::map<CollectionID, std::vector<ShardID>> const& vertexCollMap =
      _config->vertexCollectionShards();
  std::map<CollectionID, std::vector<ShardID>> const& edgeCollMap =
      _config->edgeCollectionShards();
  size_t numShards = SIZE_MAX;

  for (auto const& pair : vertexCollMap) {
    std::vector<ShardID> const& vertexShards = pair.second;
    if (numShards == SIZE_MAX) {
      numShards = vertexShards.size();
    } else if (numShards != vertexShards.size()) {
      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, shardError);
    }

    for (size_t i = 0; i < vertexShards.size(); i++) {
      ShardID const& vertexShard = vertexShards[i];

      // distributeShardsLike should cause the edges for a vertex to be
      // in the same shard index.
      // x in vertexShard2 => E(x) in edgeShard2
      std::vector<ShardID> edges;
      for (auto const& pair2 : edgeCollMap) {
        std::vector<ShardID> const& edgeShards = pair2.second;
        if (vertexShards.size() != edgeShards.size()) {
          THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_BAD_PARAMETER, shardError);
        }
        edges.emplace_back(edgeShards[i]);
      }

      try {
        // we might have already loaded these shards
        if (_loadedShards.find(vertexShard) != _loadedShards.end()) {
          continue;
        }
        _loadedShards.insert(vertexShard);
        _runningThreads++;
        Scheduler* scheduler = SchedulerFeature::SCHEDULER;
        TRI_ASSERT(scheduler);
        bool queued =
            scheduler->queue(RequestLane::INTERNAL_LOW, [this, vertexShard, edges] {
              TRI_DEFER(_runningThreads--);  // exception safe
              try {
                _loadVertices(vertexShard, edges);
              } catch (std::exception const& ex) {
                LOG_TOPIC("c87c9", WARN, Logger::PREGEL)
                    << "caught exception while loading pregel graph: " << ex.what();
              }
            });
        if (!queued) {
          LOG_TOPIC("38da2", WARN, Logger::PREGEL)
              << "No thread available to queue vertex loading";
        }
      } catch (basics::Exception const& ex) {
        LOG_TOPIC("3f283", WARN, Logger::PREGEL)
            << "unhandled exception while loading pregel graph: " << ex.what();
      } catch (...) {
        LOG_TOPIC("3f282", WARN, Logger::PREGEL)
            << "unhandled exception while loading pregel graph";
      }
    }

    // we can only load one vertex collection at a time
    while (_runningThreads > 0) {
      std::this_thread::sleep_for(std::chrono::microseconds(5000));
    }
  }

  Scheduler* scheduler = SchedulerFeature::SCHEDULER;
  bool queued = scheduler->queue(RequestLane::INTERNAL_LOW, cb);
  if (!queued) {
    THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
                                   "No thread available to queue callback, "
                                   "canceling execution");
  }
}

template <typename V, typename E>
void GraphStore<V, E>::loadDocument(WorkerConfig* config, std::string const& documentID) {
  // figure out if we got this vertex locally
  PregelID _id = config->documentIdToPregel(documentID);
  if (config->isLocalVertexShard(_id.shard)) {
    loadDocument(config, _id.shard, VPackStringRef(_id.key));
  }
}

template <typename V, typename E>
void GraphStore<V, E>::loadDocument(WorkerConfig* config, PregelShard sourceShard,
                                    VPackStringRef const& _key) {
  // loading single documents is not supported here
  TRI_ASSERT(false);
}

template <typename V, typename E>
RangeIterator<Vertex<V, E>> GraphStore<V, E>::vertexIterator() {
  if (_vertices.empty()) {
    return RangeIterator<Vertex<V, E>>(_vertices, 0, nullptr, 0);
  }
  TypedBuffer<Vertex<V, E>>* front = _vertices.front().get();
  return RangeIterator<Vertex<V, E>>(_vertices, 0, front->begin(), _localVertexCount);
}

template <typename V, typename E>
RangeIterator<Vertex<V, E>> GraphStore<V, E>::vertexIterator(size_t i, size_t j) {
  if (_vertices.size() <= i) {
    return RangeIterator<Vertex<V, E>>(_vertices, 0, nullptr, 0);
  }

  size_t numVertices = 0;
  for (size_t x = i; x < j && x < _vertices.size(); x++) {
    numVertices += _vertices[x]->size();
  }

  return RangeIterator<Vertex<V, E>>(_vertices, i, _vertices[i]->begin(), numVertices);
}

template <typename V, typename E>
RangeIterator<Edge<E>> GraphStore<V, E>::edgeIterator(Vertex<V, E> const* entry) {
  if (entry->getEdgeCount() == 0) {
    return RangeIterator<Edge<E>>(_edges, 0, nullptr, 0);
  }

  size_t i = 0;
  for (; i < _edges.size(); i++) {
    if (_edges[i]->begin() <= entry->getEdges() &&
        entry->getEdges() <= _edges[i]->end()) {
      break;
    }
  }
  TRI_ASSERT(i < _edges.size());
  TRI_ASSERT(i != _edges.size() - 1 ||
             _edges[i]->size() >= entry->getEdgeCount());

  return RangeIterator<Edge<E>>(_edges, i,
                                static_cast<Edge<E>*>(entry->getEdges()),
                                entry->getEdgeCount());
}

namespace {
template <typename M>
void moveAppend(std::vector<M>& src, std::vector<M>& dst) {
  if (dst.empty()) {
    dst = std::move(src);
  } else {
    dst.reserve(dst.size() + src.size());
    std::move(std::begin(src), std::end(src), std::back_inserter(dst));
    src.clear();
  }
}

template <typename M>
std::unique_ptr<TypedBuffer<M>> createBuffer(WorkerConfig const& config, size_t cap) {
  if (config.useMemoryMaps()) {
    auto ptr = std::make_unique<MappedFileBuffer<M>>(cap);
    ptr->sequentialAccess();
    return ptr;
  } else {
    return std::make_unique<VectorTypedBuffer<M>>(cap);
  }
}
}  // namespace

static constexpr size_t stringChunkSize = 32 * 1024 * 1024 * sizeof(char);

template <typename V, typename E>
void GraphStore<V, E>::_loadVertices(ShardID const& vertexShard,
                                     std::vector<ShardID> const& edgeShards) {
  LOG_TOPIC("24837", DEBUG, Logger::PREGEL)
      << "Pregel worker: loading from vertex shard " << vertexShard;

  transaction::Options trxOpts;
  trxOpts.waitForSync = false;
  trxOpts.allowImplicitCollections = true;

  auto ctx = transaction::StandaloneContext::Create(_vocbaseGuard.database());
  transaction::Methods trx(ctx, {}, {}, {}, trxOpts);
  Result res = trx.begin();
  if (!res.ok()) {
    THROW_ARANGO_EXCEPTION(res);
  }

  PregelShard sourceShard = (PregelShard)_config->shardId(vertexShard);
  OperationCursor cursor(trx.indexScan(vertexShard, transaction::Methods::CursorType::ALL));

  // tell the formatter the number of docs we are about to load
  LogicalCollection* coll = cursor.collection();
  uint64_t numVertices = coll->numberDocuments(&trx, transaction::CountType::Normal);
  _graphFormat->willLoadVertices(numVertices);
  LOG_TOPIC("7c31f", DEBUG, Logger::PREGEL)
      << "Shard '" << vertexShard << "' has " << numVertices << " vertices";

  std::vector<std::unique_ptr<TypedBuffer<Vertex<V, E>>>> vertices;
  std::vector<std::unique_ptr<TypedBuffer<char>>> vKeys;
  std::vector<std::unique_ptr<TypedBuffer<Edge<E>>>> edges;
  std::vector<std::unique_ptr<TypedBuffer<char>>> eKeys;

  TypedBuffer<Vertex<V, E>>* vertexBuff = nullptr;
  TypedBuffer<char>* keyBuff = nullptr;
  size_t segmentSize = std::min<size_t>(numVertices, vertexSegmentSize());

  std::string documentId;  // temp buffer for _id of vertex
  auto cb = [&](LocalDocumentId const& token, VPackSlice slice) {
    if (slice.isExternal()) {
      slice = slice.resolveExternal();
    }

    if (vertexBuff == nullptr || vertexBuff->remainingCapacity() == 0) {
      vertices.push_back(createBuffer<Vertex<V, E>>(*_config, segmentSize));
      vertexBuff = vertices.back().get();
    }
    Vertex<V, E>* ventry = vertexBuff->appendElement();

    VPackValueLength keyLen;
    VPackSlice keySlice = transaction::helpers::extractKeyFromDocument(slice);
    char const* key = keySlice.getString(keyLen);
    if (keyBuff == nullptr || keyLen > keyBuff->remainingCapacity()) {
      TRI_ASSERT(keyLen < stringChunkSize);
      vKeys.push_back(createBuffer<char>(*_config, stringChunkSize));
      keyBuff = vKeys.back().get();
    }

    ventry->_shard = sourceShard;
    ventry->_key = keyBuff->end();
    ventry->_keyLength = static_cast<uint16_t>(keyLen);
    TRI_ASSERT(keyLen <= std::numeric_limits<uint16_t>::max());

    // actually copy in the key
    memcpy(keyBuff->end(), key, keyLen);
    keyBuff->advance(keyLen);

    // load vertex data
    documentId = trx.extractIdString(slice);
    if (_graphFormat->estimatedVertexSize() > 0) {
      _graphFormat->copyVertexData(documentId, slice, ventry->_data);
    }

    ventry->_edges = nullptr;
    ventry->_edgeCount = 0;
    // load edges
    for (ShardID const& edgeShard : edgeShards) {
      _loadEdges(trx, *ventry, edgeShard, documentId, edges, eKeys);
    }
  };

  _localVertexCount += numVertices;

  bool hasMore = true;
  while (hasMore && numVertices > 0) {
    TRI_ASSERT(segmentSize > 0);
    hasMore = cursor.nextDocument(cb, segmentSize);
    if (_destroyed) {
      LOG_TOPIC("4355a", WARN, Logger::PREGEL) << "Aborted loading graph";
      break;
    }
    TRI_ASSERT(numVertices >= segmentSize);
    numVertices -= segmentSize;
    LOG_TOPIC("b9ed9", DEBUG, Logger::PREGEL)
        << "Shard '" << vertexShard << "', " << numVertices << " remaining vertices";
    segmentSize = std::min<size_t>(numVertices, vertexSegmentSize());
  }

  std::lock_guard<std::mutex> guard(_bufferMutex);
  ::moveAppend(vertices, _vertices);
  ::moveAppend(vKeys, _vertexKeys);
  ::moveAppend(edges, _edges);
  ::moveAppend(eKeys, _edgeKeys);
  LOG_TOPIC("6d389", DEBUG, Logger::PREGEL)
      << "Pregel worker: done loading from vertex shard " << vertexShard;
}

template <typename V, typename E>
void GraphStore<V, E>::_loadEdges(transaction::Methods& trx, Vertex<V, E>& vertex,
                                  ShardID const& edgeShard, std::string const& documentID,
                                  std::vector<std::unique_ptr<TypedBuffer<Edge<E>>>>& edges,
                                  std::vector<std::unique_ptr<TypedBuffer<char>>>& edgeKeys) {
  traverser::EdgeCollectionInfo info(&trx, edgeShard);
  ManagedDocumentResult mmdr;
  std::unique_ptr<OperationCursor> cursor = info.getEdges(documentID);

  TypedBuffer<Edge<E>>* edgeBuff = edges.empty() ? nullptr : edges.back().get();
  TypedBuffer<char>* keyBuff = edgeKeys.empty() ? nullptr : edgeKeys.back().get();

  auto allocateSpace = [&](size_t keyLen) {
    if (edgeBuff == nullptr || edgeBuff->remainingCapacity() == 0) {
      edges.push_back(createBuffer<Edge<E>>(*_config, edgeSegmentSize()));
      edgeBuff = edges.back().get();
    }
    if (keyBuff == nullptr || keyLen > keyBuff->remainingCapacity()) {
      TRI_ASSERT(keyLen < stringChunkSize);
      edgeKeys.push_back(createBuffer<char>(*_config, stringChunkSize));
      keyBuff = edgeKeys.back().get();
    }
  };

  size_t addedEdges = 0;
  auto buildEdge = [&](Edge<E>* edge, VPackStringRef toValue) {
    ++addedEdges;
    if (++(vertex._edgeCount) == 1) {
      vertex._edges = edge;
    }

    std::size_t pos = toValue.find('/');
    VPackStringRef collectionName = toValue.substr(0, pos);
    VPackStringRef key = toValue.substr(pos + 1);
    edge->_toKey = keyBuff->end();
    edge->_toKeyLength = static_cast<uint16_t>(key.size());
    TRI_ASSERT(key.size() <= std::numeric_limits<uint16_t>::max());
    keyBuff->advance(key.size());

    // actually copy in the key
    memcpy(edge->_toKey, key.data(), key.size());

    // resolve the shard of the target vertex.
    ShardID responsibleShard;
    int res = Utils::resolveShard(_config, collectionName.toString(),
                                  StaticStrings::KeyString, key, responsibleShard);
    if (res != TRI_ERROR_NO_ERROR) {
      LOG_TOPIC("b80ba", ERR, Logger::PREGEL)
          << "Could not resolve target shard of edge";
      return res;
    }

    // PregelShard sourceShard = (PregelShard)_config->shardId(edgeShard);
    edge->_targetShard = (PregelShard)_config->shardId(responsibleShard);
    if (edge->_targetShard == (PregelShard)-1) {
      LOG_TOPIC("1f413", ERR, Logger::PREGEL)
          << "Could not resolve target shard of edge";
      return TRI_ERROR_CLUSTER_BACKEND_UNAVAILABLE;
    }
    return TRI_ERROR_NO_ERROR;
  };

  // allow for rocksdb edge index optimization
  if (cursor->hasExtra() && _graphFormat->estimatedEdgeSize() == 0) {
    auto cb = [&](LocalDocumentId const& token, VPackSlice edgeSlice) {
      TRI_ASSERT(edgeSlice.isString());

      VPackStringRef toValue(edgeSlice);
      allocateSpace(toValue.size());
      Edge<E>* edge = edgeBuff->appendElement();
      buildEdge(edge, toValue);
    };
    while (cursor->nextWithExtra(cb, 1000)) {
      if (_destroyed) {
        LOG_TOPIC("29018", WARN, Logger::PREGEL) << "Aborted loading graph";
        break;
      }
    }
  } else {
    auto cb = [&](LocalDocumentId const& token, VPackSlice slice) {
      if (slice.isExternal()) {
        slice = slice.resolveExternal();
      }

      VPackStringRef toValue(transaction::helpers::extractToFromDocument(slice));
      allocateSpace(toValue.size());
      Edge<E>* edge = edgeBuff->appendElement();
      int res = buildEdge(edge, toValue);
      if (res == TRI_ERROR_NO_ERROR) {
        _graphFormat->copyEdgeData(slice, edge->data());
      }
    };
    while (cursor->nextDocument(cb, 1000)) {
      if (_destroyed) {
        LOG_TOPIC("191f5", WARN, Logger::PREGEL) << "Aborted loading graph";
        break;
      }
    }
  }

  // Add up all added elements
  _localEdgeCount += addedEdges;
}

/// Loops over the array starting a new transaction for different shards
/// Should not dead-lock unless we have to wait really long for other threads
template <typename V, typename E>
void GraphStore<V, E>::_storeVertices(std::vector<ShardID> const& globalShards,
                                      RangeIterator<Vertex<V, E>>& it) {
  // transaction on one shard
  std::unique_ptr<SingleCollectionTransaction> trx;
  PregelShard currentShard = (PregelShard)-1;
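  // a fresh SingleCollectionTransaction is opened whenever the shard of the
  // current vertex changes or 1000 documents have been written into it; the
  // previous transaction is finished first (see the loop below)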
  Result res = TRI_ERROR_NO_ERROR;
  VPackBuilder builder;
  size_t numDocs = 0;

  // loop over vertices
  for (; it.hasMore(); ++it) {
    if (it->shard() != currentShard || numDocs >= 1000) {
      if (trx) {
        res = trx->finish(res);
        if (!res.ok()) {
          THROW_ARANGO_EXCEPTION(res);
        }
      }
      currentShard = it->shard();

      auto ctx = transaction::StandaloneContext::Create(_vocbaseGuard.database());
      ShardID const& shard = globalShards[currentShard];
      transaction::Options to;
      trx.reset(new SingleCollectionTransaction(ctx, shard, AccessMode::Type::WRITE));
      trx->addHint(transaction::Hints::Hint::INTERMEDIATE_COMMITS);
      res = trx->begin();
      if (!res.ok()) {
        THROW_ARANGO_EXCEPTION(res);
      }
      numDocs = 0;
    }

    VPackStringRef const key = it->key();
    V const& data = it->data();

    builder.clear();
    // This loop will fill a buffer of vertices until we run into a new
    // collection or there are no more vertices to store (or the buffer is full)
    builder.openObject();
    builder.add(StaticStrings::KeyString,
                VPackValuePair(key.data(), key.size(), VPackValueType::String));
    /// bool store =
    _graphFormat->buildVertexDocument(builder, &data, sizeof(V));
    builder.close();
    ++numDocs;

    if (_destroyed) {
      LOG_TOPIC("73ec2", WARN, Logger::PREGEL)
          << "Storing data was canceled prematurely";
      trx->abort();
      trx.reset();
      break;
    }

    ShardID const& shard = globalShards[currentShard];
    OperationOptions options;
    OperationResult opRes = trx->update(shard, builder.slice(), options);
    if (opRes.fail() && opRes.isNot(TRI_ERROR_ARANGO_DOCUMENT_NOT_FOUND) &&
        opRes.isNot(TRI_ERROR_ARANGO_CONFLICT)) {
      THROW_ARANGO_EXCEPTION(opRes.result);
    }
    if (opRes.is(TRI_ERROR_ARANGO_CONFLICT)) {
      LOG_TOPIC("4e632", WARN, Logger::PREGEL)
          << "conflict while storing " << builder.toJson();
    }
  }

  if (trx) {
    res = trx->finish(res);
    if (!res.ok()) {
      THROW_ARANGO_EXCEPTION(res);
    }
  }
}

template <typename V, typename E>
void GraphStore<V, E>::storeResults(WorkerConfig* config, std::function<void()> cb) {
  _config = config;
  double now = TRI_microtime();
  TRI_ASSERT(SchedulerFeature::SCHEDULER != nullptr);

  size_t numSegments = _vertices.size();
  if (_localVertexCount > 100000) {
    // We expect at least parallelism to fit in a uint32_t.
    _runningThreads =
        static_cast<uint32_t>(std::min<size_t>(_config->parallelism(), numSegments));
  } else {
    _runningThreads = 1;
  }
  size_t numT = _runningThreads;
  LOG_TOPIC("f3fd9", DEBUG, Logger::PREGEL)
      << "Storing vertex data using " << numT << " threads";

  for (size_t i = 0; i < numT; i++) {
    bool queued = SchedulerFeature::SCHEDULER->queue(RequestLane::INTERNAL_LOW, [=] {
      size_t startI = i * (numSegments / numT);
      size_t endI = (i + 1) * (numSegments / numT);
      TRI_ASSERT(endI <= numSegments);

      try {
        RangeIterator<Vertex<V, E>> it = vertexIterator(startI, endI);
        _storeVertices(_config->globalShardIDs(), it);
        // TODO can't just write edges with smart graphs
      } catch (std::exception const& e) {
        LOG_TOPIC("e22c8", ERR, Logger::PREGEL)
            << "Storing vertex data failed: '" << e.what() << "'";
      } catch (...) {
        LOG_TOPIC("51b87", ERR, Logger::PREGEL) << "Storing vertex data failed";
      }
      _runningThreads--;
      if (_runningThreads == 0) {
        LOG_TOPIC("b5a21", DEBUG, Logger::PREGEL)
            << "Storing data took " << (TRI_microtime() - now) << "s";
        cb();
      }
    });
    if (!queued) {
      THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_QUEUE_FULL,
                                     "No thread available to queue vertex "
                                     "storage, canceling execution");
    }
  }
}

template class arangodb::pregel::GraphStore<int64_t, int64_t>;
template class arangodb::pregel::GraphStore<uint64_t, uint64_t>;
template class arangodb::pregel::GraphStore<float, float>;
template class arangodb::pregel::GraphStore<double, float>;

// specific algo combos
template class arangodb::pregel::GraphStore<SCCValue, int8_t>;
template class arangodb::pregel::GraphStore<ECValue, int8_t>;
template class arangodb::pregel::GraphStore<HITSValue, int8_t>;
template class arangodb::pregel::GraphStore<DMIDValue, float>;
template class arangodb::pregel::GraphStore<LPValue, int8_t>;
template class arangodb::pregel::GraphStore<SLPAValue, int8_t>;