diff --git a/Documentation/Books/Manual/Graphs/GeneralGraphs/README.mdpp b/Documentation/Books/Manual/Graphs/GeneralGraphs/README.mdpp index 5d572583de..552411e076 100644 --- a/Documentation/Books/Manual/Graphs/GeneralGraphs/README.mdpp +++ b/Documentation/Books/Manual/Graphs/GeneralGraphs/README.mdpp @@ -32,12 +32,13 @@ There is no need to include the referenced collections within the query, this mo @END_EXAMPLE_ARANGOSH_OUTPUT @endDocuBlock generalGraphCreateGraphHowTo2 -* Define relations on the +* Define relations on the Graph @startDocuBlockInline generalGraphCreateGraphHowTo3 @EXAMPLE_ARANGOSH_OUTPUT{generalGraphCreateGraphHowTo3} ~ var graph_module = require("@arangodb/general-graph"); ~ var graph = graph_module._create("myGraph"); + ~ graph._addVertexCollection("pet"); var rel = graph_module._relation("isCustomer", ["shop"], ["customer"]); graph._extendEdgeDefinitions(rel); graph; diff --git a/Documentation/Books/Manual/Graphs/SmartGraphs/README.mdpp b/Documentation/Books/Manual/Graphs/SmartGraphs/README.mdpp new file mode 100644 index 0000000000..125a567a5e --- /dev/null +++ b/Documentation/Books/Manual/Graphs/SmartGraphs/README.mdpp @@ -0,0 +1,64 @@ +!CHAPTER SmartGraphs + +__This feature is only available in the Enterprise Edition.__ + +This chapter describes the [smart-graph](../README.md) module. +It enables you to manage graphs at scale and gives a vast performance benefit for all graphs sharded in an ArangoDB Cluster. +On a single server this feature is pointless, hence it is only available in cluster mode. +In terms of querying there is no difference between smart and general graphs. +The former are a transparent replacement for the latter. +So for querying the graph please refer to [AQL Graph Operations](../../AQL/Graphs/index.html) and [Graph Functions](GeneralGraphs/Functions.md) sections. +The optimizer is clever enough to identify if we are on a smart graph or not. 
+ +The difference is only in the management section: creating and modifying the underlying collections of the graph. +For a detailed API reference please refer to [Smart Graph Management](SmartGraphs/Management.md). + +!SUBSUBSECTION Benefits of SmartGraphs + +The idea behind SmartGraphs is to extract domain knowledge from the graph and shard the data based on this knowledge to increase locality of the connections in the graph. +With this knowledge queries can be executed in almost identical time compared to a single-server execution. +This performance speedup is achieved as we can reduce the network overhead to a minimum with this knowledge. +However, even if the graph has no good connection locality, using a SmartGraph will still be more performant compared to a GeneralGraph. + +__TODO Add Performance Chart? Reference to a Perf. BlogPost?__ + +!SUBSUBSECTION Getting started + +First of all SmartGraphs *cannot use existing collections*; when switching to SmartGraph from an existing dataset you have to reimport the data into a fresh SmartGraph. +This switch can be easily achieved with [arangodump](../Administration/Arangodump.md) and [arangorestore](../Administration/Arangorestore.md). +The only thing you have to change in this pipeline is that you create the new collections with the SmartGraph before starting arangorestore. + +* Create a graph + In comparison to general graph we have to add more options when creating the graph. The two options `smartGraphAttribute` and `numberOfShards` are required and cannot be modified later. 
+ + + @startDocuBlockInline smartGraphCreateGraphHowTo1 + arangosh> var graph_module = require("@arangodb/smart-graph"); + arangosh> var graph = graph_module._create("myGraph", [], [], {smartGraphAttribute: "region", numberOfShards: 9}); + arangosh> graph; + [ SmartGraph myGraph EdgeDefinitions: [ ] VertexCollections: [ ] ] + @endDocuBlock smartGraphCreateGraphHowTo1 + + +* Add some vertex collections + This is again identical to general graph. The module will set up correct sharding for all these collections. Note: The collections have to be new. + + + @startDocuBlockInline smartGraphCreateGraphHowTo2 + arangosh> graph._addVertexCollection("shop"); + arangosh> graph._addVertexCollection("customer"); + arangosh> graph._addVertexCollection("pet"); + arangosh> graph; + [ SmartGraph myGraph EdgeDefinitions: [ ] VertexCollections: [ "shop", "customer", "pet" ] ] + @endDocuBlock smartGraphCreateGraphHowTo2 + + +* Define relations on the Graph + + + @startDocuBlockInline smartGraphCreateGraphHowTo3 + arangosh> var rel = graph_module._relation("isCustomer", ["shop"], ["customer"]); + arangosh> graph._extendEdgeDefinitions(rel); + arangosh> graph; + [ SmartGraph myGraph EdgeDefinitions: [ "isCustomer: [shop] -> [customer]" ] VertexCollections: [ "pet" ] ] + @endDocuBlock smartGraphCreateGraphHowTo3 diff --git a/Documentation/Books/Manual/SUMMARY.md b/Documentation/Books/Manual/SUMMARY.md index b4272afb84..d928fcd952 100644 --- a/Documentation/Books/Manual/SUMMARY.md +++ b/Documentation/Books/Manual/SUMMARY.md @@ -57,6 +57,7 @@ * [General Graphs](Graphs/GeneralGraphs/README.md) * [Graph Management](Graphs/GeneralGraphs/Management.md) * [Graph Functions](Graphs/GeneralGraphs/Functions.md) + * [Smart Graphs](Graphs/SmartGraphs/README.md) * [Traversals](Graphs/Traversals/README.md) * [Using Traversal Objects](Graphs/Traversals/UsingTraversalObjects.md) * [Example Data](Graphs/Traversals/ExampleData.md) diff --git a/arangod/VocBase/SingleServerTraverser.cpp 
b/arangod/VocBase/SingleServerTraverser.cpp index 714b832226..9c09fd4f3e 100644 --- a/arangod/VocBase/SingleServerTraverser.cpp +++ b/arangod/VocBase/SingleServerTraverser.cpp @@ -54,8 +54,13 @@ static int FetchDocumentById(arangodb::Transaction* trx, return res; } -SingleServerEdgeCursor::SingleServerEdgeCursor(size_t nrCursors) - : _cursors(), _currentCursor(0), _currentSubCursor(0), _cachePos(0) { +SingleServerEdgeCursor::SingleServerEdgeCursor( + size_t nrCursors, std::vector const* mapping) + : _cursors(), + _currentCursor(0), + _currentSubCursor(0), + _cachePos(0), + _internalCursorMapping(mapping) { _cursors.reserve(nrCursors); _cache.reserve(1000); }; @@ -68,7 +73,12 @@ bool SingleServerEdgeCursor::next(std::vector& result, _cachePos++; if (_cachePos < _cache.size()) { result.emplace_back(_cache[_cachePos]->vpack()); - cursorId = _currentCursor; + if (_internalCursorMapping != nullptr) { + TRI_ASSERT(_currentCursor < _internalCursorMapping->size()); + cursorId = _internalCursorMapping->at(_currentCursor); + } else { + cursorId = _currentCursor; + } return true; } // We need to refill the cache. 
@@ -108,7 +118,12 @@ bool SingleServerEdgeCursor::next(std::vector& result, } while (_cache.empty()); TRI_ASSERT(_cachePos < _cache.size()); result.emplace_back(_cache[_cachePos]->vpack()); - cursorId = _currentCursor; + if (_internalCursorMapping != nullptr) { + TRI_ASSERT(_currentCursor < _internalCursorMapping->size()); + cursorId = _internalCursorMapping->at(_currentCursor); + } else { + cursorId = _currentCursor; + } return true; } @@ -117,7 +132,12 @@ bool SingleServerEdgeCursor::readAll(std::unordered_set& result, if (_currentCursor >= _cursors.size()) { return false; } - cursorId = _currentCursor; + if (_internalCursorMapping != nullptr) { + TRI_ASSERT(_currentCursor < _internalCursorMapping->size()); + cursorId = _internalCursorMapping->at(_currentCursor); + } else { + cursorId = _currentCursor; + } auto& cursorSet = _cursors[_currentCursor]; for (auto& cursor : cursorSet) { while (cursor->hasMore()) { diff --git a/arangod/VocBase/SingleServerTraverser.h b/arangod/VocBase/SingleServerTraverser.h index ccc6a2787f..76735c2cb6 100644 --- a/arangod/VocBase/SingleServerTraverser.h +++ b/arangod/VocBase/SingleServerTraverser.h @@ -46,9 +46,11 @@ class SingleServerEdgeCursor : public EdgeCursor { size_t _currentSubCursor; std::vector _cache; size_t _cachePos; + std::vector const* _internalCursorMapping; public: - explicit SingleServerEdgeCursor(size_t); + explicit SingleServerEdgeCursor(size_t, + std::vector const* mapping = nullptr); ~SingleServerEdgeCursor() { for (auto& it : _cursors) {