From 39a3f5bc4eba12b02dac8697da58dda5379c99e2 Mon Sep 17 00:00:00 2001 From: Jan Date: Sat, 23 Mar 2019 20:36:02 +0100 Subject: [PATCH] reintroduce smart joins after temporarily reverting them in devel (#8543) --- .../Collections/DatabaseMethods.md | 16 +- Documentation/Books/Manual/SUMMARY.md | 1 + Documentation/Books/Manual/Satellites.md | 6 +- Documentation/Books/Manual/SmartJoins.md | 195 ++++++++++++ .../Rest/Collections/post_api_collection.md | 15 + arangod/Aql/Collection.cpp | 7 + arangod/Aql/Collection.h | 7 + arangod/Aql/CollectionAccessingNode.cpp | 19 +- arangod/Aql/CollectionAccessingNode.h | 15 + arangod/Aql/EngineInfoContainerDBServer.cpp | 63 ++-- arangod/Aql/ExecutionEngine.cpp | 4 +- arangod/Aql/ExecutionNode.cpp | 2 + arangod/Aql/ExecutionNode.h | 10 + arangod/Aql/IndexNode.cpp | 2 + arangod/Aql/OptimizerRule.h | 7 +- arangod/Aql/OptimizerRules.cpp | 301 +++++------------- arangod/Aql/OptimizerRules.h | 7 +- arangod/Aql/OptimizerRulesFeature.cpp | 7 +- arangod/Cluster/ClusterMethods.cpp | 39 ++- arangod/Cluster/ClusterMethods.h | 5 + arangod/Cluster/DBServerAgencySync.cpp | 2 +- arangod/MMFiles/MMFilesCollection.cpp | 8 +- arangod/RestHandler/RestCollectionHandler.cpp | 5 +- arangod/RestServer/QueryRegistryFeature.cpp | 7 + arangod/RestServer/QueryRegistryFeature.h | 2 + arangod/RocksDBEngine/RocksDBCollection.cpp | 8 +- arangod/Transaction/Methods.cpp | 13 + arangod/Transaction/Methods.h | 3 + arangod/VocBase/LogicalCollection.cpp | 58 +++- arangod/VocBase/LogicalCollection.h | 15 +- arangod/VocBase/Methods/Upgrade.cpp | 11 +- .../js/collections/arangoCollections.js | 5 + .../js/templates/modalCollectionInfo.ejs | 11 + .../APP/frontend/js/views/collectionsView.js | 20 ++ .../modules/@arangodb/arango-collection.js | 1 + .../modules/@arangodb/arango-database.js | 4 +- js/common/bootstrap/errors.js | 4 + js/common/modules/@arangodb/aql/explainer.js | 2 +- lib/Basics/StaticStrings.cpp | 1 + lib/Basics/StaticStrings.h | 1 + lib/Basics/errors.dat | 4 + lib/Basics/voc-errors.cpp | 4 + lib/Basics/voc-errors.h | 23 ++ 43 files changed, 655 insertions(+), 285 deletions(-) create mode 100644 Documentation/Books/Manual/SmartJoins.md diff --git a/Documentation/Books/Manual/DataModeling/Collections/DatabaseMethods.md b/Documentation/Books/Manual/DataModeling/Collections/DatabaseMethods.md index 8f8e05149f..4ad54622d5 100644 --- a/Documentation/Books/Manual/DataModeling/Collections/DatabaseMethods.md +++ b/Documentation/Books/Manual/DataModeling/Collections/DatabaseMethods.md @@ -164,7 +164,7 @@ to the [naming conventions](../NamingConventions/README.md). dramatically when using joins in AQL at the costs of reduced write performance on these collections. -- *distributeShardsLike* distribute the shards of this collection +- *distributeShardsLike*: distribute the shards of this collection cloning the shard distribution of another. If this value is set, it will copy the attributes *replicationFactor*, *numberOfShards* and *shardingStrategy* from the other collection. @@ -199,6 +199,20 @@ to the [naming conventions](../NamingConventions/README.md). In single-server mode, the *shardingStrategy* attribute is meaningless and will be ignored. +- *smartJoinAttribute: in an *Enterprise Edition* cluster, this attribute + determines an attribute of the collection that must contain the shard key value + of the referred-to smart join collection. Additionally, the sharding key + for a document in this collection must contain the value of this attribute, + followed by a colon, followed by the actual primary key of the document. + + This feature can only be used in the *Enterprise Edition* and requires the + *distributeShardsLike* attribute of the collection to be set to the name + of another collection. It also requires the *shardKeys* attribute of the + collection to be set to a single shard key attribute, with an additional ':' + at the end. + A further restriction is that whenever documents are stored or updated in the + collection, the value stored in the *smartJoinAttribute* must be a string. + `db._create(collection-name, properties, type)` Specifies the optional *type* of the collection, it can either be *document* diff --git a/Documentation/Books/Manual/SUMMARY.md b/Documentation/Books/Manual/SUMMARY.md index a5b408bb71..d38e563c43 100644 --- a/Documentation/Books/Manual/SUMMARY.md +++ b/Documentation/Books/Manual/SUMMARY.md @@ -243,6 +243,7 @@ * [Auth and OAuth2](Foxx/Migrating2x/Auth.md) * [Foxx Queries](Foxx/Migrating2x/Queries.md) * [Satellite Collections](Satellites.md) +* [Smart Joins](SmartJoins.md) ## OPERATIONS diff --git a/Documentation/Books/Manual/Satellites.md b/Documentation/Books/Manual/Satellites.md index f2be40bde7..b4ed15b3aa 100644 --- a/Documentation/Books/Manual/Satellites.md +++ b/Documentation/Books/Manual/Satellites.md @@ -6,10 +6,10 @@ This feature is only available in the [**Enterprise Edition**](https://www.arangodb.com/why-arangodb/arangodb-enterprise/) {% endhint %} -When doing Joins in an ArangoDB cluster data has to exchanged between different servers. +When doing joins in an ArangoDB cluster data has to be exchanged between different servers. Joins will be executed on a coordinator. It will prepare an execution plan -and execute it. When executing the coordinator will contact all shards of the +and execute it. When executing, the coordinator will contact all shards of the starting point of the join and ask for their data. The database servers carrying out this operation will load all their local data and then ask the cluster for the other part of the join. This again will be distributed to all involved shards @@ -23,7 +23,7 @@ Satellite collections are collections that are intended to address this issue. They will facilitate the synchronous replication and replicate all its data to all database servers that are part of the cluster. -This enables the database servers to execute that part of any Join locally. +This enables the database servers to execute that part of any join locally. This greatly improves performance for such joins at the costs of increased storage requirements and poorer write performance on this data. diff --git a/Documentation/Books/Manual/SmartJoins.md b/Documentation/Books/Manual/SmartJoins.md new file mode 100644 index 0000000000..e33462f0a2 --- /dev/null +++ b/Documentation/Books/Manual/SmartJoins.md @@ -0,0 +1,195 @@ +Smart Joins +=========== + +{% hint 'info' %} +This feature is only available in the +[**Enterprise Edition**](https://www.arangodb.com/why-arangodb/arangodb-enterprise/) +{% endhint %} + +When doing joins in an ArangoDB cluster data has to be exchanged between different servers. + +Joins between collections in a cluster normally require roundtrips between the shards of +the different collections for fetching the data. Requests are routed through an extra +coordinator hop. + +For example, with two collections *collection1* and *collection2* with 4 shards each, +the coordinator will initially contact the 4 shards of *collection1*. In order to perform +the join, the DBServer nodes which manage the actual data of *collection1* need to pull +the data from the other collection, *collection2*. This causes extra roundtrips via the +coordinator, which will then pull the data for *collection2* from the responsible shards: + + arangosh> db._explain("FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == doc2._key RETURN doc1"); + + Query String: + FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == doc2._key RETURN doc1 + + Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode DBS 1 * ROOT + 3 EnumerateCollectionNode DBS 0 - FOR doc2 IN collection2 /* full collection scan, 4 shard(s) */ + 14 RemoteNode COOR 0 - REMOTE + 15 GatherNode COOR 0 - GATHER + 8 ScatterNode COOR 0 - SCATTER + 9 RemoteNode DBS 0 - REMOTE + 7 IndexNode DBS 0 - FOR doc1 IN collection1 /* primary index scan, 4 shard(s) */ + 10 RemoteNode COOR 0 - REMOTE + 11 GatherNode COOR 0 - GATHER + 6 ReturnNode COOR 0 - RETURN doc1 + +This is the general query execution, and it makes sense if there is no further +information available about how the data is actually distributed to the individual +shards. It works in case *collection1* and *collection2* have a different amount +of shards, or use different shard keys or strategies. However, it comes with the +additional cost of having to do 4 x 4 requests to perform the join. + +Using distributeShardsLike +-------------------------- + +In the specific case that the two collections have the same number of shards and +the same shards, the data of the two collections can be co-located on the same +server for the same shard key values. In this case the extra hop via the coordinator +is not be necessary. + +The query optimizer will remove the extra hop for the join in case it can prove +that data for the two collections is co-located. There are the following requirements +for this: + +* using the cluster version of the ArangoDB *Enterprise Edition* +* using two collections with identical sharding. This requires the second collection + to be created with its *distributeShardsLike* attribute pointing to the first + collection +* using a single shard key per collection + +Here is an example setup for this, using arangosh: + + arangosh> db._create("collection1", {numberOfShards: 4, shardKeys: ["_key"]}); + arangosh> db._create("collection2", {numberOfShards: 4, shardKeys: ["_key"], distributeShardsLike: "collection1"}); + arangosh> for (i = 0; i < 100; ++i) { + db.collection1.insert({ _key: "test" + i }); + db.collection2.insert({ _key: "test" + i }); + } + +With the two collections in place like this, an AQL query that uses a FILTER condition +that refers from the shard key of the one collection to the shard key of the other collection +and compares the two shard key values by equality is eligible for the "smart-join" +optimization of the query optimizer: + + arangosh> db._explain("FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == doc2._key RETURN doc1"); + + Query String: + FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == doc2._key RETURN doc1 + + Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode DBS 1 * ROOT + 3 EnumerateCollectionNode DBS 0 - FOR doc2 IN collection2 /* full collection scan, 4 shard(s) */ + 7 IndexNode DBS 0 - FOR doc1 IN collection1 /* primary index scan, 4 shard(s) */ + 10 RemoteNode COOR 0 - REMOTE + 11 GatherNode COOR 0 - GATHER + 6 ReturnNode COOR 0 - RETURN doc1 + +As can be seen above, the extra hop via the coordinator is gone here, which will mean +less cluster-internal traffic and a faster response time. + + +Smart joins will also work if the shard key of the second collection is not *_key*, +and even for non-unique shard key values, e.g.: + + arangosh> db._create("collection1", {numberOfShards: 4, shardKeys: ["_key"]}); + arangosh> db._create("collection2", {numberOfShards: 4, shardKeys: ["parent"], distributeShardsLike: "collection1"}); + arangosh> db.collection2.ensureIndex({ type: "hash", fields: ["parent"] }); + arangosh> for (i = 0; i < 100; ++i) { + db.collection1.insert({ _key: "test" + i }); + for (j = 0; j < 10; ++j) { + db.collection2.insert({ parent: "test" + i }); + } + } + + arangosh> db._explain("FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == doc2.parent RETURN doc1"); + + Query String: + FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == doc2.parent RETURN doc1 + + Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode DBS 1 * ROOT + 3 EnumerateCollectionNode DBS 2000 - FOR doc2 IN collection2 /* full collection scan, 4 shard(s) */ + 7 IndexNode DBS 2000 - FOR doc1 IN collection1 /* primary index scan, 4 shard(s) */ + 10 RemoteNode COOR 2000 - REMOTE + 11 GatherNode COOR 2000 - GATHER + 6 ReturnNode COOR 2000 - RETURN doc1 + + +Using smartJoinAttribute +------------------------ + +In case the join on the second collection must be performed on a non-shard key +attribute, there is the option to specify a *smartJoinAttribute* for the collection. +Note that for this case, setting *distributeShardsLike* is still required here, and that that +only a single *shardKeys* attribute can be used. +The single attribute name specified in the *shardKeys* attribute for the collection must end +with a colon character then. + +This *smartJoinAttribute* must be populated for all documents in the collection, +and must always contain a string value. The value of the *_key* attribute for each +document must consist of the value of the *smartJoinAttribute*, a colon character +and then some other user-defined key component. + +The setup thus becomes: + + arangosh> db._create("collection1", {numberOfShards: 4, shardKeys: ["_key"]}); + arangosh> db._create("collection2", {numberOfShards: 4, shardKeys: ["_key:"], smartJoinAttribute: "parent", distributeShardsLike: "collection1"}); + arangosh> db.collection2.ensureIndex({ type: "hash", fields: ["parent"] }); + arangosh> for (i = 0; i < 100; ++i) { + db.collection1.insert({ _key: "test" + i }); + db.collection2.insert({ _key: "test" + i + ":" + "ownKey" + i, parent: "test" + i }); + } + +Failure to populate the *smartGraphAttribute* with a string or not at all will lead +to a document being rejected on insert, update or replace. Similarly, failure to +prefix a document's *_key* attribute value with the value of the *smartGraphAttribute* +will also lead to the document being rejected: + + arangosh> db.collection2.insert({ parent: 123 }); + JavaScript exception in file './js/client/modules/@arangodb/arangosh.js' at 99,7: ArangoError 4008: smart join attribute not given or invalid + + arangosh> db.collection2.insert({ _key: "123:test1", parent: "124" }); + JavaScript exception in file './js/client/modules/@arangodb/arangosh.js' at 99,7: ArangoError 4007: shard key value must be prefixed with the value of the smart join attribute + +The join can now be performed via the collection's *smartJoinAttribute*: + + arangosh> db._explain("FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == doc2.parent RETURN doc1") + + Query String: + FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == doc2.parent RETURN doc1 + + Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode DBS 1 * ROOT + 3 EnumerateCollectionNode DBS 101 - FOR doc2 IN collection2 /* full collection scan, 4 shard(s) */ + 7 IndexNode DBS 101 - FOR doc1 IN collection1 /* primary index scan, 4 shard(s) */ + 10 RemoteNode COOR 101 - REMOTE + 11 GatherNode COOR 101 - GATHER + 6 ReturnNode COOR 101 - RETURN doc1 + + +Restricting smart joins to a single shard +----------------------------------------- + +If a FILTER condition is used on one of the shard keys, the optimizer will also try +to restrict the queries to just the required shards: + +arangosh> db._explain("FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == 'test' && doc1._key == doc2.value RETURN doc1"); + + Query String: + FOR doc1 IN collection1 FOR doc2 IN collection2 FILTER doc1._key == 'test' && doc1._key == doc2.value + RETURN doc1 + + Execution plan: + Id NodeType Site Est. Comment + 1 SingletonNode DBS 1 * ROOT + 8 IndexNode DBS 1 - FOR doc1 IN collection1 /* primary index scan, shard: s2010246 */ + 7 IndexNode DBS 1 - FOR doc2 IN collection2 /* primary index scan, scan only, shard: s2010253 */ + 12 RemoteNode COOR 1 - REMOTE + 13 GatherNode COOR 1 - GATHER + 6 ReturnNode COOR 1 - RETURN doc1 diff --git a/Documentation/DocuBlocks/Rest/Collections/post_api_collection.md b/Documentation/DocuBlocks/Rest/Collections/post_api_collection.md index 72d1daeb69..9dd6a2a474 100644 --- a/Documentation/DocuBlocks/Rest/Collections/post_api_collection.md +++ b/Documentation/DocuBlocks/Rest/Collections/post_api_collection.md @@ -168,6 +168,21 @@ collections (requires the *Enterprise Edition* of ArangoDB). Manually overriding the sharding strategy does not yet provide a benefit, but it may later in case other sharding strategies are added. +@RESTBODYPARAM{smartJoinAttribute,string,optional,string} +In an *Enterprise Edition* cluster, this attribute determines an attribute +of the collection that must contain the shard key value of the referred-to +smart join collection. Additionally, the shard key for a document in this +collection must contain the value of this attribute, followed by a colon, +followed by the actual primary key of the document. + +This feature can only be used in the *Enterprise Edition* and requires the +*distributeShardsLike* attribute of the collection to be set to the name +of another collection. It also requires the *shardKeys* attribute of the +collection to be set to a single shard key attribute, with an additional ':' +at the end. +A further restriction is that whenever documents are stored or updated in the +collection, the value stored in the *smartJoinAttribute* must be a string. + @RESTQUERYPARAMETERS @RESTQUERYPARAM{waitForSyncReplication,integer,optional} diff --git a/arangod/Aql/Collection.cpp b/arangod/Aql/Collection.cpp index c6c47b5934..c38ea17244 100644 --- a/arangod/Aql/Collection.cpp +++ b/arangod/Aql/Collection.cpp @@ -58,6 +58,9 @@ void Collection::setExclusiveAccess() { /// @brief get the collection id TRI_voc_cid_t Collection::id() const { return getCollection()->id(); } +/// @brief collection type +TRI_col_type_e Collection::type() const { return getCollection()->type(); } + /// @brief count the number of documents in the collection size_t Collection::count(transaction::Methods* trx) const { // estimate for the number of documents in the collection. may be outdated... @@ -186,3 +189,7 @@ bool Collection::isSmart() const { return getCollection()->isSmart(); } /// @brief check if collection is a satellite collection bool Collection::isSatellite() const { return getCollection()->isSatellite(); } + +/// @brief return the name of the smart join attribute (empty string +/// if no smart join attribute is present) +std::string const& Collection::smartJoinAttribute() const { return getCollection()->smartJoinAttribute(); } diff --git a/arangod/Aql/Collection.h b/arangod/Aql/Collection.h index e4766be986..2ca6e5a3dc 100644 --- a/arangod/Aql/Collection.h +++ b/arangod/Aql/Collection.h @@ -76,6 +76,9 @@ struct Collection { // non-sharding case: simply return the name return _name; } + + /// @brief collection type + TRI_col_type_e type() const; /// @brief count the number of documents in the collection size_t count(transaction::Methods* trx) const; @@ -119,6 +122,10 @@ struct Collection { /// @brief check if collection is a satellite collection bool isSatellite() const; + /// @brief return the name of the smart join attribute (empty string + /// if no smart join attribute is present) + std::string const& smartJoinAttribute() const; + private: arangodb::LogicalCollection* _collection; diff --git a/arangod/Aql/CollectionAccessingNode.cpp b/arangod/Aql/CollectionAccessingNode.cpp index 6233f33a34..2a289cd7ad 100644 --- a/arangod/Aql/CollectionAccessingNode.cpp +++ b/arangod/Aql/CollectionAccessingNode.cpp @@ -32,20 +32,30 @@ #include "VocBase/LogicalCollection.h" #include "VocBase/vocbase.h" +#include #include using namespace arangodb; using namespace arangodb::aql; CollectionAccessingNode::CollectionAccessingNode(aql::Collection const* collection) - : _collection(collection) { + : _collection(collection), + _prototypeCollection(nullptr), + _prototypeOutVariable(nullptr) { TRI_ASSERT(_collection != nullptr); } CollectionAccessingNode::CollectionAccessingNode(ExecutionPlan* plan, arangodb::velocypack::Slice slice) : _collection(plan->getAst()->query()->collections()->get( - slice.get("collection").copyString())) { + slice.get("collection").copyString())), + _prototypeCollection(nullptr), + _prototypeOutVariable(nullptr) { + + if (slice.get("prototype").isString()) { + _prototypeCollection = plan->getAst()->query()->collections()->get(slice.get("prototype").copyString()); + } + TRI_ASSERT(_collection != nullptr); if (_collection == nullptr) { @@ -76,8 +86,11 @@ void CollectionAccessingNode::collection(aql::Collection const* collection) { void CollectionAccessingNode::toVelocyPack(arangodb::velocypack::Builder& builder) const { builder.add("database", VPackValue(_collection->vocbase()->name())); builder.add("collection", VPackValue(_collection->name())); + if (_prototypeCollection != nullptr) { + builder.add("prototype", VPackValue(_prototypeCollection->name())); + } builder.add("satellite", VPackValue(_collection->isSatellite())); - + if (ServerState::instance()->isCoordinator()) { builder.add(StaticStrings::NumberOfShards, VPackValue(_collection->numberOfShards())); } diff --git a/arangod/Aql/CollectionAccessingNode.h b/arangod/Aql/CollectionAccessingNode.h index 630473cc63..3209a50161 100644 --- a/arangod/Aql/CollectionAccessingNode.h +++ b/arangod/Aql/CollectionAccessingNode.h @@ -35,6 +35,7 @@ namespace arangodb { namespace aql { struct Collection; class ExecutionPlan; +struct Variable; class CollectionAccessingNode { public: @@ -78,11 +79,25 @@ class CollectionAccessingNode { */ std::string const& restrictedShard() const { return _restrictedTo; } + /// @brief set the prototype collection when using distributeShardsLike + void setPrototype(arangodb::aql::Collection const* prototypeCollection, + arangodb::aql::Variable const* prototypeOutVariable) { + _prototypeCollection = prototypeCollection; + _prototypeOutVariable = prototypeOutVariable; + } + + aql::Collection const* prototypeCollection() const { return _prototypeCollection; } + aql::Variable const* prototypeOutVariable() const { return _prototypeOutVariable; } + protected: aql::Collection const* _collection; /// @brief A shard this node is restricted to, may be empty std::string _restrictedTo; + + /// @brief prototype collection when using distributeShardsLike + aql::Collection const* _prototypeCollection; + aql::Variable const* _prototypeOutVariable; }; } // namespace aql diff --git a/arangod/Aql/EngineInfoContainerDBServer.cpp b/arangod/Aql/EngineInfoContainerDBServer.cpp index 1de283ac68..b7be239a06 100644 --- a/arangod/Aql/EngineInfoContainerDBServer.cpp +++ b/arangod/Aql/EngineInfoContainerDBServer.cpp @@ -293,7 +293,7 @@ void EngineInfoContainerDBServer::EngineInfo::serializeSnippet( viewNode->shards() = shards; } else #endif - if (ExecutionNode::REMOTE == nodeType) { + if (ExecutionNode::REMOTE == nodeType) { auto rem = ExecutionNode::castTo(clone); // update the remote node with the information about the query rem->server("server:" + arangodb::ServerState::instance()->getId()); @@ -319,7 +319,7 @@ void EngineInfoContainerDBServer::EngineInfo::serializeSnippet( plan.root(previous); plan.setVarUsageComputed(); - // Always Verbose + // Always verbose const unsigned flags = ExecutionNode::SERIALIZE_DETAILS; plan.root()->toVelocyPack(infoBuilder, flags, /*keepTopLevelOpen*/ false); } @@ -348,6 +348,8 @@ void EngineInfoContainerDBServer::EngineInfo::serializeSnippet( // this clone does the translation collection => shardId implicitly // at the relevant parts of the query. + std::unordered_set cleanup; + cleanup.emplace(collection->collection); collection->collection->setCurrentShard(id); ExecutionPlan plan(query.ast()); @@ -361,6 +363,26 @@ void EngineInfoContainerDBServer::EngineInfo::serializeSnippet( // we need to count nodes by type ourselves, as we will set the // "varUsageComputed" flag below (which will handle the counting) plan.increaseCounter(nodeType); + + if (nodeType == ExecutionNode::INDEX || nodeType == ExecutionNode::ENUMERATE_COLLECTION) { + auto x = dynamic_cast(clone); + auto const* prototype = x->prototypeCollection(); + if (prototype != nullptr) { + auto s1 = prototype->shardIds(); + auto s2 = x->collection()->shardIds(); + if (s1->size() == s2->size()) { + for (size_t i = 0; i < s1->size(); ++i) { + if ((*s1)[i] == id) { + // inject shard id into collection + auto collection = const_cast(x->collection()); + collection->setCurrentShard((*s2)[i]); + cleanup.emplace(collection); + break; + } + } + } + } + } if (ExecutionNode::REMOTE == nodeType) { auto rem = ExecutionNode::castTo(clone); @@ -390,7 +412,11 @@ void EngineInfoContainerDBServer::EngineInfo::serializeSnippet( plan.setVarUsageComputed(); const unsigned flags = ExecutionNode::SERIALIZE_DETAILS; plan.root()->toVelocyPack(infoBuilder, flags, /*keepTopLevelOpen*/ false); - collection->collection->resetCurrentShard(); + + // remove shard id hack for all participating collections + for (auto& it : cleanup) { + it->resetCurrentShard(); + } } void EngineInfoContainerDBServer::CollectionInfo::mergeShards( @@ -408,30 +434,19 @@ void EngineInfoContainerDBServer::addNode(ExecutionNode* node) { TRI_ASSERT(!_engineStack.empty()); _engineStack.top()->addNode(node); switch (node->getType()) { - case ExecutionNode::ENUMERATE_COLLECTION: { - auto* scatter = findFirstScatter(*node); - auto const& colNode = *ExecutionNode::castTo(node); - auto const* col = colNode.collection(); - - std::unordered_set restrictedShard; - if (colNode.isRestricted()) { - restrictedShard.emplace(colNode.restrictedShard()); - } - - handleCollection(col, AccessMode::Type::READ, scatter, restrictedShard); - updateCollection(col); - break; - } + case ExecutionNode::ENUMERATE_COLLECTION: case ExecutionNode::INDEX: { auto* scatter = findFirstScatter(*node); - auto const& idxNode = *ExecutionNode::castTo(node); - auto const* col = idxNode.collection(); - + auto const* colNode = dynamic_cast(node); + if (colNode == nullptr) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INTERNAL, "unable to cast node to CollectionAccessingNode"); + } std::unordered_set restrictedShard; - if (idxNode.isRestricted()) { - restrictedShard.emplace(idxNode.restrictedShard()); + if (colNode->isRestricted()) { + restrictedShard.emplace(colNode->restrictedShard()); } + auto const* col = colNode->collection(); handleCollection(col, AccessMode::Type::READ, scatter, restrictedShard); updateCollection(col); break; @@ -506,8 +521,8 @@ void EngineInfoContainerDBServer::closeSnippet(QueryId coordinatorEngineId) { if (it == _collectionInfos.end()) { THROW_ARANGO_EXCEPTION_MESSAGE( TRI_ERROR_INTERNAL, - "Created a DBServer QuerySnippet without a Collection. This should " - "not happen. Please report this query to ArangoDB"); + "created a DBServer QuerySnippet without a collection. This should " + "not happen"); } it->second.engines.emplace_back(std::move(e)); } diff --git a/arangod/Aql/ExecutionEngine.cpp b/arangod/Aql/ExecutionEngine.cpp index 94afcff6e2..3ba7ab0a87 100644 --- a/arangod/Aql/ExecutionEngine.cpp +++ b/arangod/Aql/ExecutionEngine.cpp @@ -125,7 +125,7 @@ Result ExecutionEngine::createBlocks(std::vector const& nodes, if (nodeType == ExecutionNode::GATHER) { // we found a gather node if (remoteNode == nullptr) { - return {TRI_ERROR_INTERNAL, "expecting a remoteNode"}; + return {TRI_ERROR_INTERNAL, "expecting a RemoteNode"}; } // now we'll create a remote node for each shard and add it to the @@ -136,7 +136,7 @@ Result ExecutionEngine::createBlocks(std::vector const& nodes, TRI_ASSERT(serversForRemote != queryIds.end()); if (serversForRemote == queryIds.end()) { return {TRI_ERROR_INTERNAL, - "Did not find a DBServer to contact for RemoteNode."}; + "Did not find a DBServer to contact for RemoteNode"}; } // use "server:" instead of "shard:" to send query fragments to diff --git a/arangod/Aql/ExecutionNode.cpp b/arangod/Aql/ExecutionNode.cpp index 123f58e83f..dcea9f356b 100644 --- a/arangod/Aql/ExecutionNode.cpp +++ b/arangod/Aql/ExecutionNode.cpp @@ -1331,6 +1331,8 @@ ExecutionNode* EnumerateCollectionNode::clone(ExecutionPlan* plan, bool withDepe outVariable, _random, _hint); c->projections(_projections); + c->_prototypeCollection = _prototypeCollection; + c->_prototypeOutVariable = _prototypeOutVariable; return cloneHelper(std::move(c), withDependencies, withProperties); } diff --git a/arangod/Aql/ExecutionNode.h b/arangod/Aql/ExecutionNode.h index 0a766489de..347dec968a 100644 --- a/arangod/Aql/ExecutionNode.h +++ b/arangod/Aql/ExecutionNode.h @@ -262,6 +262,16 @@ class ExecutionNode { } } + /// @brief get the singleton node of the node + ExecutionNode const* getSingleton() const { + auto node = this; + do { + node = node->getFirstDependency(); + } while (node != nullptr && node->getType() != SINGLETON); + + return node; + } + /// @brief get the node and its dependencies as a vector void getDependencyChain(std::vector& result, bool includeSelf) { auto current = this; diff --git a/arangod/Aql/IndexNode.cpp b/arangod/Aql/IndexNode.cpp index 2df04e401c..a8ba5d386d 100644 --- a/arangod/Aql/IndexNode.cpp +++ b/arangod/Aql/IndexNode.cpp @@ -410,6 +410,8 @@ ExecutionNode* IndexNode::clone(ExecutionPlan* plan, bool withDependencies, c->projections(_projections); c->needsGatherNodeSort(_needsGatherNodeSort); c->initIndexCoversProjections(); + c->_prototypeCollection = _prototypeCollection; + c->_prototypeOutVariable = _prototypeOutVariable; return cloneHelper(std::move(c), withDependencies, withProperties); } diff --git a/arangod/Aql/OptimizerRule.h b/arangod/Aql/OptimizerRule.h index d1f4296a5f..ce819b3a27 100644 --- a/arangod/Aql/OptimizerRule.h +++ b/arangod/Aql/OptimizerRule.h @@ -203,9 +203,10 @@ struct OptimizerRule { // make operations on sharded collections use distribute distributeInClusterRule, - - // try to find candidates for shard-local joins in the cluster - optimizeClusterJoinsRule, + +#ifdef USE_ENTERPRISE + smartJoinsRule, +#endif // make operations on sharded collections use scatter / gather / remote scatterInClusterRule, diff --git a/arangod/Aql/OptimizerRules.cpp b/arangod/Aql/OptimizerRules.cpp index 4613922e4e..1a386eb161 100644 --- a/arangod/Aql/OptimizerRules.cpp +++ b/arangod/Aql/OptimizerRules.cpp @@ -542,15 +542,6 @@ std::vector const patchUpdateRemoveState arangodb::aql::ExecutionNode::UPDATE, arangodb::aql::ExecutionNode::REPLACE, arangodb::aql::ExecutionNode::REMOVE}; -int indexOf(std::vector const& haystack, std::string const& needle) { - for (size_t i = 0; i < haystack.size(); ++i) { - if (haystack[i] == needle) { - return static_cast(i); - } - } - return -1; -} - /// @brief find the single shard id for the node to restrict an operation to /// this will check the conditions of an IndexNode or a data-modification node /// (excluding UPSERT) and check if all shard keys are used in it. If all @@ -1271,19 +1262,20 @@ void arangodb::aql::removeCollectVariablesRule(Optimizer* opt, class PropagateConstantAttributesHelper { public: - PropagateConstantAttributesHelper() : _constants(), _modified(false) {} + explicit PropagateConstantAttributesHelper(ExecutionPlan* plan) + : _plan(plan), _modified(false) {} bool modified() const { return _modified; } /// @brief inspects a plan and propages constant values in expressions - void propagateConstants(ExecutionPlan* plan) { + void propagateConstants() { SmallVector::allocator_type::arena_type a; SmallVector nodes{a}; - plan->findNodesOfType(nodes, EN::FILTER, true); + _plan->findNodesOfType(nodes, EN::FILTER, true); for (auto const& node : nodes) { auto fn = ExecutionNode::castTo(node); - auto setter = plan->getVarSetBy(fn->inVariable()->id); + auto setter = _plan->getVarSetBy(fn->inVariable()->id); if (setter != nullptr && setter->getType() == EN::CALCULATION) { auto cn = ExecutionNode::castTo(setter); auto expression = cn->expression(); @@ -1297,7 +1289,7 @@ class PropagateConstantAttributesHelper { if (!_constants.empty()) { for (auto const& node : nodes) { auto fn = ExecutionNode::castTo(node); - auto setter = plan->getVarSetBy(fn->inVariable()->id); + auto setter = _plan->getVarSetBy(fn->inVariable()->id); if (setter != nullptr && setter->getType() == EN::CALCULATION) { auto cn = ExecutionNode::castTo(setter); auto expression = cn->expression(); @@ -1444,21 +1436,56 @@ class PropagateConstantAttributesHelper { void insertConstantAttribute(AstNode* parentNode, size_t accessIndex) { Variable const* variable = nullptr; std::string name; + + AstNode* member = parentNode->getMember(accessIndex); - if (!getAttribute(parentNode->getMember(accessIndex), variable, name)) { + if (!getAttribute(member, variable, name)) { return; } auto constantValue = getConstant(variable, name); if (constantValue != nullptr) { + // first check if we would optimize away a join condition that uses a smartJoinAttribute... + // we must not do that, because that would otherwise disable smart join functionality + if (arangodb::ServerState::instance()->isCoordinator() && + parentNode->type == NODE_TYPE_OPERATOR_BINARY_EQ) { + AstNode const* current = parentNode->getMember(accessIndex == 0 ? 1 : 0); + if (current->type == NODE_TYPE_ATTRIBUTE_ACCESS) { + AstNode const* nameAttribute = current; + current = current->getMember(0); + if (current->type == NODE_TYPE_REFERENCE) { + auto setter = _plan->getVarSetBy(static_cast(current->getData())->id); + if (setter != nullptr && + (setter->getType() == EN::ENUMERATE_COLLECTION || setter->getType() == EN::INDEX)) { + auto collection = ::getCollection(setter); + if (collection != nullptr) { + auto logical = collection->getCollection(); + if (logical->hasSmartJoinAttribute() && + logical->smartJoinAttribute() == nameAttribute->getString()) { + // don't remove a smart join attribute access! + return; + } else { + std::vector const& shardKeys = logical->shardKeys(); + if (std::find(shardKeys.begin(), shardKeys.end(), nameAttribute->getString()) != shardKeys.end()) { + // don't remove equality lookups on shard keys, as this may prevent + // the restrict-to-single-shard rule from being applied later! + return; + } + } + } + } + } + } + } + parentNode->changeMember(accessIndex, const_cast(constantValue)); _modified = true; } } + ExecutionPlan* _plan; std::unordered_map> _constants; - bool _modified; }; @@ -1466,8 +1493,8 @@ class PropagateConstantAttributesHelper { void arangodb::aql::propagateConstantAttributesRule(Optimizer* opt, std::unique_ptr plan, OptimizerRule const* rule) { - PropagateConstantAttributesHelper helper; - helper.propagateConstants(plan.get()); + PropagateConstantAttributesHelper helper(plan.get()); + helper.propagateConstants(); opt->addPlan(std::move(plan), rule, helper.modified()); } @@ -3503,7 +3530,7 @@ void arangodb::aql::optimizeClusterSingleShardRule(Optimizer* opt, if (!nodes.empty() && !hasIncompatibleNodes) { // turn off all other cluster optimization rules now as they are superfluous - opt->disableRule(OptimizerRule::optimizeClusterJoinsRule); + //opt->disableRule(OptimizerRule::optimizeClusterJoinsRule); opt->disableRule(OptimizerRule::distributeInClusterRule); opt->disableRule(OptimizerRule::scatterInClusterRule); opt->disableRule(OptimizerRule::distributeFilternCalcToClusterRule); @@ -3542,205 +3569,6 @@ void arangodb::aql::optimizeClusterSingleShardRule(Optimizer* opt, opt->addPlan(std::move(plan), rule, wasModified); } -void arangodb::aql::optimizeClusterJoinsRule(Optimizer* opt, - std::unique_ptr plan, - OptimizerRule const* rule) { - TRI_ASSERT(arangodb::ServerState::instance()->isCoordinator()); - bool wasModified = false; - - SmallVector::allocator_type::arena_type s; - SmallVector nodes{s}; - std::vector const types = {ExecutionNode::ENUMERATE_COLLECTION, - ExecutionNode::INDEX}; - plan->findNodesOfType(nodes, types, true); - - for (auto& n : nodes) { - ExecutionNode* current = n->getFirstDependency(); - while (current != nullptr) { - if (current->getType() == ExecutionNode::ENUMERATE_COLLECTION || - current->getType() == ExecutionNode::INDEX) { - Collection const* c1 = ::getCollection(n); - Collection const* c2 = ::getCollection(current); - - bool qualifies = false; - - // check how many (different) responsible servers we have for this - // collection - std::unordered_set responsibleServers; - size_t n1 = c1->responsibleServers(responsibleServers); - size_t n2 = c2->responsibleServers(responsibleServers); - - if (responsibleServers.size() == 1 && c1->numberOfShards() == 1 && - c2->numberOfShards() == 1) { - // a single responsible server. so we can use a shard-local access - qualifies = true; - } else if ((c1->isSatellite() && (c2->numberOfShards() == 1 || n2 == 1)) || - (c2->isSatellite() && (c1->numberOfShards() == 1 || n1 == 1))) { - // a satellite collection and another collection with a single shard - // or single responsible server - qualifies = true; - } - - if (!qualifies && n->getType() == EN::INDEX) { - Variable const* indexVariable = ::getOutVariable(n); - Variable const* otherVariable = ::getOutVariable(current); - - std::string dist1 = c1->distributeShardsLike(); - std::string dist2 = c2->distributeShardsLike(); - - // convert cluster collection names into proper collection names - if (!dist1.empty()) { - auto trx = plan->getAst()->query()->trx(); - dist1 = trx->resolver()->getCollectionNameCluster( - static_cast(basics::StringUtils::uint64(dist1))); - } - if (!dist2.empty()) { - auto trx = plan->getAst()->query()->trx(); - dist2 = trx->resolver()->getCollectionNameCluster( - static_cast(basics::StringUtils::uint64(dist2))); - } - - if (dist1 == c2->name() || dist2 == c1->name() || - (!dist1.empty() && dist1 == dist2)) { - // collections have the same "distributeShardsLike" values - // so their shards are distributed to the same servers for the - // same shardKey values - // now check if the number of shardKeys match - auto keys1 = c1->shardKeys(); - auto keys2 = c2->shardKeys(); - - if (keys1.size() == keys2.size()) { - // same number of shard keys... now check if the shard keys are - // all used and whether we only have equality joins - Condition const* condition = - ExecutionNode::castTo(n)->condition(); - - if (condition != nullptr) { - AstNode const* root = condition->root(); - - if (root != nullptr && root->type == NODE_TYPE_OPERATOR_NARY_OR) { - size_t found = 0; - size_t numAnds = root->numMembers(); - - for (size_t i = 0; i < numAnds; ++i) { - AstNode const* andNode = root->getMember(i); - - if (andNode == nullptr) { - continue; - } - - TRI_ASSERT(andNode->type == NODE_TYPE_OPERATOR_NARY_AND); - - std::unordered_set shardKeysFound; - size_t numConds = andNode->numMembers(); - - if (numConds < keys1.size()) { - // too few join conditions, so we will definitely not - // cover all shardKeys - break; - } - - for (size_t j = 0; j < numConds; ++j) { - AstNode const* condNode = andNode->getMember(j); - - if (condNode == nullptr || condNode->type != NODE_TYPE_OPERATOR_BINARY_EQ) { - // something other than an equality join. we do not - // support this - continue; - } - - // equality comparison - // now check if this comparison has the pattern - // . == - // . - - auto const* lhs = condNode->getMember(0); - auto const* rhs = condNode->getMember(1); - - if (lhs->type != NODE_TYPE_ATTRIBUTE_ACCESS || - rhs->type != NODE_TYPE_ATTRIBUTE_ACCESS) { - // something else - continue; - } - - AstNode const* lhsData = lhs->getMember(0); - AstNode const* rhsData = rhs->getMember(0); - - if (lhsData->type != NODE_TYPE_REFERENCE || - rhsData->type != NODE_TYPE_REFERENCE) { - // something else - continue; - } - - Variable const* lhsVar = - static_cast(lhsData->getData()); - Variable const* rhsVar = - static_cast(rhsData->getData()); - - std::string leftString = lhs->getString(); - std::string rightString = rhs->getString(); - - int pos = -1; - if (lhsVar == indexVariable && rhsVar == otherVariable && - indexOf(keys1, leftString) == indexOf(keys2, rightString)) { - pos = indexOf(keys1, leftString); - // indexedCollection.shardKeyAttribute == - // otherCollection.shardKeyAttribute - } else if (lhsVar == otherVariable && rhsVar == indexVariable && - indexOf(keys2, leftString) == indexOf(keys1, rightString)) { - // otherCollection.shardKeyAttribute == - // indexedCollection.shardKeyAttribute - pos = indexOf(keys2, leftString); - } - - // we found a shardKeys match - if (pos != -1) { - shardKeysFound.emplace(pos); - } - } - - // conditions match - if (shardKeysFound.size() >= keys1.size()) { - // all shard keys covered - ++found; - } else { - // not all shard keys covered - break; - } - } - - qualifies = (found > 0 && found == numAnds); - } - } - } - } - } - // everything else does not qualify - - if (qualifies) { - wasModified = true; - - plan->excludeFromScatterGather(current); - break; // done for this pair - } - - } else if (current->getType() != ExecutionNode::FILTER && - current->getType() != ExecutionNode::CALCULATION && - current->getType() != ExecutionNode::LIMIT) { - // we allow just these nodes in between and ignore them - // we need to stop for all other types of nodes - break; - } - - current = current->getFirstDependency(); - } - } - - opt->addPlan(std::move(plan), rule, wasModified); -} - /// @brief scatter operations in cluster /// this rule inserts scatter, gather and remote nodes so operations on sharded /// collections actually work @@ -4808,6 +4636,41 @@ void arangodb::aql::restrictToSingleShardRule(Optimizer* opt, arangodb::HashSet toUnlink; std::map> modificationRestrictions; + + // forward a shard key restriction from one collection to the other if the two collections + // are used in a smart join (and use distributeShardsLike on each other) + auto forwardRestrictionToPrototype = [&plan](ExecutionNode const* current, std::string const& shardId) { + auto collectionNode = dynamic_cast(current); + if (collectionNode == nullptr) { + return; + } + auto prototypeOutVariable = collectionNode->prototypeOutVariable(); + if (prototypeOutVariable == nullptr) { + return; + } + + auto setter = plan->getVarSetBy(prototypeOutVariable->id); + if (setter == nullptr || + (setter->getType() != EN::INDEX && setter->getType() != EN::ENUMERATE_COLLECTION)) { + return; + } + + auto s1 = ::getCollection(current)->shardIds(); + auto s2 = ::getCollection(setter)->shardIds(); + + if (s1->size() != s2->size()) { + // different number of shard ids... should not happen if we have a prototype + return; + } + + // find matching shard key + for (size_t i = 0; i < s1->size(); ++i) { + if ((*s1)[i] == shardId) { + ::restrictToShard(setter, (*s2)[i]); + break; + } + } + }; for (auto& node : nodes) { TRI_ASSERT(node->getType() == ExecutionNode::REMOTE); @@ -4882,12 +4745,14 @@ void arangodb::aql::restrictToSingleShardRule(Optimizer* opt, if (finder.isSafeForOptimization(collectionVariable) && !shardId.empty()) { wasModified = true; ::restrictToShard(current, shardId); + forwardRestrictionToPrototype(current, shardId); } else if (finder.isSafeForOptimization(collection)) { auto& shards = modificationRestrictions[collection]; if (shards.size() == 1) { wasModified = true; shardId = *shards.begin(); ::restrictToShard(current, shardId); + forwardRestrictionToPrototype(current, shardId); } } } else if (currentType == ExecutionNode::UPSERT || currentType == ExecutionNode::REMOTE || diff --git a/arangod/Aql/OptimizerRules.h b/arangod/Aql/OptimizerRules.h index 3fd67db31f..26e6fda8a1 100644 --- a/arangod/Aql/OptimizerRules.h +++ b/arangod/Aql/OptimizerRules.h @@ -129,10 +129,6 @@ void substituteClusterSingleDocumentOperations(Optimizer* opt, void optimizeClusterSingleShardRule(Optimizer*, std::unique_ptr, OptimizerRule const*); -/// @brief try to find candidates for shard-local joins in the cluster -void optimizeClusterJoinsRule(Optimizer*, std::unique_ptr, - OptimizerRule const*); - /// @brief scatter operations in cluster - send all incoming rows to all remote /// clients void scatterInClusterRule(Optimizer*, std::unique_ptr, OptimizerRule const*); @@ -156,6 +152,9 @@ ExecutionNode* distributeInClusterRuleSmartEdgeCollection(ExecutionPlan*, Subque /// @brief remove scatter/gather and remote nodes for satellite collections void removeSatelliteJoinsRule(Optimizer*, std::unique_ptr, OptimizerRule const*); + +void smartJoinsRule(Optimizer*, std::unique_ptr, + OptimizerRule const*); #endif /// @brief try to restrict fragments to a single shard if possible diff --git a/arangod/Aql/OptimizerRulesFeature.cpp b/arangod/Aql/OptimizerRulesFeature.cpp index a82d270c30..405bae2a50 100644 --- a/arangod/Aql/OptimizerRulesFeature.cpp +++ b/arangod/Aql/OptimizerRulesFeature.cpp @@ -299,9 +299,6 @@ void OptimizerRulesFeature::addRules() { #if 0 registerRule("optimize-cluster-single-shard", optimizeClusterSingleShardRule, OptimizerRule::optimizeClusterSingleShardRule, DoesNotCreateAdditionalPlans, CanBeDisabled); - - registerRule("optimize-cluster-joins", optimizeClusterJoinsRule, - OptimizerRule::optimizeClusterJoinsRule, DoesNotCreateAdditionalPlans, CanBeDisabled); #endif // distribute operations in cluster @@ -336,6 +333,10 @@ void OptimizerRulesFeature::addRules() { registerRule("remove-satellite-joins", removeSatelliteJoinsRule, OptimizerRule::removeSatelliteJoinsRule, DoesNotCreateAdditionalPlans, CanBeDisabled); + + registerRule("smart-joins", smartJoinsRule, + OptimizerRule::smartJoinsRule, + DoesNotCreateAdditionalPlans, CanBeDisabled); #endif #ifdef USE_IRESEARCH diff --git a/arangod/Cluster/ClusterMethods.cpp b/arangod/Cluster/ClusterMethods.cpp index b0da29927c..b34c0c38f3 100644 --- a/arangod/Cluster/ClusterMethods.cpp +++ b/arangod/Cluster/ClusterMethods.cpp @@ -432,6 +432,13 @@ static int distributeBabyOnShards( shardID = shards->at(0); userSpecifiedKey = true; } else { + + int r = transaction::Methods::validateSmartJoinAttribute(*(collinfo.get()), value); + + if (r != TRI_ERROR_NO_ERROR) { + return r; + } + // Sort out the _key attribute: // The user is allowed to specify _key, provided that _key is the one // and only sharding attribute, because in this case we can delegate @@ -753,10 +760,40 @@ bool shardKeysChanged(LogicalCollection const& collection, VPackSlice const& old return true; } } - + return false; } +bool smartJoinAttributeChanged(LogicalCollection const& collection, + VPackSlice const& oldValue, + VPackSlice const& newValue, bool isPatch) { + if (!collection.hasSmartJoinAttribute()) { + return false; + } + if (!oldValue.isObject() || !newValue.isObject()) { + // expecting two objects. everything else is an error + return true; + } + + std::string const& s = collection.smartJoinAttribute(); + + VPackSlice n = newValue.get(s); + if (!n.isString()) { + if (isPatch && n.isNone()) { + // attribute not set in patch document. this means no update + return false; + } + + // no string value... invalid! + return true; + } + + VPackSlice o = oldValue.get(s); + TRI_ASSERT(o.isString()); + + return (arangodb::basics::VelocyPackHelper::compare(n, o, false) != 0); +} + //////////////////////////////////////////////////////////////////////////////// /// @brief returns revision for a sharded collection //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Cluster/ClusterMethods.h b/arangod/Cluster/ClusterMethods.h index a707c5805e..5cc73658b6 100644 --- a/arangod/Cluster/ClusterMethods.h +++ b/arangod/Cluster/ClusterMethods.h @@ -64,6 +64,11 @@ std::unordered_map getForwardableRequestHeaders(Genera bool shardKeysChanged(LogicalCollection const& collection, VPackSlice const& oldValue, VPackSlice const& newValue, bool isPatch); +/// @brief check if the value of the smartJoinAttribute has changed +bool smartJoinAttributeChanged(LogicalCollection const& collection, + VPackSlice const& oldValue, + VPackSlice const& newValue, bool isPatch); + //////////////////////////////////////////////////////////////////////////////// /// @brief returns revision for a sharded collection //////////////////////////////////////////////////////////////////////////////// diff --git a/arangod/Cluster/DBServerAgencySync.cpp b/arangod/Cluster/DBServerAgencySync.cpp index 8f4ae5477a..ed835749b9 100644 --- a/arangod/Cluster/DBServerAgencySync.cpp +++ b/arangod/Cluster/DBServerAgencySync.cpp @@ -262,7 +262,7 @@ DBServerAgencySyncResult DBServerAgencySync::execute() { LOG_TOPIC(INFO, Logger::MAINTENANCE) << "Error reporting to agency: _statusCode: " << r.errorCode() << " message: " << r.errorMessage() - << ". This can be ignored, since it will be retried automaticlly."; + << ". This can be ignored, since it will be retried automatically."; } else { LOG_TOPIC(DEBUG, Logger::MAINTENANCE) << "Invalidating current in ClusterInfo"; diff --git a/arangod/MMFiles/MMFilesCollection.cpp b/arangod/MMFiles/MMFilesCollection.cpp index ff2dc0fde9..2c57642c13 100644 --- a/arangod/MMFiles/MMFilesCollection.cpp +++ b/arangod/MMFiles/MMFilesCollection.cpp @@ -3437,9 +3437,12 @@ Result MMFilesCollection::update(arangodb::transaction::Methods* trx, if (_isDBServer) { // Need to check that no sharding keys have changed: - if (arangodb::shardKeysChanged(_logicalCollection, oldDoc, builder->slice(), false)) { + if (arangodb::shardKeysChanged(_logicalCollection, oldDoc, builder->slice(), true)) { return Result(TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SHARDING_ATTRIBUTES); } + if (arangodb::smartJoinAttributeChanged(_logicalCollection, oldDoc, builder->slice(), true)) { + return Result(TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE); + } } } else { revisionId = TRI_ExtractRevisionId(VPackSlice( @@ -3572,6 +3575,9 @@ Result MMFilesCollection::replace(transaction::Methods* trx, VPackSlice const ne if (arangodb::shardKeysChanged(_logicalCollection, oldDoc, builder->slice(), false)) { return Result(TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SHARDING_ATTRIBUTES); } + if (arangodb::smartJoinAttributeChanged(_logicalCollection, oldDoc, builder->slice(), false)) { + return Result(TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE); + } } } diff --git a/arangod/RestHandler/RestCollectionHandler.cpp b/arangod/RestHandler/RestCollectionHandler.cpp index 7fd0a79762..20593807db 100644 --- a/arangod/RestHandler/RestCollectionHandler.cpp +++ b/arangod/RestHandler/RestCollectionHandler.cpp @@ -302,8 +302,9 @@ void RestCollectionHandler::handleCommandPost() { "indexBuckets", "keyOptions", StaticStrings::WaitForSyncString, "cacheEnabled", StaticStrings::ShardKeys, StaticStrings::NumberOfShards, StaticStrings::DistributeShardsLike, "avoidServers", StaticStrings::IsSmart, - "shardingStrategy", StaticStrings::GraphSmartGraphAttribute, - StaticStrings::ReplicationFactor, "servers"}); + "shardingStrategy", StaticStrings::GraphSmartGraphAttribute, + StaticStrings::SmartJoinAttribute, StaticStrings::ReplicationFactor, + "servers"}); VPackSlice const parameters = filtered.slice(); // now we can create the collection diff --git a/arangod/RestServer/QueryRegistryFeature.cpp b/arangod/RestServer/QueryRegistryFeature.cpp index 8c2be686f2..3bd38755a7 100644 --- a/arangod/RestServer/QueryRegistryFeature.cpp +++ b/arangod/RestServer/QueryRegistryFeature.cpp @@ -42,6 +42,7 @@ QueryRegistryFeature::QueryRegistryFeature(application_features::ApplicationServ _trackSlowQueries(true), _trackBindVars(true), _failOnWarning(false), + _smartJoins(true), _queryMemoryLimit(0), _maxQueryPlans(128), _slowQueryThreshold(10.0), @@ -128,6 +129,12 @@ void QueryRegistryFeature::collectOptions(std::shared_ptr option "single-server instances or 600 for cluster instances", new DoubleParameter(&_queryRegistryTTL), arangodb::options::makeFlags(arangodb::options::Flags::Hidden)); + + options->addOption("--query.smart-joins", + "enable smart joins query optimization", + new BooleanParameter(&_smartJoins), + arangodb::options::makeFlags(arangodb::options::Flags::Hidden, arangodb::options::Flags::Enterprise)) + .setIntroducedIn(30405).setIntroducedIn(30500); } void QueryRegistryFeature::validateOptions(std::shared_ptr options) { diff --git a/arangod/RestServer/QueryRegistryFeature.h b/arangod/RestServer/QueryRegistryFeature.h index 35f4021170..00ce0f349f 100644 --- a/arangod/RestServer/QueryRegistryFeature.h +++ b/arangod/RestServer/QueryRegistryFeature.h @@ -53,6 +53,7 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu return _slowStreamingQueryThreshold; } bool failOnWarning() const { return _failOnWarning; } + bool smartJoins() const { return _smartJoins; } uint64_t queryMemoryLimit() const { return _queryMemoryLimit; } uint64_t maxQueryPlans() const { return _maxQueryPlans; } @@ -60,6 +61,7 @@ class QueryRegistryFeature final : public application_features::ApplicationFeatu bool _trackSlowQueries; bool _trackBindVars; bool _failOnWarning; + bool _smartJoins; uint64_t _queryMemoryLimit; uint64_t _maxQueryPlans; double _slowQueryThreshold; diff --git a/arangod/RocksDBEngine/RocksDBCollection.cpp b/arangod/RocksDBEngine/RocksDBCollection.cpp index 4b7d68eee7..9b6b96ee02 100644 --- a/arangod/RocksDBEngine/RocksDBCollection.cpp +++ b/arangod/RocksDBEngine/RocksDBCollection.cpp @@ -961,9 +961,12 @@ Result RocksDBCollection::update(arangodb::transaction::Methods* trx, if (_isDBServer) { // Need to check that no sharding keys have changed: - if (arangodb::shardKeysChanged(_logicalCollection, oldDoc, builder->slice(), false)) { + if (arangodb::shardKeysChanged(_logicalCollection, oldDoc, builder->slice(), true)) { return res.reset(TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SHARDING_ATTRIBUTES); } + if (arangodb::smartJoinAttributeChanged(_logicalCollection, oldDoc, builder->slice(), true)) { + return Result(TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE); + } } VPackSlice const newDoc(builder->slice()); @@ -1066,6 +1069,9 @@ Result RocksDBCollection::replace(transaction::Methods* trx, if (arangodb::shardKeysChanged(_logicalCollection, oldDoc, builder->slice(), false)) { return res.reset(TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SHARDING_ATTRIBUTES); } + if (arangodb::smartJoinAttributeChanged(_logicalCollection, oldDoc, builder->slice(), false)) { + return Result(TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE); + } } VPackSlice const newDoc(builder->slice()); diff --git a/arangod/Transaction/Methods.cpp b/arangod/Transaction/Methods.cpp index 8a8cdb6ebe..c4ed1dfca0 100644 --- a/arangod/Transaction/Methods.cpp +++ b/arangod/Transaction/Methods.cpp @@ -1816,6 +1816,12 @@ OperationResult transaction::Methods::insertLocal(std::string const& collectionN return Result(TRI_ERROR_ARANGO_DOCUMENT_TYPE_INVALID); } + int r = validateSmartJoinAttribute(*collection, value); + + if (r != TRI_ERROR_NO_ERROR) { + return Result(r); + } + TRI_voc_tick_t resultMarkerTick = 0; TRI_voc_rid_t revisionId = 0; documentResult.clear(); @@ -3524,3 +3530,10 @@ Result Methods::replicateOperations(LogicalCollection const& collection, return res; } + +#ifndef USE_ENTERPRISE +/*static*/ int Methods::validateSmartJoinAttribute(LogicalCollection const&, + arangodb::velocypack::Slice) { + return TRI_ERROR_NO_ERROR; +} +#endif diff --git a/arangod/Transaction/Methods.h b/arangod/Transaction/Methods.h index eed4fed312..a3db51dedb 100644 --- a/arangod/Transaction/Methods.h +++ b/arangod/Transaction/Methods.h @@ -411,6 +411,9 @@ class Methods { } #endif + static int validateSmartJoinAttribute(LogicalCollection const& collinfo, + arangodb::velocypack::Slice value); + private: /// @brief build a VPack object with _id, _key and _rev and possibly /// oldRef (if given), the result is added to the builder in the diff --git a/arangod/VocBase/LogicalCollection.cpp b/arangod/VocBase/LogicalCollection.cpp index a59791c7a5..7883650d55 100644 --- a/arangod/VocBase/LogicalCollection.cpp +++ b/arangod/VocBase/LogicalCollection.cpp @@ -149,10 +149,10 @@ LogicalCollection::LogicalCollection(TRI_vocbase_t& vocbase, VPackSlice const& i _isSmart(Helper::readBooleanValue(info, StaticStrings::IsSmart, false)), _waitForSync(Helper::readBooleanValue(info, StaticStrings::WaitForSyncString, false)), _allowUserKeys(Helper::readBooleanValue(info, "allowUserKeys", true)), - _keyOptions(nullptr), - _keyGenerator(), - _physical(EngineSelectorFeature::ENGINE->createPhysicalCollection(*this, info)), - _sharding() { +#ifdef USE_ENTERPRISE + _smartJoinAttribute(::readStringValue(info, StaticStrings::SmartJoinAttribute, "")), +#endif + _physical(EngineSelectorFeature::ENGINE->createPhysicalCollection(*this, info)) { TRI_ASSERT(info.isObject()); if (!TRI_vocbase_t::IsAllowedName(info)) { @@ -182,6 +182,48 @@ LogicalCollection::LogicalCollection(TRI_vocbase_t& vocbase, VPackSlice const& i _sharding = std::make_unique(info, this); +#ifdef USE_ENTERPRISE + if (ServerState::instance()->isCoordinator()) { + if (!info.get(StaticStrings::SmartJoinAttribute).isNone() && + !hasSmartJoinAttribute()) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INVALID_SMART_JOIN_ATTRIBUTE, + "smartJoinAttribute must contain a string attribute name"); + } + + if (hasSmartJoinAttribute()) { + auto const& sk = _sharding->shardKeys(); + TRI_ASSERT(!sk.empty()); + + if (sk.size() != 1) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INVALID_SMART_JOIN_ATTRIBUTE, + "smartJoinAttribute can only be used for collections with a single shardKey value"); + } + TRI_ASSERT(!sk.front().empty()); + if (sk.front().back() != ':') { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INVALID_SMART_JOIN_ATTRIBUTE, + std::string("smartJoinAttribute can only be used for shardKeys ending on ':', got '") + sk.front() + "'"); + } + + if (_isSmart) { + if (_type == TRI_COL_TYPE_EDGE) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INVALID_SMART_JOIN_ATTRIBUTE, + "cannot use smartJoinAttribute on a smart edge collection"); + } else if (_type == TRI_COL_TYPE_DOCUMENT) { + VPackSlice sga = info.get(StaticStrings::GraphSmartGraphAttribute); + if (sga.isString() && sga.copyString() != info.get(StaticStrings::SmartJoinAttribute).copyString()) { + THROW_ARANGO_EXCEPTION_MESSAGE(TRI_ERROR_INVALID_SMART_JOIN_ATTRIBUTE, + "smartJoinAttribute must be equal to smartGraphAttribute"); + } + } + } + } + } +#else + // whatever we got passed in, in a non-enterprise build, we just ignore + // any specification for the smartJoinAttribute + _smartJoinAttribute.clear(); +#endif + if (ServerState::instance()->isDBServer() || !ServerState::instance()->isRunningInCluster()) { _followers.reset(new FollowerInfo(this)); @@ -378,10 +420,6 @@ TRI_voc_rid_t LogicalCollection::revision(transaction::Methods* trx) const { return _physical->revision(trx); } -bool LogicalCollection::waitForSync() const { return _waitForSync; } - -bool LogicalCollection::isSmart() const { return _isSmart; } - std::unique_ptr const& LogicalCollection::followers() const { return _followers; } @@ -615,6 +653,10 @@ arangodb::Result LogicalCollection::appendVelocyPack(arangodb::velocypack::Build // Cluster Specific result.add(StaticStrings::IsSmart, VPackValue(_isSmart)); + if (hasSmartJoinAttribute()) { + result.add(StaticStrings::SmartJoinAttribute, VPackValue(_smartJoinAttribute)); + } + if (!forPersistence) { // with 'forPersistence' added by LogicalDataSource::toVelocyPack // FIXME TODO is this needed in !forPersistence??? diff --git a/arangod/VocBase/LogicalCollection.h b/arangod/VocBase/LogicalCollection.h index 8df5e34674..9ec2a9d6a4 100644 --- a/arangod/VocBase/LogicalCollection.h +++ b/arangod/VocBase/LogicalCollection.h @@ -160,12 +160,17 @@ class LogicalCollection : public LogicalDataSource { // SECTION: Properties TRI_voc_rid_t revision(transaction::Methods*) const; - bool waitForSync() const; - bool isSmart() const; + bool waitForSync() const { return _waitForSync; } + void waitForSync(bool value) { _waitForSync = value; } + bool isSmart() const { return _isSmart; } /// @brief is this a cluster-wide Plan (ClusterInfo) collection bool isAStub() const { return _isAStub; } - void waitForSync(bool value) { _waitForSync = value; } + bool hasSmartJoinAttribute() const { return !smartJoinAttribute().empty(); } + + /// @brief return the name of the smart join attribute (empty string + /// if no smart join attribute is present) + std::string const& smartJoinAttribute() const { return _smartJoinAttribute; } // SECTION: sharding ShardingInfo* shardingInfo() const; @@ -396,9 +401,11 @@ class LogicalCollection : public LogicalDataSource { bool const _allowUserKeys; + std::string _smartJoinAttribute; + // SECTION: Key Options - // @brief options for key creation, TODO Really VPack? + // @brief options for key creation std::shared_ptr const> _keyOptions; std::unique_ptr _keyGenerator; diff --git a/arangod/VocBase/Methods/Upgrade.cpp b/arangod/VocBase/Methods/Upgrade.cpp index 9bd4109826..d9971d731b 100644 --- a/arangod/VocBase/Methods/Upgrade.cpp +++ b/arangod/VocBase/Methods/Upgrade.cpp @@ -355,11 +355,16 @@ UpgradeResult methods::Upgrade::runTasks(TRI_vocbase_t& vocbase, VersionResult& LOG_TOPIC(ERR, Logger::STARTUP) << msg << " Aborting procedure."; return UpgradeResult(TRI_ERROR_INTERNAL, msg, vinfo.status); } - } catch (basics::Exception const& e) { + } catch (arangodb::basics::Exception const& e) { LOG_TOPIC(ERR, Logger::STARTUP) - << "Executing " << t.name << " (" << t.description << ") failed with " - << e.message() << ". Aborting procedure."; + << "Executing " << t.name << " (" << t.description << ") failed with error: " + << e.what() << ". Aborting procedure."; return UpgradeResult(e.code(), e.what(), vinfo.status); + } catch (std::exception const& e) { + LOG_TOPIC(ERR, Logger::STARTUP) + << "Executing " << t.name << " (" << t.description << ") failed with error: " + << e.what() << ". Aborting procedure."; + return UpgradeResult(TRI_ERROR_FAILED, e.what(), vinfo.status); } // remember we already executed this one diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/collections/arangoCollections.js b/js/apps/system/_admin/aardvark/APP/frontend/js/collections/arangoCollections.js index 27995310b3..fd3681996b 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/collections/arangoCollections.js +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/collections/arangoCollections.js @@ -167,6 +167,11 @@ data.shardKeys = object.shardBy; } + if (object.smartJoinAttribute && + object.smartJoinAttribute !== '') { + data.smartJoinAttribute = object.smartJoinAttribute; + } + if (object.replicationFactor) { data.replicationFactor = object.replicationFactor; var pattern = new RegExp(/^[0-9]+$/); diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/modalCollectionInfo.ejs b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/modalCollectionInfo.ejs index f5a3a5e10e..54fe348c14 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/templates/modalCollectionInfo.ejs +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/templates/modalCollectionInfo.ejs @@ -96,6 +96,17 @@ <% } %> + <% if (figuresData.smartJoinAttribute) { %> + + Smart join attribute: + + + + + + + <% } %> + <% if (figuresData.replicationFactor) { %> Replication factor: diff --git a/js/apps/system/_admin/aardvark/APP/frontend/js/views/collectionsView.js b/js/apps/system/_admin/aardvark/APP/frontend/js/views/collectionsView.js index ad5997b0f9..b4c37f9779 100644 --- a/js/apps/system/_admin/aardvark/APP/frontend/js/views/collectionsView.js +++ b/js/apps/system/_admin/aardvark/APP/frontend/js/views/collectionsView.js @@ -355,6 +355,7 @@ var collSync = $('#new-collection-sync').val(); var shards = 1; var shardBy = []; + var smartJoinAttribute = ''; if (replicationFactor === '') { replicationFactor = 1; @@ -364,6 +365,10 @@ } if (isCoordinator) { + if (frontendConfig.isEnterprise && $('#smart-join-attribute').val() !== '') { + smartJoinAttribute = $('#smart-join-attribute').val().trim(); + } + shards = $('#new-collection-shards').val(); if (shards === '') { @@ -429,6 +434,9 @@ if (self.engine.name !== 'rocksdb') { tmpObj.journalSize = collSize; } + if (smartJoinAttribute !== '') { + tmpObj.smartJoinAttribute = smartJoinAttribute; + } this.collection.newCollection(tmpObj, callback); window.modalView.hide(); arangoHelper.arangoNotification('Collection', 'Collection "' + collName + '" will be created.'); @@ -525,6 +533,18 @@ [{value: false, label: 'No'}, {value: true, label: 'Yes'}] ) ); + advancedTableContent.push( + window.modalView.createTextEntry( + 'smart-join-attribute', + 'Smart join attribute', + '', + 'String attribute name. Can be left empty if smart joins are not used.', + '', + false, + [ + ] + ) + ); } advancedTableContent.push( window.modalView.createTextEntry( diff --git a/js/client/modules/@arangodb/arango-collection.js b/js/client/modules/@arangodb/arango-collection.js index cdab223864..8a562a875c 100644 --- a/js/client/modules/@arangodb/arango-collection.js +++ b/js/client/modules/@arangodb/arango-collection.js @@ -350,6 +350,7 @@ ArangoCollection.prototype.properties = function (properties) { 'waitForSync': true, 'shardKeys': false, 'smartGraphAttribute': false, + 'smartJoinAttribute': false, 'numberOfShards': false, 'keyOptions': false, 'indexBuckets': true, diff --git a/js/client/modules/@arangodb/arango-database.js b/js/client/modules/@arangodb/arango-database.js index 8ea0f7f67b..daf493e93e 100644 --- a/js/client/modules/@arangodb/arango-database.js +++ b/js/client/modules/@arangodb/arango-database.js @@ -346,8 +346,8 @@ ArangoDatabase.prototype._create = function (name, properties, type, options) { [ 'waitForSync', 'journalSize', 'isSystem', 'isVolatile', 'doCompact', 'keyOptions', 'shardKeys', 'numberOfShards', 'distributeShardsLike', 'indexBuckets', 'id', 'isSmart', - 'replicationFactor', 'shardingStrategy', 'smartGraphAttribute', - 'avoidServers', 'cacheEnabled'].forEach(function (p) { + 'replicationFactor', 'shardingStrategy', 'smartGraphAttribute', + 'smartJoinAttribute', 'avoidServers', 'cacheEnabled'].forEach(function (p) { if (properties.hasOwnProperty(p)) { body[p] = properties[p]; } diff --git a/js/common/bootstrap/errors.js b/js/common/bootstrap/errors.js index 3aabb5308a..a923ac364e 100644 --- a/js/common/bootstrap/errors.js +++ b/js/common/bootstrap/errors.js @@ -329,6 +329,10 @@ "ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_GRAPH_ATTRIBUTE" : { "code" : 4003, "message" : "in smart vertex collections _key must be prefixed with the value of the smart graph attribute" }, "ERROR_ILLEGAL_SMART_GRAPH_ATTRIBUTE" : { "code" : 4004, "message" : "attribute cannot be used as smart graph attribute" }, "ERROR_SMART_GRAPH_ATTRIBUTE_MISMATCH" : { "code" : 4005, "message" : "smart graph attribute mismatch" }, + "ERROR_INVALID_SMART_JOIN_ATTRIBUTE" : { "code" : 4006, "message" : "invalid smart join attribute declaration" }, + "ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_JOIN_ATTRIBUTE" : { "code" : 4007, "message" : "shard key value must be prefixed with the value of the smart join attribute" }, + "ERROR_NO_SMART_JOIN_ATTRIBUTE" : { "code" : 4008, "message" : "smart join attribute not given or invalid" }, + "ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE" : { "code" : 4009, "message" : "must not change the value of the smartJoinAttribute" }, "ERROR_CLUSTER_REPAIRS_FAILED" : { "code" : 5000, "message" : "error during cluster repairs" }, "ERROR_CLUSTER_REPAIRS_NOT_ENOUGH_HEALTHY" : { "code" : 5001, "message" : "not enough (healthy) db servers" }, "ERROR_CLUSTER_REPAIRS_REPLICATION_FACTOR_VIOLATED" : { "code" : 5002, "message" : "replication factor violated during cluster repairs" }, diff --git a/js/common/modules/@arangodb/aql/explainer.js b/js/common/modules/@arangodb/aql/explainer.js index 2805a2afa2..2baa402bad 100644 --- a/js/common/modules/@arangodb/aql/explainer.js +++ b/js/common/modules/@arangodb/aql/explainer.js @@ -1456,7 +1456,7 @@ function processQuery(query, explain, planIndex) { case 'RemoteNode': return keyword('REMOTE'); case 'DistributeNode': - return keyword('DISTRIBUTE'); + return keyword('DISTRIBUTE') + ' ' + annotation('/* create keys: ' + node.createKeys + ', variable: ') + variableName(node.variable) + ' ' + annotation('*/'); case 'ScatterNode': return keyword('SCATTER'); case 'GatherNode': diff --git a/lib/Basics/StaticStrings.cpp b/lib/Basics/StaticStrings.cpp index 35b5d6478d..cdd5e4ce18 100644 --- a/lib/Basics/StaticStrings.cpp +++ b/lib/Basics/StaticStrings.cpp @@ -195,6 +195,7 @@ std::string const StaticStrings::IsSmart("isSmart"); std::string const StaticStrings::NumberOfShards("numberOfShards"); std::string const StaticStrings::ReplicationFactor("replicationFactor"); std::string const StaticStrings::ShardKeys("shardKeys"); +std::string const StaticStrings::SmartJoinAttribute("smartJoinAttribute"); // graph attribute names std::string const StaticStrings::GraphCollection("_graphs"); diff --git a/lib/Basics/StaticStrings.h b/lib/Basics/StaticStrings.h index 2160ec4e79..4098a4bf59 100644 --- a/lib/Basics/StaticStrings.h +++ b/lib/Basics/StaticStrings.h @@ -180,6 +180,7 @@ class StaticStrings { static std::string const DistributeShardsLike; static std::string const ReplicationFactor; static std::string const ShardKeys; + static std::string const SmartJoinAttribute; // graph attribute names static std::string const GraphCollection; diff --git a/lib/Basics/errors.dat b/lib/Basics/errors.dat index dc13d87b17..666ee28c47 100755 --- a/lib/Basics/errors.dat +++ b/lib/Basics/errors.dat @@ -454,6 +454,10 @@ ERROR_CANNOT_DROP_SMART_COLLECTION,4002,"cannot drop this smart collection","Thi ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_GRAPH_ATTRIBUTE,4003,"in smart vertex collections _key must be prefixed with the value of the smart graph attribute","In a smart vertex collection _key must be prefixed with the value of the smart graph attribute." ERROR_ILLEGAL_SMART_GRAPH_ATTRIBUTE,4004,"attribute cannot be used as smart graph attribute","The given smartGraph attribute is illegal and connot be used for sharding. All system attributes are forbidden." ERROR_SMART_GRAPH_ATTRIBUTE_MISMATCH,4005,"smart graph attribute mismatch","The smart graph attribute of the given collection does not match the smart graph attribute of the graph." +ERROR_INVALID_SMART_JOIN_ATTRIBUTE,4006,"invalid smart join attribute declaration","Will be raised when the smartJoinAttribute declaration is invalid." +ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_JOIN_ATTRIBUTE,4007,"shard key value must be prefixed with the value of the smart join attribute","when using smartJoinAttribute for a collection, the shard key value must be prefixed with the value of the smart join attribute." +ERROR_NO_SMART_JOIN_ATTRIBUTE,4008,"smart join attribute not given or invalid","The given document does not have the required smart join attribute set or it has an invalid value." +ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE,4009,"must not change the value of the smartJoinAttribute","Will be raised if there is an attempt to update the value of the smartJoinAttribute." ################################################################################ ## Cluster repair errors diff --git a/lib/Basics/voc-errors.cpp b/lib/Basics/voc-errors.cpp index d76606b864..38c0077657 100644 --- a/lib/Basics/voc-errors.cpp +++ b/lib/Basics/voc-errors.cpp @@ -328,6 +328,10 @@ void TRI_InitializeErrorMessages() { REG_ERROR(ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_GRAPH_ATTRIBUTE, "in smart vertex collections _key must be prefixed with the value of the smart graph attribute"); REG_ERROR(ERROR_ILLEGAL_SMART_GRAPH_ATTRIBUTE, "attribute cannot be used as smart graph attribute"); REG_ERROR(ERROR_SMART_GRAPH_ATTRIBUTE_MISMATCH, "smart graph attribute mismatch"); + REG_ERROR(ERROR_INVALID_SMART_JOIN_ATTRIBUTE, "invalid smart join attribute declaration"); + REG_ERROR(ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_JOIN_ATTRIBUTE, "shard key value must be prefixed with the value of the smart join attribute"); + REG_ERROR(ERROR_NO_SMART_JOIN_ATTRIBUTE, "smart join attribute not given or invalid"); + REG_ERROR(ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE, "must not change the value of the smartJoinAttribute"); REG_ERROR(ERROR_CLUSTER_REPAIRS_FAILED, "error during cluster repairs"); REG_ERROR(ERROR_CLUSTER_REPAIRS_NOT_ENOUGH_HEALTHY, "not enough (healthy) db servers"); REG_ERROR(ERROR_CLUSTER_REPAIRS_REPLICATION_FACTOR_VIOLATED, "replication factor violated during cluster repairs"); diff --git a/lib/Basics/voc-errors.h b/lib/Basics/voc-errors.h index 1942a6e4a3..b95a1f49c0 100644 --- a/lib/Basics/voc-errors.h +++ b/lib/Basics/voc-errors.h @@ -1734,6 +1734,29 @@ constexpr int TRI_ERROR_ILLEGAL_SMART_GRAPH_ATTRIBUTE /// graph attribute of the graph. constexpr int TRI_ERROR_SMART_GRAPH_ATTRIBUTE_MISMATCH = 4005; +/// 4006: ERROR_INVALID_SMART_JOIN_ATTRIBUTE +/// "invalid smart join attribute declaration" +/// Will be raised when the smartJoinAttribute declaration is invalid. +constexpr int TRI_ERROR_INVALID_SMART_JOIN_ATTRIBUTE = 4006; + +/// 4007: ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_JOIN_ATTRIBUTE +/// "shard key value must be prefixed with the value of the smart join attribute" +/// when using smartJoinAttribute for a collection, the shard key value must be +/// prefixed with the value of the smart join attribute. +constexpr int TRI_ERROR_KEY_MUST_BE_PREFIXED_WITH_SMART_JOIN_ATTRIBUTE = 4007; + +/// 4008: ERROR_NO_SMART_JOIN_ATTRIBUTE +/// "smart join attribute not given or invalid" +/// The given document does not have the required smart join attribute set or +/// it has an invalid value. +constexpr int TRI_ERROR_NO_SMART_JOIN_ATTRIBUTE = 4008; + +/// 4009: ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE +/// "must not change the value of the smartJoinAttribute" +/// Will be raised if there is an attempt to update the value of the +/// smartJoinAttribute. +constexpr int TRI_ERROR_CLUSTER_MUST_NOT_CHANGE_SMART_JOIN_ATTRIBUTE = 4009; + /// 5000: ERROR_CLUSTER_REPAIRS_FAILED /// "error during cluster repairs" /// General error during cluster repairs